OpenDAS / mmdetection3d / Commits

Commit b496f579: [Refactor] Refactor Mono3D models
Authored Jul 18, 2022 by ZCMax; committed by ChaimZhu, Jul 20, 2022
Parent: 35667791

Changes: 36 | Showing 20 changed files with 1225 additions and 522 deletions (+1225 -522)
configs/_base_/datasets/kitti-mono3d.py                                  +55 -61
configs/_base_/datasets/nus-mono3d.py                                    +74 -64
configs/_base_/models/fcos3d.py                                          +15 -7
configs/_base_/models/pgd.py                                             +6 -5
configs/_base_/models/smoke.py                                           +11 -3
configs/_base_/schedules/mmdet_schedule_1x.py                            +21 -10
configs/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py    +55 -47
configs/pgd/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d.py            +60 -50
configs/smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py  +44 -38
mmdet3d/datasets/__init__.py                                             +2 -1
mmdet3d/datasets/convert_utils.py                                        +256 -0
mmdet3d/datasets/det3d_dataset.py                                        +1 -0
mmdet3d/datasets/nuscenes_dataset.py                                     +102 -6
mmdet3d/datasets/pipelines/formating.py                                  +1 -1
mmdet3d/datasets/pipelines/loading.py                                    +55 -3
mmdet3d/datasets/pipelines/transforms_3d.py                              +46 -44
mmdet3d/metrics/kitti_metric.py                                          +2 -0
mmdet3d/metrics/nuscenes_metric.py                                       +297 -25
mmdet3d/models/data_preprocessors/data_preprocessor.py                   +1 -2
mmdet3d/models/dense_heads/anchor_free_mono3d_head.py                    +121 -155
configs/_base_/datasets/kitti-mono3d.py (+55 -61) - view file @ b496f579

```diff
-dataset_type = 'KittiMonoDataset'
+dataset_type = 'KittiDataset'
 data_root = 'data/kitti/'
 class_names = ['Pedestrian', 'Cyclist', 'Car']
 input_modality = dict(use_lidar=False, use_camera=True)
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+metainfo = dict(CLASSES=class_names)
+
+# file_client_args = dict(backend='disk')
+# Uncomment the following if use ceph or other file clients.
+# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
+# for more details.
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/kitti/': 's3://openmmlab/datasets/detection3d/kitti/',
+        'data/kitti/': 's3://openmmlab/datasets/detection3d/kitti/'
+    }))
+
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
@@ -14,79 +27,60 @@ train_pipeline = [
         with_bbox_3d=True,
         with_label_3d=True,
         with_bbox_depth=True),
-    dict(type='Resize', img_scale=(1242, 375), keep_ratio=True),
+    dict(type='Resize', scale=(1242, 375), keep_ratio=True),
     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d',
-            'centers2d', 'depths'
+            'centers_2d', 'depths'
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1242, 375),
-        flip=False,
-        transforms=[
-            dict(type='RandomFlip3D'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['img']),
-        ])
-]
-# construct a pipeline for data and gt loading in show function
-# please keep its loading function consistent with test_pipeline (e.g. client)
-eval_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['img'])
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='Resize', scale=(1242, 375), keep_ratio=True),
+    dict(type='Pack3DDetInputs', keys=['img'])
 ]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=2,
-    train=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_train_mono3d.coco.json',
-        info_file=data_root + 'kitti_infos_train.pkl',
-        img_prefix=data_root,
-        classes=class_names,
-        pipeline=train_pipeline,
-        modality=input_modality,
-        test_mode=False,
-        box_type_3d='Camera'),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val_mono3d.coco.json',
-        info_file=data_root + 'kitti_infos_val.pkl',
-        img_prefix=data_root,
-        classes=class_names,
-        pipeline=test_pipeline,
-        modality=input_modality,
-        test_mode=True,
-        box_type_3d='Camera'),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val_mono3d.coco.json',
-        info_file=data_root + 'kitti_infos_val.pkl',
-        img_prefix=data_root,
-        classes=class_names,
-        pipeline=test_pipeline,
-        modality=input_modality,
-        test_mode=True,
-        box_type_3d='Camera'))
-evaluation = dict(interval=2)
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file='kitti_infos_train.pkl',
+        data_prefix=dict(img='training/image_2'),
+        pipeline=train_pipeline,
+        modality=input_modality,
+        test_mode=False,
+        metainfo=metainfo,
+        # we use box_type_3d='Camera' in monocular 3d
+        # detection task
+        box_type_3d='Camera'))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(img='training/image_2'),
+        ann_file='kitti_infos_val.pkl',
+        pipeline=test_pipeline,
+        modality=input_modality,
+        metainfo=metainfo,
+        test_mode=True,
+        box_type_3d='Camera'))
+test_dataloader = val_dataloader
+val_evaluator = dict(
+    type='KittiMetric',
+    ann_file=data_root + 'kitti_infos_val.pkl',
+    metric='bbox',
+    pred_box_type_3d='Camera')
+test_evaluator = val_evaluator
```
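This pattern recurs in every dataset config in the commit: the single `data` dict splits into `train_dataloader`/`val_dataloader`/`test_dataloader`, `samples_per_gpu`/`workers_per_gpu` become `batch_size`/`num_workers`, and `evaluation` becomes explicit `val_evaluator`/`test_evaluator` metrics. A minimal sketch of how a hypothetical downstream config overriding the old keys would be ported (the downstream file and its values are illustrative, not part of this commit; the `_base_` inheritance is standard config mechanics):

```python
# Before the refactor, a downstream config might have looked like:
# _base_ = ['../_base_/datasets/kitti-mono3d.py']
# data = dict(samples_per_gpu=4, workers_per_gpu=4)
# evaluation = dict(interval=2)

# After the refactor, the same overrides would look like:
_base_ = ['../_base_/datasets/kitti-mono3d.py']
train_dataloader = dict(batch_size=4, num_workers=4)
# the evaluation interval now lives in the training loop config
train_cfg = dict(val_interval=2)
```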
configs/_base_/datasets/nus-mono3d.py (+74 -64) - view file @ b496f579

```diff
-dataset_type = 'NuScenesMonoDataset'
+dataset_type = 'NuScenesDataset'
 data_root = 'data/nuscenes/'
 class_names = [
     'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
     'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
 ]
+metainfo = dict(CLASSES=class_names)
 # Input modality for nuScenes dataset, this is consistent with the submission
 # format which requires the information in input_modality.
-input_modality = dict(
-    use_lidar=False,
-    use_camera=True,
-    use_radar=False,
-    use_map=False,
-    use_external=False)
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+input_modality = dict(use_lidar=False, use_camera=True)
+
+# file_client_args = dict(backend='disk')
+# Uncomment the following if use ceph or other file clients.
+# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
+# for more details.
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/nuscenes/': 's3://openmmlab/datasets/detection3d/nuscenes/',
+        'data/nuscenes/': 's3://openmmlab/datasets/detection3d/nuscenes/'
+    }))
+
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
@@ -26,75 +34,77 @@ train_pipeline = [
         with_bbox_depth=True),
     dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),
     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',
-            'gt_labels_3d', 'centers2d', 'depths'
+            'gt_labels_3d', 'centers_2d', 'depths'
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='MultiScaleFlipAug',
-        scale_factor=1.0,
-        flip=False,
-        transforms=[
-            dict(type='RandomFlip3D'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['img']),
-        ])
-]
-# construct a pipeline for data and gt loading in show function
-# please keep its loading function consistent with test_pipeline (e.g. client)
-eval_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['img'])
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='mmdet.Resize', scale=(1600, 900), keep_ratio=True),
+    dict(type='Pack3DDetInputs', keys=['img'])
 ]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=2,
-    train=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'nuscenes_infos_train_mono3d.coco.json',
-        img_prefix=data_root,
-        classes=class_names,
-        pipeline=train_pipeline,
-        modality=input_modality,
-        test_mode=False,
-        box_type_3d='Camera'),
-    val=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
-        img_prefix=data_root,
-        classes=class_names,
-        pipeline=test_pipeline,
-        modality=input_modality,
-        test_mode=True,
-        box_type_3d='Camera'),
-    test=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file=data_root + 'nuscenes_infos_val_mono3d.coco.json',
-        img_prefix=data_root,
-        classes=class_names,
-        pipeline=test_pipeline,
-        modality=input_modality,
-        test_mode=True,
-        box_type_3d='Camera'))
-evaluation = dict(interval=2)
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            pts='',
+            CAM_FRONT='samples/CAM_FRONT',
+            CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
+            CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
+            CAM_BACK='samples/CAM_BACK',
+            CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
+            CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
+        ann_file='nuscenes_infos_train.pkl',
+        task='mono3d',
+        pipeline=train_pipeline,
+        metainfo=metainfo,
+        modality=input_modality,
+        test_mode=False,
+        # we use box_type_3d='Camera' in monocular 3d
+        # detection task
+        box_type_3d='Camera',
+        use_valid_flag=True))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=2,
+    persistent_workers=True,
+    drop_last=False,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        data_prefix=dict(
+            pts='',
+            CAM_FRONT='samples/CAM_FRONT',
+            CAM_FRONT_LEFT='samples/CAM_FRONT_LEFT',
+            CAM_FRONT_RIGHT='samples/CAM_FRONT_RIGHT',
+            CAM_BACK='samples/CAM_BACK',
+            CAM_BACK_RIGHT='samples/CAM_BACK_RIGHT',
+            CAM_BACK_LEFT='samples/CAM_BACK_LEFT'),
+        ann_file='nuscenes_infos_val.pkl',
+        task='mono3d',
+        pipeline=test_pipeline,
+        modality=input_modality,
+        metainfo=metainfo,
+        test_mode=True,
+        box_type_3d='Camera',
+        use_valid_flag=True))
+test_dataloader = val_dataloader
+val_evaluator = dict(
+    type='NuScenesMetric',
+    data_root=data_root,
+    ann_file=data_root + 'nuscenes_infos_val.pkl',
+    metric='bbox')
+test_evaluator = val_evaluator
```
configs/_base_/models/fcos3d.py (+15 -7) - view file @ b496f579

```diff
 # model settings
 model = dict(
     type='FCOSMono3D',
+    data_preprocessor=dict(
+        type='Det3DDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True,
+        pad_size_divisor=32),
     backbone=dict(
-        type='ResNet',
+        type='mmdet.ResNet',
         depth=101,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
@@ -13,7 +20,7 @@ model = dict(
             type='Pretrained',
             checkpoint='open-mmlab://detectron2/resnet101_caffe')),
     neck=dict(
-        type='FPN',
+        type='mmdet.FPN',
         in_channels=[256, 512, 1024, 2048],
         out_channels=256,
         start_level=1,
@@ -45,18 +52,19 @@ model = dict(
         dir_branch=(256, ),
         attr_branch=(256, ),
         loss_cls=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+        loss_bbox=dict(
+            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
         loss_attr=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
         loss_centerness=dict(
-            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
         bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=9),
         norm_on_bbox=True,
         centerness_on_reg=True,
...
```
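Throughout the model configs, modules inherited from MMDetection gain an explicit `mmdet.` scope prefix (`mmdet.ResNet`, `mmdet.FPN`, `mmdet.FocalLoss`, ...), so the registry resolves them in the upstream package rather than expecting a re-registered copy in mmdet3d. A minimal sketch of what such a scoped type string does, assuming the dev-1.x registries (building a loss by hand like this is illustrative; the configs never do it directly):

```python
from mmdet3d.registry import MODELS

# 'mmdet.FocalLoss' tells the mmdet3d registry to look up 'FocalLoss'
# in MMDetection's MODELS registry instead of its own.
loss_cls = MODELS.build(
    dict(
        type='mmdet.FocalLoss',
        use_sigmoid=True,
        gamma=2.0,
        alpha=0.25,
        loss_weight=1.0))
```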
configs/_base_/models/pgd.py (+6 -5) - view file @ b496f579

```diff
...
@@ -28,18 +28,19 @@ model = dict(
         dir_branch=(256, ),
         attr_branch=(256, ),
         loss_cls=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+        loss_bbox=dict(
+            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
         loss_attr=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
         loss_centerness=dict(
-            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
         norm_on_bbox=True,
         centerness_on_reg=True,
         center_sampling=True,
...
```
configs/_base_/models/smoke.py (+11 -3) - view file @ b496f579

```diff
 # model settings
 model = dict(
     type='SMOKEMono3D',
+    data_preprocessor=dict(
+        type='Det3DDataPreprocessor',
+        mean=[123.675, 116.28, 103.53],
+        std=[58.395, 57.12, 57.375],
+        bgr_to_rgb=True,
+        pad_size_divisor=32),
     backbone=dict(
         type='DLANet',
         depth=34,
@@ -42,10 +49,11 @@ model = dict(
             base_dims=((0.88, 1.73, 0.67), (1.78, 1.70, 0.58), (3.88, 1.63,
                        1.53)),
             code_size=7),
-        loss_cls=dict(type='GaussianFocalLoss', loss_weight=1.0),
-        loss_bbox=dict(type='L1Loss', reduction='sum', loss_weight=1 / 300),
+        loss_cls=dict(type='mmdet.GaussianFocalLoss', loss_weight=1.0),
+        loss_bbox=dict(
+            type='mmdet.L1Loss', reduction='sum', loss_weight=1 / 300),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
         loss_attr=None,
         conv_bias=True,
         dcn_on_last_conv=False),
...
```
configs/_base_/schedules/mmdet_schedule_1x.py (+21 -10) - view file @ b496f579

```diff
+# training schedule for 1x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+
+# learning rate
+param_scheduler = [
+    dict(
+        type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=12,
+        by_epoch=True,
+        milestones=[8, 11],
+        gamma=0.1)
+]
+
 # optimizer
-optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
-optimizer_config = dict(grad_clip=None)
-# learning policy
-lr_config = dict(
-    policy='step',
-    warmup='linear',
-    warmup_iters=500,
-    warmup_ratio=0.001,
-    step=[8, 11])
-runner = dict(type='EpochBasedRunner', max_epochs=12)
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001))
```
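The old `lr_config` collapsed warmup and step decay into one dict; the new `param_scheduler` expresses them as two explicit schedulers. The translation is mechanical: `warmup='linear', warmup_iters=500, warmup_ratio=0.001` becomes an iteration-based `LinearLR` that scales the base lr by `start_factor=0.001` at iteration 0 and reaches the full lr at iteration 500, after which the epoch-based `MultiStepLR` applies `gamma=0.1` at epochs 8 and 11. A rough check of the warmup endpoints (plain-Python sketch of the interpolation, not library code):

```python
base_lr = 0.02
start_factor, warmup_iters = 0.001, 500

def warmup_lr(i: int) -> float:
    # LinearLR interpolates the multiplier from start_factor to 1.0
    # over the first warmup_iters iterations.
    frac = min(i, warmup_iters) / warmup_iters
    return base_lr * (start_factor + (1.0 - start_factor) * frac)

assert abs(warmup_lr(0) - 0.02 * 0.001) < 1e-12  # 2e-5 at the first iteration
assert abs(warmup_lr(500) - 0.02) < 1e-12        # full lr once warmup ends
```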
configs/fcos3d/fcos3d_r101_caffe_fpn_gn-head_dcn_2x8_1x_nus-mono3d.py (+55 -47) - view file @ b496f579

```diff
...
@@ -4,18 +4,31 @@ _base_ = [
 ]
 # model settings
 model = dict(
+    data_preprocessor=dict(
+        type='Det3DDataPreprocessor',
+        mean=[103.530, 116.280, 123.675],
+        std=[1.0, 1.0, 1.0],
+        bgr_to_rgb=False,
+        pad_size_divisor=32),
     backbone=dict(
         dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
         stage_with_dcn=(False, False, True, True)))

-class_names = [
-    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
-    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
-]
-img_norm_cfg = dict(
-    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+# file_client_args = dict(backend='disk')
+# Uncomment the following if use ceph or other file clients.
+# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
+# for more details.
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/nuscenes/': 's3://openmmlab/datasets/detection3d/nuscenes/',
+        'data/nuscenes/': 's3://openmmlab/datasets/detection3d/nuscenes/'
+    }))
+
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
@@ -24,52 +37,47 @@ train_pipeline = [
         with_bbox_3d=True,
         with_label_3d=True,
         with_bbox_depth=True),
-    dict(type='Resize', img_scale=(1600, 900), keep_ratio=True),
+    dict(type='mmdet.Resize', scale=(1600, 900), keep_ratio=True),
     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
        keys=[
             'img', 'gt_bboxes', 'gt_labels', 'attr_labels', 'gt_bboxes_3d',
-            'gt_labels_3d', 'centers2d', 'depths'
+            'gt_labels_3d', 'centers_2d', 'depths'
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='MultiScaleFlipAug',
-        scale_factor=1.0,
-        flip=False,
-        transforms=[
-            dict(type='RandomFlip3D'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['img']),
-        ])
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='mmdet.Resize', scale_factor=1.0),
+    dict(type='Pack3DDetInputs', keys=['img'])
 ]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=2,
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+train_dataloader = dict(
+    batch_size=2, num_workers=2, dataset=dict(pipeline=train_pipeline))
+test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 # optimizer
-optimizer = dict(
-    lr=0.002, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))
-optimizer_config = dict(
-    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
-# learning policy
-lr_config = dict(
-    policy='step',
-    warmup='linear',
-    warmup_iters=500,
-    warmup_ratio=1.0 / 3,
-    step=[8, 11])
-total_epochs = 12
-evaluation = dict(interval=2)
+optim_wrapper = dict(
+    optimizer=dict(lr=0.002),
+    paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
+    clip_grad=dict(max_norm=35, norm_type=2))
+# learning rate
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1.0 / 3,
+        by_epoch=False,
+        begin=0,
+        end=500),
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=12,
+        by_epoch=True,
+        milestones=[8, 11],
+        gamma=0.1)
+]
```
configs/pgd/pgd_r101_caffe_fpn_gn-head_3x4_4x_kitti-mono3d.py (+60 -50) - view file @ b496f579

```diff
...
@@ -4,6 +4,12 @@ _base_ = [
 ]
 # model settings
 model = dict(
+    data_preprocessor=dict(
+        type='Det3DDataPreprocessor',
+        mean=[103.530, 116.280, 123.675],
+        std=[1.0, 1.0, 1.0],
+        bgr_to_rgb=False,
+        pad_size_divisor=32),
     backbone=dict(frozen_stages=0),
     neck=dict(start_level=0, num_outs=4),
     bbox_head=dict(
@@ -27,16 +33,17 @@ model = dict(
         ),
         centerness_branch=(256, ),
         loss_cls=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+        loss_bbox=dict(
+            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
         loss_centerness=dict(
-            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
         use_depth_classifier=True,
         depth_branch=(256, ),
         depth_range=(0, 70),
@@ -61,11 +68,21 @@ model = dict(
         ]),
     test_cfg=dict(nms_pre=100, nms_thr=0.05, score_thr=0.001, max_per_img=20))

-class_names = ['Pedestrian', 'Cyclist', 'Car']
-img_norm_cfg = dict(
-    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+# file_client_args = dict(backend='disk')
+# Uncomment the following if use ceph or other file clients.
+# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
+# for more details.
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/kitti/': 's3://openmmlab/datasets/detection3d/kitti/',
+        'data/kitti/': 's3://openmmlab/datasets/detection3d/kitti/'
+    }))
+
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
@@ -74,54 +91,47 @@ train_pipeline = [
         with_bbox_3d=True,
         with_label_3d=True,
         with_bbox_depth=True),
-    dict(type='Resize', img_scale=(1242, 375), keep_ratio=True),
+    dict(type='mmdet.Resize', scale=(1242, 375), keep_ratio=True),
     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d',
-            'centers2d', 'depths'
+            'centers_2d', 'depths'
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='MultiScaleFlipAug',
-        scale_factor=1.0,
-        flip=False,
-        transforms=[
-            dict(type='RandomFlip3D'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['img']),
-        ])
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='mmdet.Resize', scale_factor=1.0),
+    dict(type='Pack3DDetInputs', keys=['img'])
 ]
-data = dict(
-    samples_per_gpu=3,
-    workers_per_gpu=3,
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+train_dataloader = dict(
+    batch_size=3, num_workers=3, dataset=dict(pipeline=train_pipeline))
+test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 # optimizer
-optimizer = dict(
-    lr=0.001, paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.))
-optimizer_config = dict(
-    _delete_=True, grad_clip=dict(max_norm=35, norm_type=2))
-# learning policy
-lr_config = dict(
-    policy='step',
-    warmup='linear',
-    warmup_iters=500,
-    warmup_ratio=1.0 / 3,
-    step=[32, 44])
-total_epochs = 48
-runner = dict(type='EpochBasedRunner', max_epochs=48)
-evaluation = dict(interval=2)
-checkpoint_config = dict(interval=8)
+optim_wrapper = dict(
+    optimizer=dict(lr=0.01),
+    paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
+    clip_grad=dict(max_norm=35, norm_type=2))
+# learning rate
+param_scheduler = [
+    dict(
+        type='LinearLR',
+        start_factor=1.0 / 3,
+        by_epoch=False,
+        begin=0,
+        end=500),
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=48,
+        by_epoch=True,
+        milestones=[32, 44],
+        gamma=0.1)
+]
+train_cfg = dict(max_epochs=48)
```
configs/smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py (+44 -38) - view file @ b496f579

```diff
...
@@ -3,21 +3,21 @@ _base_ = [
     '../_base_/default_runtime.py'
 ]

-# optimizer
-optimizer = dict(type='Adam', lr=2.5e-4)
-optimizer_config = dict(grad_clip=None)
-lr_config = dict(policy='step', warmup=None, step=[50])
-# runtime settings
-runner = dict(type='EpochBasedRunner', max_epochs=72)
-log_config = dict(interval=10)
+# file_client_args = dict(backend='disk')
+# Uncomment the following if use ceph or other file clients.
+# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
+# for more details.
+file_client_args = dict(
+    backend='petrel',
+    path_mapping=dict({
+        './data/kitti/': 's3://openmmlab/datasets/detection3d/kitti/',
+        'data/kitti/': 's3://openmmlab/datasets/detection3d/kitti/'
+    }))

 find_unused_parameters = True
 class_names = ['Pedestrian', 'Cyclist', 'Car']
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
 train_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
     dict(
         type='LoadAnnotations3D',
         with_bbox=True,
@@ -29,36 +29,42 @@ train_pipeline = [
     dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
     dict(type='RandomShiftScale', shift_scale=(0.2, 0.4), aug_prob=0.3),
     dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
+        type='Pack3DDetInputs',
         keys=[
             'img', 'gt_bboxes', 'gt_labels', 'gt_bboxes_3d', 'gt_labels_3d',
-            'centers2d', 'depths'
+            'centers_2d', 'depths'
         ]),
 ]
 test_pipeline = [
-    dict(type='LoadImageFromFileMono3D'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1280, 384),
-        flip=False,
-        transforms=[
-            dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['img']),
-        ])
-]
-data = dict(
-    samples_per_gpu=8,
-    workers_per_gpu=4,
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+    dict(type='LoadImageFromFileMono3D', file_client_args=file_client_args),
+    dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
+    dict(type='Pack3DDetInputs', keys=['img'])
+]
+train_dataloader = dict(
+    batch_size=8, num_workers=4, dataset=dict(pipeline=train_pipeline))
+test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
+# training schedule for 1x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
+# learning rate
+param_scheduler = [
+    dict(
+        type='MultiStepLR',
+        begin=0,
+        end=12,
+        by_epoch=True,
+        milestones=[8, 11],
+        gamma=0.1)
+]
+# optimizer
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(type='Adam', lr=2.5e-4),
+    clip_grad=None)
```
mmdet3d/datasets/__init__.py (+2 -1) - view file @ b496f579

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
 from .builder import DATASETS, PIPELINES, build_dataset
+from .convert_utils import get_2d_boxes
 from .dataset_wrappers import CBGSDataset
 from .det3d_dataset import Det3DDataset
 from .kitti_dataset import KittiDataset
@@ -41,5 +42,5 @@ __all__ = [
     'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter',
     'VoxelBasedPointSampler', 'get_loading_pipeline', 'RandomDropPointsColor',
     'RandomJitterPoints', 'ObjectNameFilter', 'AffineResize',
-    'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES'
+    'RandomShiftScale', 'LoadPointsFromDict', 'PIPELINES', 'get_2d_boxes'
 ]
```
mmdet3d/datasets/convert_utils.py (new file, mode 100644, +256) - view file @ b496f579

```python
# Copyright (c) OpenMMLab. All rights reserved.
from collections import OrderedDict
from typing import List, Tuple, Union

import numpy as np
from nuscenes.utils.geometry_utils import view_points
from pyquaternion import Quaternion
from shapely.geometry import MultiPoint, box

from mmdet3d.core.bbox import points_cam2img

nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
                  'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
                  'barrier')

nus_attributes = ('cycle.with_rider', 'cycle.without_rider',
                  'pedestrian.moving', 'pedestrian.standing',
                  'pedestrian.sitting_lying_down', 'vehicle.moving',
                  'vehicle.parked', 'vehicle.stopped', 'None')

NameMapping = {
    'movable_object.barrier': 'barrier',
    'vehicle.bicycle': 'bicycle',
    'vehicle.bus.bendy': 'bus',
    'vehicle.bus.rigid': 'bus',
    'vehicle.car': 'car',
    'vehicle.construction': 'construction_vehicle',
    'vehicle.motorcycle': 'motorcycle',
    'human.pedestrian.adult': 'pedestrian',
    'human.pedestrian.child': 'pedestrian',
    'human.pedestrian.construction_worker': 'pedestrian',
    'human.pedestrian.police_officer': 'pedestrian',
    'movable_object.trafficcone': 'traffic_cone',
    'vehicle.trailer': 'trailer',
    'vehicle.truck': 'truck'
}


def get_2d_boxes(nusc, sample_data_token: str, visibilities: List[str]):
    """Get the 2D annotation records for a given `sample_data_token`.

    Args:
        sample_data_token (str): Sample data token belonging to a camera
            keyframe.
        visibilities (list[str]): Visibility filter.

    Return:
        list[dict]: List of 2D annotation records that belong to the input
            `sample_data_token`.
    """
    # Get the sample data and the sample corresponding to that sample data.
    sd_rec = nusc.get('sample_data', sample_data_token)

    assert sd_rec['sensor_modality'] == 'camera', \
        'Error: get_2d_boxes only works for camera sample_data!'
    if not sd_rec['is_key_frame']:
        raise ValueError(
            'The 2D re-projections are available only for keyframes.')

    s_rec = nusc.get('sample', sd_rec['sample_token'])

    # Get the calibrated sensor and ego pose
    # record to get the transformation matrices.
    cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
    pose_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])
    camera_intrinsic = np.array(cs_rec['camera_intrinsic'])

    # Get all the annotations with the specified visibilities.
    ann_recs = [
        nusc.get('sample_annotation', token) for token in s_rec['anns']
    ]
    ann_recs = [
        ann_rec for ann_rec in ann_recs
        if (ann_rec['visibility_token'] in visibilities)
    ]

    repro_recs = []

    for ann_rec in ann_recs:
        # Augment sample_annotation with token information.
        ann_rec['sample_annotation_token'] = ann_rec['token']
        ann_rec['sample_data_token'] = sample_data_token

        # Get the box in global coordinates.
        box = nusc.get_box(ann_rec['token'])

        # Move them to the ego-pose frame.
        box.translate(-np.array(pose_rec['translation']))
        box.rotate(Quaternion(pose_rec['rotation']).inverse)

        # Move them to the calibrated sensor frame.
        box.translate(-np.array(cs_rec['translation']))
        box.rotate(Quaternion(cs_rec['rotation']).inverse)

        # Filter out the corners that are not in front of the calibrated
        # sensor.
        corners_3d = box.corners()
        in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
        corners_3d = corners_3d[:, in_front]

        # Project 3d box to 2d.
        corner_coords = view_points(corners_3d, camera_intrinsic,
                                    True).T[:, :2].tolist()

        # Keep only corners that fall within the image.
        final_coords = post_process_coords(corner_coords)

        # Skip if the convex hull of the re-projected corners
        # does not intersect the image canvas.
        if final_coords is None:
            continue
        else:
            min_x, min_y, max_x, max_y = final_coords

        # Generate dictionary record to be included in the .json file.
        repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
                                    sample_data_token, sd_rec['filename'])

        # if repro_rec is None, we do not append it into repro_recs
        if repro_rec is not None:
            loc = box.center.tolist()

            dim = box.wlh
            dim[[0, 1, 2]] = dim[[1, 2, 0]]  # convert wlh to our lhw
            dim = dim.tolist()

            rot = box.orientation.yaw_pitch_roll[0]
            rot = [-rot]  # convert the rot to our cam coordinate

            global_velo2d = nusc.box_velocity(box.token)[:2]
            global_velo3d = np.array([*global_velo2d, 0.0])
            e2g_r_mat = Quaternion(pose_rec['rotation']).rotation_matrix
            c2e_r_mat = Quaternion(cs_rec['rotation']).rotation_matrix
            cam_velo3d = global_velo3d @ np.linalg.inv(
                e2g_r_mat).T @ np.linalg.inv(c2e_r_mat).T
            velo = cam_velo3d[0::2].tolist()

            repro_rec['bbox_3d'] = loc + dim + rot
            repro_rec['velocity'] = velo

            center_3d = np.array(loc).reshape([1, 3])
            center_2d_with_depth = points_cam2img(
                center_3d, camera_intrinsic, with_depth=True)
            center_2d_with_depth = center_2d_with_depth.squeeze().tolist()

            repro_rec['center_2d'] = center_2d_with_depth[:2]
            repro_rec['depth'] = center_2d_with_depth[2]
            # normalized center2D + depth
            # samples with depth <= 0 are removed
            if repro_rec['depth'] <= 0:
                continue

            ann_token = nusc.get('sample_annotation',
                                 box.token)['attribute_tokens']
            if len(ann_token) == 0:
                attr_name = 'None'
            else:
                attr_name = nusc.get('attribute', ann_token[0])['name']
            attr_id = nus_attributes.index(attr_name)
            # repro_rec['attribute_name'] = attr_name
            repro_rec['attr_label'] = attr_id

            repro_recs.append(repro_rec)

    return repro_recs


def post_process_coords(
    corner_coords: List, imsize: Tuple[int, int] = (1600, 900)
) -> Union[Tuple[float, float, float, float], None]:
    """Get the intersection of the convex hull of the reprojected bbox corners
    and the image canvas, return None if no intersection.

    Args:
        corner_coords (list[int]): Corner coordinates of reprojected
            bounding box.
        imsize (tuple[int]): Size of the image canvas.

    Return:
        tuple[float]: Intersection of the convex hull of the 2D box
            corners and the image canvas.
    """
    polygon_from_2d_box = MultiPoint(corner_coords).convex_hull
    img_canvas = box(0, 0, imsize[0], imsize[1])

    if polygon_from_2d_box.intersects(img_canvas):
        img_intersection = polygon_from_2d_box.intersection(img_canvas)
        intersection_coords = np.array(
            [coord for coord in img_intersection.exterior.coords])

        min_x = min(intersection_coords[:, 0])
        min_y = min(intersection_coords[:, 1])
        max_x = max(intersection_coords[:, 0])
        max_y = max(intersection_coords[:, 1])

        return min_x, min_y, max_x, max_y
    else:
        return None


def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
                    sample_data_token: str, filename: str) -> OrderedDict:
    """Generate one 2D annotation record given various information on top of
    the 2D bounding box coordinates.

    Args:
        ann_rec (dict): Original 3d annotation record.
        x1 (float): Minimum value of the x coordinate.
        y1 (float): Minimum value of the y coordinate.
        x2 (float): Maximum value of the x coordinate.
        y2 (float): Maximum value of the y coordinate.
        sample_data_token (str): Sample data token.
        filename (str): The corresponding image file where the annotation
            is present.

    Returns:
        dict: A sample mono3D annotation record.
            - bbox_label (int): 2d box label id
            - bbox_label_3d (int): 3d box label id
            - bbox (list[float]): left x, top y, right x, bottom y
              of 2d box
            - bbox_3d_isvalid (bool): whether the box is valid
    """
    repro_rec = OrderedDict()
    repro_rec['sample_data_token'] = sample_data_token
    coco_rec = dict()

    relevant_keys = [
        'attribute_tokens',
        'category_name',
        'instance_token',
        'next',
        'num_lidar_pts',
        'num_radar_pts',
        'prev',
        'sample_annotation_token',
        'sample_data_token',
        'visibility_token',
    ]

    for key, value in ann_rec.items():
        if key in relevant_keys:
            repro_rec[key] = value

    repro_rec['bbox_corners'] = [x1, y1, x2, y2]
    repro_rec['filename'] = filename

    if repro_rec['category_name'] not in NameMapping:
        return None
    cat_name = NameMapping[repro_rec['category_name']]
    coco_rec['bbox_label'] = nus_categories.index(cat_name)
    coco_rec['bbox_label_3d'] = nus_categories.index(cat_name)
    coco_rec['bbox'] = [x1, y1, x2, y2]
    coco_rec['bbox_3d_isvalid'] = True

    return coco_rec
```
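`get_2d_boxes` walks every annotation of a camera keyframe, projects the 3D box into the image, clips the convex hull of the projected corners to the canvas, and attaches the mono3D fields (`bbox_3d`, `velocity`, `center_2d`, `depth`, `attr_label`). A minimal usage sketch, assuming a local nuScenes install; the version string and data root are illustrative:

```python
from nuscenes.nuscenes import NuScenes

from mmdet3d.datasets import get_2d_boxes

nusc = NuScenes(version='v1.0-mini', dataroot='data/nuscenes', verbose=False)
sample = nusc.sample[0]
cam_token = sample['data']['CAM_FRONT']

# nuScenes visibility tokens '1'-'4' range from 0-40% up to 80-100% visible
records = get_2d_boxes(nusc, cam_token, visibilities=['1', '2', '3', '4'])
for rec in records[:3]:
    print(rec['bbox'], rec['depth'], rec['attr_label'])
```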
mmdet3d/datasets/det3d_dataset.py (+1 -0) - view file @ b496f579

```diff
...
@@ -197,6 +197,7 @@ class Det3DDataset(BaseDataset):
         ann_info = dict()
         for ann_name in keys:
             temp_anns = [item[ann_name] for item in instances]
+            # map the original dataset label to training label
             if 'label' in ann_name:
                 temp_anns = [
                     self.label_mapping[item] for item in temp_anns
...
```
mmdet3d/datasets/nuscenes_dataset.py (+102 -6) - view file @ b496f579

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
+from os import path as osp
 from typing import Dict, List

 import numpy as np

+from mmdet3d.core.bbox.structures.cam_box3d import CameraInstance3DBoxes
 from mmdet3d.registry import DATASETS
 from ..core.bbox import LiDARInstance3DBoxes
 from .det3d_dataset import Det3DDataset
@@ -53,6 +55,7 @@ class NuScenesDataset(Det3DDataset):
     def __init__(self,
                  data_root: str,
                  ann_file: str,
+                 task: str = '3d',
                  pipeline: List[dict] = None,
                  box_type_3d: str = 'LiDAR',
                  modality: Dict = dict(
@@ -66,7 +69,12 @@ class NuScenesDataset(Det3DDataset):
                  **kwargs):
         self.use_valid_flag = use_valid_flag
         self.with_velocity = with_velocity
-        assert box_type_3d.lower() == 'lidar'
+
+        # TODO: Redesign multi-view data process in the future
+        assert task in ('3d', 'mono3d', 'multi-view')
+        self.task = task
+
+        assert box_type_3d.lower() in ('lidar', 'camera')
         super().__init__(
             data_root=data_root,
             ann_file=ann_file,
@@ -97,6 +105,7 @@ class NuScenesDataset(Det3DDataset):
             anns_results['gt_bboxes_3d'] = np.zeros((0, 7), dtype=np.float32)
             anns_results['gt_labels_3d'] = np.zeros(0, dtype=np.int64)
             return anns_results
+
         if self.use_valid_flag:
             mask = ann_info['bbox_3d_isvalid']
         else:
@@ -104,6 +113,22 @@ class NuScenesDataset(Det3DDataset):
         gt_bboxes_3d = ann_info['gt_bboxes_3d'][mask]
         gt_labels_3d = ann_info['gt_labels_3d'][mask]
+        if 'gt_bboxes' in ann_info:
+            gt_bboxes = ann_info['gt_bboxes'][mask]
+            gt_labels = ann_info['gt_labels'][mask]
+            attr_labels = ann_info['attr_labels'][mask]
+        else:
+            gt_bboxes = np.zeros((0, 4), dtype=np.float32)
+            gt_labels = np.array([], dtype=np.int64)
+            attr_labels = np.array([], dtype=np.int64)
+
+        if 'centers_2d' in ann_info:
+            centers_2d = ann_info['centers_2d'][mask]
+            depths = ann_info['depths'][mask]
+        else:
+            centers_2d = np.zeros((0, 2), dtype=np.float32)
+            depths = np.zeros((0), dtype=np.float32)
+
         if self.with_velocity:
             gt_velocity = ann_info['velocity'][mask]
             nan_mask = np.isnan(gt_velocity[:, 0])
@@ -112,11 +137,82 @@ class NuScenesDataset(Det3DDataset):
         # the nuscenes box center is [0.5, 0.5, 0.5], we change it to be
         # the same as KITTI (0.5, 0.5, 0)
-        gt_bboxes_3d = LiDARInstance3DBoxes(
-            gt_bboxes_3d,
-            box_dim=gt_bboxes_3d.shape[-1],
-            origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)
+        # TODO: Unify the coordinates
+        if self.task == 'mono3d':
+            gt_bboxes_3d = CameraInstance3DBoxes(
+                gt_bboxes_3d,
+                box_dim=gt_bboxes_3d.shape[-1],
+                origin=(0.5, 0.5, 0.5))
+        else:
+            gt_bboxes_3d = LiDARInstance3DBoxes(
+                gt_bboxes_3d,
+                box_dim=gt_bboxes_3d.shape[-1],
+                origin=(0.5, 0.5, 0.5)).convert_to(self.box_mode_3d)

         anns_results = dict(
-            gt_bboxes_3d=gt_bboxes_3d, gt_labels_3d=gt_labels_3d)
+            gt_bboxes_3d=gt_bboxes_3d,
+            gt_labels_3d=gt_labels_3d,
+            gt_bboxes=gt_bboxes,
+            gt_labels=gt_labels,
+            attr_labels=attr_labels,
+            centers_2d=centers_2d,
+            depths=depths)
         return anns_results
+
+    def parse_data_info(self, info: dict) -> dict:
+        """Process the raw data info.
+
+        The only difference with it in `Det3DDataset` is the specific
+        process for `plane`.
+
+        Args:
+            info (dict): Raw info dict.
+
+        Returns:
+            dict: Has `ann_info` in training stage. And all paths have
+                been converted to absolute paths.
+        """
+        if self.task == 'mono3d':
+            data_list = []
+            if self.modality['use_lidar']:
+                info['lidar_points']['lidar_path'] = \
+                    osp.join(
+                        self.data_prefix.get('pts', ''),
+                        info['lidar_points']['lidar_path'])
+
+            if self.modality['use_camera']:
+                for cam_id, img_info in info['images'].items():
+                    if 'img_path' in img_info:
+                        if cam_id in self.data_prefix:
+                            cam_prefix = self.data_prefix[cam_id]
+                        else:
+                            cam_prefix = self.data_prefix.get('img', '')
+                        img_info['img_path'] = osp.join(
+                            cam_prefix, img_info['img_path'])
+
+            for idx, (cam_id, img_info) in enumerate(info['images'].items()):
+                camera_info = dict()
+                camera_info['images'] = dict()
+                camera_info['images'][cam_id] = img_info
+                if 'cam_instances' in info \
+                        and cam_id in info['cam_instances']:
+                    camera_info['instances'] = info['cam_instances'][cam_id]
+                else:
+                    camera_info['instances'] = []
+                # TODO: check whether to change sample_idx for 6 cameras
+                # in one frame
+                camera_info['sample_idx'] = info['sample_idx'] * 6 + idx
+                camera_info['token'] = info['token']
+                camera_info['ego2global'] = info['ego2global']
+
+                if not self.test_mode:
+                    # used in training
+                    camera_info['ann_info'] = self.parse_ann_info(camera_info)
+                if self.test_mode and self.load_eval_anns:
+                    camera_info['eval_ann_info'] = \
+                        self.parse_ann_info(camera_info)
+                data_list.append(camera_info)
+            return data_list
+        else:
+            data_info = super().parse_data_info(info)
+            return data_info
```
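With `task='mono3d'`, `parse_data_info` fans one multi-camera nuScenes frame out into six per-camera samples, each carrying only its own image, its `cam_instances`, and a derived `sample_idx`. A minimal construction sketch under that mode, with argument values mirroring the nus-mono3d.py config earlier in this commit (the paths are illustrative and the info file must exist locally for this to run):

```python
from mmdet3d.datasets import NuScenesDataset

dataset = NuScenesDataset(
    data_root='data/nuscenes/',
    ann_file='nuscenes_infos_val.pkl',
    task='mono3d',        # new in this commit; fans out per camera
    box_type_3d='Camera', # now accepted alongside 'LiDAR'
    data_prefix=dict(
        pts='',
        CAM_FRONT='samples/CAM_FRONT',
        CAM_BACK='samples/CAM_BACK'),
    modality=dict(use_lidar=False, use_camera=True),
    pipeline=[],
    test_mode=True)
```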
mmdet3d/datasets/pipelines/formating.py (+1 -1) - view file @ b496f579

```diff
...
@@ -122,7 +122,7 @@ class Pack3DDetInputs(BaseTransform):
         for key in [
                 'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',
                 'gt_labels_3d', 'attr_labels', 'pts_instance_mask',
-                'pts_semantic_mask', 'centers2d', 'depths'
+                'pts_semantic_mask', 'centers_2d', 'depths'
         ]:
             if key not in results:
                 continue
...
```
mmdet3d/datasets/pipelines/loading.py (+55 -3) - view file @ b496f579

```diff
...
@@ -86,7 +86,7 @@ class LoadImageFromFileMono3D(LoadImageFromFile):
     :class:`LoadImageFromFile`.
     """

-    def __call__(self, results):
+    def transform(self, results: dict) -> dict:
         """Call functions to load image and get image meta information.

         Args:
@@ -95,8 +95,32 @@ class LoadImageFromFileMono3D(LoadImageFromFile):
         Returns:
             dict: The dict contains loaded image and meta information.
         """
-        super().__call__(results)
-        results['cam2img'] = results['img_info']['cam_intrinsic']
+        # TODO: load different camera image from data info,
+        # for kitti dataset, we load 'CAM2' image.
+        # for nuscenes dataset, we load 'CAM_FRONT' image.
+        if 'CAM2' in results['images']:
+            filename = results['images']['CAM2']['img_path']
+            results['cam2img'] = results['images']['CAM2']['cam2img']
+        elif len(list(results['images'].keys())) == 1:
+            camera_type = list(results['images'].keys())[0]
+            filename = results['images'][camera_type]['img_path']
+            results['cam2img'] = results['images'][camera_type]['cam2img']
+        else:
+            raise NotImplementedError(
+                'Currently we only support load image from kitti and '
+                'nuscenes datasets')
+
+        img_bytes = self.file_client.get(filename)
+        img = mmcv.imfrombytes(
+            img_bytes, flag=self.color_type, backend=self.imdecode_backend)
+        if self.to_float32:
+            img = img.astype(np.float32)
+
+        results['img'] = img
+        results['img_shape'] = img.shape[:2]
+        results['ori_shape'] = img.shape[:2]

         return results
@@ -608,6 +632,34 @@ class LoadAnnotations3D(LoadAnnotations):
         self.with_seg_3d = with_seg_3d
         self.seg_3d_dtype = seg_3d_dtype

+    def _load_bboxes(self, results: dict) -> None:
+        """Private function to load bounding box annotations.
+
+        Rewrite `_load_bboxes` since mmdet3d uses `parse_ann_info` in
+        datasets.
+
+        Args:
+            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.
+
+        Returns:
+            dict: The dict contains loaded bounding box annotations.
+        """
+        results['gt_bboxes'] = results['ann_info']['gt_bboxes']
+
+    def _load_labels(self, results: dict) -> None:
+        """Private function to load label annotations.
+
+        Rewrite `_load_labels` since mmdet3d uses `parse_ann_info` in
+        datasets.
+
+        Args:
+            results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.
+
+        Returns:
+            dict: The dict contains loaded label annotations.
+        """
+        results['gt_labels'] = results['ann_info']['gt_labels']
+
     def _load_bboxes_3d(self, results: dict) -> dict:
         """Private function to move the 3D bounding box annotation from
         `ann_info` field to the root of `results`.
...
```
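The `__call__` to `transform` rename here (and in `AffineResize` below) follows the new `BaseTransform` interface, where `__call__` is implemented once in the base class and dispatches to `transform`. A minimal custom transform under that contract; a sketch assuming `mmcv.transforms.BaseTransform` from mmcv 2.x, which the refactored pipelines build on:

```python
from mmcv.transforms import BaseTransform


class AttachCamName(BaseTransform):
    """Toy transform: record which camera an image came from."""

    def transform(self, results: dict) -> dict:
        # results['images'] maps camera names to per-camera info dicts,
        # as in the refactored LoadImageFromFileMono3D above.
        results['cam_name'] = list(results['images'].keys())[0]
        return results


step = AttachCamName()
out = step({'images': {'CAM2': {'img_path': 'training/image_2/0.png'}}})
print(out['cam_name'])  # -> 'CAM2'
```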
mmdet3d/datasets/pipelines/transforms_3d.py
View file @
b496f579
...
@@ -1579,7 +1579,7 @@ class VoxelBasedPointSampler(object):
...
@@ -1579,7 +1579,7 @@ class VoxelBasedPointSampler(object):
@
TRANSFORMS
.
register_module
()
@
TRANSFORMS
.
register_module
()
class
AffineResize
(
object
):
class
AffineResize
(
BaseTransform
):
"""Get the affine transform matrices to the target size.
"""Get the affine transform matrices to the target size.
Different from :class:`RandomAffine` in MMDetection, this class can
Different from :class:`RandomAffine` in MMDetection, this class can
...
@@ -1596,13 +1596,16 @@ class AffineResize(object):
...
@@ -1596,13 +1596,16 @@ class AffineResize(object):
outside the border of the image. Defaults to True.
outside the border of the image. Defaults to True.
"""
"""
def
__init__
(
self
,
img_scale
,
down_ratio
,
bbox_clip_border
=
True
):
def
__init__
(
self
,
img_scale
:
Tuple
,
down_ratio
:
int
,
bbox_clip_border
:
bool
=
True
)
->
None
:
self
.
img_scale
=
img_scale
self
.
img_scale
=
img_scale
self
.
down_ratio
=
down_ratio
self
.
down_ratio
=
down_ratio
self
.
bbox_clip_border
=
bbox_clip_border
self
.
bbox_clip_border
=
bbox_clip_border
def
__call__
(
self
,
results
)
:
def
transform
(
self
,
results
:
dict
)
->
dict
:
"""Call function to do affine transform to input image and labels.
"""Call function to do affine transform to input image and labels.
Args:
Args:
...
@@ -1647,39 +1650,38 @@ class AffineResize(object):
         results['pad_shape'] = img.shape
         results['trans_mat'] = trans_mat

-        self._affine_bboxes(results, trans_affine)
+        if 'gt_bboxes' in results:
+            self._affine_bboxes(results, trans_affine)

-        if 'centers2d' in results:
-            centers2d = self._affine_transform(results['centers2d'],
+        if 'centers_2d' in results:
+            centers2d = self._affine_transform(results['centers_2d'],
                                                trans_affine)
             valid_index = (centers2d[:, 0] > 0) & \
                           (centers2d[:, 0] < self.img_scale[0]) & \
                           (centers2d[:, 1] > 0) & \
                           (centers2d[:, 1] < self.img_scale[1])
-            results['centers2d'] = centers2d[valid_index]
+            results['centers_2d'] = centers2d[valid_index]

-            for key in results.get('bbox_fields', []):
-                if key in ['gt_bboxes']:
-                    results[key] = results[key][valid_index]
-                    if 'gt_labels' in results:
-                        results['gt_labels'] = results['gt_labels'][
-                            valid_index]
-                    if 'gt_masks' in results:
-                        raise NotImplementedError(
-                            'AffineResize only supports bbox.')
+            if 'gt_bboxes' in results:
+                results['gt_bboxes'] = results['gt_bboxes'][valid_index]
+                if 'gt_labels' in results:
+                    results['gt_labels'] = results['gt_labels'][valid_index]
+                if 'gt_masks' in results:
+                    raise NotImplementedError(
+                        'AffineResize only supports bbox.')

-            for key in results.get('bbox3d_fields', []):
-                if key in ['gt_bboxes_3d']:
-                    results[key].tensor = results[key].tensor[valid_index]
-                    if 'gt_labels_3d' in results:
-                        results['gt_labels_3d'] = results['gt_labels_3d'][
-                            valid_index]
+            if 'gt_bboxes_3d' in results:
+                results['gt_bboxes_3d'].tensor = results[
+                    'gt_bboxes_3d'].tensor[valid_index]
+                if 'gt_labels_3d' in results:
+                    results['gt_labels_3d'] = results['gt_labels_3d'][
+                        valid_index]

             results['depths'] = results['depths'][valid_index]

         return results
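For orientation, a SMOKE-style mono3d pipeline typically wires this transform as below; the values are illustrative, not the shipped config:

# Illustrative pipeline entry (example values, not the exact config):
train_pipeline = [
    dict(type='LoadImageFromFileMono3D'),
    dict(type='LoadAnnotations3D', with_bbox=True, with_label=True),
    dict(type='RandomShiftScale', shift_scale=(0.2, 0.4), aug_prob=0.3),
    dict(type='AffineResize', img_scale=(1280, 384), down_ratio=4),
]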
-    def _affine_bboxes(self, results, matrix):
+    def _affine_bboxes(self, results: dict, matrix: np.ndarray) -> None:
         """Affine transform bboxes to input image.

         Args:
...
@@ -1689,20 +1691,18 @@ class AffineResize(object):
                 shape: (3, 3)
         """

-        for key in results.get('bbox_fields', []):
-            bboxes = results[key]
-            bboxes[:, :2] = self._affine_transform(bboxes[:, :2], matrix)
-            bboxes[:, 2:] = self._affine_transform(bboxes[:, 2:], matrix)
-            if self.bbox_clip_border:
-                bboxes[:, [0, 2]] = bboxes[:, [0, 2]].clip(
-                    0, self.img_scale[0] - 1)
-                bboxes[:, [1, 3]] = bboxes[:, [1, 3]].clip(
-                    0, self.img_scale[1] - 1)
-            results[key] = bboxes
+        bboxes = results['gt_bboxes']
+        bboxes[:, :2] = self._affine_transform(bboxes[:, :2], matrix)
+        bboxes[:, 2:] = self._affine_transform(bboxes[:, 2:], matrix)
+        if self.bbox_clip_border:
+            bboxes[:, [0, 2]] = bboxes[:, [0, 2]].clip(
+                0, self.img_scale[0] - 1)
+            bboxes[:, [1, 3]] = bboxes[:, [1, 3]].clip(
+                0, self.img_scale[1] - 1)
+        results['gt_bboxes'] = bboxes
-    def _affine_transform(self, points, matrix):
+    def _affine_transform(self, points: np.ndarray,
+                          matrix: np.ndarray) -> np.ndarray:
         """Affine transform bbox points to input image.

         Args:
...
@@ -1721,7 +1721,8 @@ class AffineResize(object):
         affined_points = np.matmul(matrix, hom_points_2d).T
         return affined_points[:, :2]
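`_affine_transform` applies the 3x3 matrix through homogeneous coordinates. A self-contained sketch of the same math:

import numpy as np

def affine_transform(points: np.ndarray, matrix: np.ndarray) -> np.ndarray:
    """Apply a 3x3 affine matrix to (N, 2) points via homogeneous coords."""
    num = points.shape[0]
    # (N, 2) -> (3, N): append a column of ones, then transpose.
    hom_points_2d = np.concatenate([points, np.ones((num, 1))], axis=1).T
    affined_points = np.matmul(matrix, hom_points_2d).T  # (N, 3)
    return affined_points[:, :2]

pts = np.array([[10., 20.], [30., 40.]])
# Pure translation by (5, -3); the third row stays (0, 0, 1).
m = np.array([[1., 0., 5.], [0., 1., -3.], [0., 0., 1.]])
print(affine_transform(pts, m))  # [[15. 17.] [35. 37.]]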
-    def _get_transform_matrix(self, center, scale, output_scale):
+    def _get_transform_matrix(self, center: Tuple, scale: Tuple,
+                              output_scale: Tuple[float]) -> np.ndarray:
         """Get affine transform matrix.

         Args:
...
@@ -1756,7 +1757,8 @@ class AffineResize(object):
         return matrix.astype(np.float32)
-    def _get_ref_point(self, ref_point1, ref_point2):
+    def _get_ref_point(self, ref_point1: np.ndarray,
+                       ref_point2: np.ndarray) -> np.ndarray:
         """Get reference point to calculate affine transform matrix.

         While using opencv to calculate the affine matrix, we need at least
...
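`_get_ref_point` exists because cv2.getAffineTransform needs three point pairs, and the third is derived from the first two. A sketch of that construction, assuming the 90-degree rotation trick the docstring describes:

import numpy as np

def get_third_point(p1: np.ndarray, p2: np.ndarray) -> np.ndarray:
    """Sketch: derive a third, non-collinear point from two reference
    points by rotating the vector (p1 - p2) by 90 degrees around p2."""
    d = p1 - p2
    # Rotating (dx, dy) by 90 degrees gives (-dy, dx).
    return p2 + np.array([-d[1], d[0]], dtype=np.float32)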
@@ -1775,7 +1777,7 @@ class AffineResize(object):
 @TRANSFORMS.register_module()
-class RandomShiftScale(object):
+class RandomShiftScale(BaseTransform):
     """Random shift scale.

     Different from the normal shift and scale function, it doesn't
...
@@ -1788,12 +1790,12 @@ class RandomShiftScale(object):
         aug_prob (float): The shifting and scaling probability.
     """

-    def __init__(self, shift_scale, aug_prob):
+    def __init__(self, shift_scale: Tuple[float], aug_prob: float):
         self.shift_scale = shift_scale
         self.aug_prob = aug_prob

-    def __call__(self, results):
+    def transform(self, results: dict) -> dict:
         """Call function to record random shift and scale infos.

         Args:
...

mmdet3d/metrics/kitti_metric.py
View file @ b496f579
...
@@ -45,6 +45,7 @@ class KittiMetric(BaseMetric):

     def __init__(self,
                  ann_file: str,
                  metric: Union[str, List[str]] = 'bbox',
+                 pred_box_type_3d: str = 'LiDAR',
                  pcd_limit_range: List[float] = [0, -40, -3, 70.4, 40, 0.0],
                  prefix: Optional[str] = None,
                  pklfile_prefix: str = None,
...
@@ -57,6 +58,7 @@ class KittiMetric(BaseMetric):
         self.ann_file = ann_file
         self.pklfile_prefix = pklfile_prefix
         self.submission_prefix = submission_prefix
+        self.pred_box_type_3d = pred_box_type_3d
         allowed_metrics = ['bbox', 'img_bbox', 'mAP']
         self.metrics = metric if isinstance(metric, list) else [metric]
...
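With the new argument, a camera-only KITTI setup can state that its predictions are camera boxes rather than LiDAR boxes. An illustrative evaluator config (paths are placeholders):

# Illustrative: evaluating a Mono3D model whose boxes are CameraInstance3DBoxes.
val_evaluator = dict(
    type='KittiMetric',
    ann_file='data/kitti/kitti_infos_val.pkl',
    metric='bbox',
    pred_box_type_3d='Camera')  # default is 'LiDAR'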
mmdet3d/metrics/nuscenes_metric.py
View file @
b496f579
...
@@ -7,12 +7,15 @@ from typing import Dict, List, Optional, Sequence, Tuple, Union
...
@@ -7,12 +7,15 @@ from typing import Dict, List, Optional, Sequence, Tuple, Union
import
mmcv
import
mmcv
import
numpy
as
np
import
numpy
as
np
import
pyquaternion
import
pyquaternion
import
torch
from
mmengine.evaluator
import
BaseMetric
from
mmengine.evaluator
import
BaseMetric
from
mmengine.logging
import
MMLogger
from
mmengine.logging
import
MMLogger
from
nuscenes.eval.detection.config
import
config_factory
from
nuscenes.eval.detection.config
import
config_factory
from
nuscenes.eval.detection.data_classes
import
DetectionConfig
from
nuscenes.eval.detection.data_classes
import
DetectionConfig
from
nuscenes.utils.data_classes
import
Box
as
NuScenesBox
from
nuscenes.utils.data_classes
import
Box
as
NuScenesBox
from
mmdet3d.core
import
bbox3d2result
,
box3d_multiclass_nms
,
xywhr2xyxyr
from
mmdet3d.core.bbox
import
CameraInstance3DBoxes
,
LiDARInstance3DBoxes
from
mmdet3d.registry
import
METRICS
from
mmdet3d.registry
import
METRICS
...
@@ -288,21 +291,144 @@ class NuScenesMetric(BaseMetric):
         for name in results[0]:
             if 'pred' in name and '3d' in name and name[0] != '_':
+                # format result of model output in Det3dDataSample,
+                # include 'pred_instances_3d', 'pts_pred_instances_3d',
+                # 'img_pred_instances_3d'
                 print(f'\nFormating bboxes of {name}')
                 results_ = [out[name] for out in results]
                 tmp_file_ = osp.join(jsonfile_prefix, name)
-                result_dict[name] = self._format_bbox(results_,
-                                                      sample_id_list,
-                                                      classes, tmp_file_)
+                box_type_3d = type(results_[0]['bboxes_3d'])
+                if box_type_3d == LiDARInstance3DBoxes:
+                    result_dict[name] = self._format_lidar_bbox(
+                        results_, sample_id_list, classes, tmp_file_)
+                elif box_type_3d == CameraInstance3DBoxes:
+                    result_dict[name] = self._format_camera_bbox(
+                        results_, sample_id_list, classes, tmp_file_)

         return result_dict, tmp_dir
-    def _format_bbox(self,
-                     results: List[dict],
-                     sample_id_list: List[int],
-                     classes: List[str] = None,
-                     jsonfile_prefix: str = None) -> str:
+    def _format_camera_bbox(self,
+                            results: List[dict],
+                            sample_id_list: List[int],
+                            classes: List[str] = None,
+                            jsonfile_prefix: str = None) -> str:
+        """Convert the results to the standard format.
+
+        Args:
+            results (list[dict]): Testing results of the dataset.
+            jsonfile_prefix (str): The prefix of the output jsonfile.
+                You can specify the output directory/filename by
+                modifying the jsonfile_prefix. Default: None.
+
+        Returns:
+            str: Path of the output json file.
+        """
+        nusc_annos = {}
+
+        print('Start to convert detection format...')
+
+        # Camera types in NuScenes datasets
+        camera_types = [
+            'CAM_FRONT',
+            'CAM_FRONT_RIGHT',
+            'CAM_FRONT_LEFT',
+            'CAM_BACK',
+            'CAM_BACK_LEFT',
+            'CAM_BACK_RIGHT',
+        ]
+        CAM_NUM = 6
+
+        for i, det in enumerate(mmcv.track_iter_progress(results)):
+
+            sample_id = sample_id_list[i]
+            camera_type_id = sample_id % CAM_NUM
+
+            if camera_type_id == 0:
+                boxes_per_frame = []
+                attrs_per_frame = []
+
+            # need to merge results from images of the same sample
+            annos = []
+            boxes, attrs = output_to_nusc_box(det)
+            sample_token = self.data_infos[sample_id]['token']
+            camera_type = camera_types[camera_type_id]
+            boxes, attrs = cam_nusc_box_to_global(
+                self.data_infos[sample_id - camera_type_id], boxes, attrs,
+                camera_type, classes, self.eval_detection_configs)
+            boxes_per_frame.extend(boxes)
+            attrs_per_frame.extend(attrs)
+            # Remove redundant predictions caused by overlap of images
+            if (sample_id + 1) % CAM_NUM != 0:
+                continue
+            boxes = global_nusc_box_to_cam(
+                self.data_infos[sample_id + 1 - CAM_NUM], boxes_per_frame,
+                classes, self.eval_detection_configs)
+            cam_boxes3d, scores, labels = nusc_box_to_cam_box3d(boxes)
+            # box nms 3d over 6 images in a frame
+            # TODO: move this global setting into config
+            nms_cfg = dict(
+                use_rotate_nms=True,
+                nms_across_levels=False,
+                nms_pre=4096,
+                nms_thr=0.05,
+                score_thr=0.01,
+                min_bbox_size=0,
+                max_per_frame=500)
+            from mmcv import Config
+            nms_cfg = Config(nms_cfg)
+            cam_boxes3d_for_nms = xywhr2xyxyr(cam_boxes3d.bev)
+            boxes3d = cam_boxes3d.tensor
+            # generate attr scores from attr labels
+            attrs = labels.new_tensor([attr for attr in attrs_per_frame])
+            boxes3d, scores, labels, attrs = box3d_multiclass_nms(
+                boxes3d, cam_boxes3d_for_nms, scores, nms_cfg.score_thr,
+                nms_cfg.max_per_frame, nms_cfg, mlvl_attr_scores=attrs)
+            cam_boxes3d = CameraInstance3DBoxes(boxes3d, box_dim=9)
+            det = bbox3d2result(cam_boxes3d, scores, labels, attrs)
+            boxes, attrs = output_to_nusc_box(det)
+            boxes, attrs = cam_nusc_box_to_global(
+                self.data_infos[sample_id + 1 - CAM_NUM], boxes, attrs,
+                classes, self.eval_detection_configs)
+
+            for i, box in enumerate(boxes):
+                name = classes[box.label]
+                attr = self.get_attr_name(attrs[i], name)
+                nusc_anno = dict(
+                    sample_token=sample_token,
+                    translation=box.center.tolist(),
+                    size=box.wlh.tolist(),
+                    rotation=box.orientation.elements.tolist(),
+                    velocity=box.velocity[:2].tolist(),
+                    detection_name=name,
+                    detection_score=box.score,
+                    attribute_name=attr)
+                annos.append(nusc_anno)
+            # other views results of the same frame should be concatenated
+            if sample_token in nusc_annos:
+                nusc_annos[sample_token].extend(annos)
+            else:
+                nusc_annos[sample_token] = annos
+        nusc_submissions = {
+            'meta': self.modality,
+            'results': nusc_annos,
+        }
+        mmcv.mkdir_or_exist(jsonfile_prefix)
+        res_path = osp.join(jsonfile_prefix, 'results_nusc.json')
+        print('Results writes to', res_path)
+        mmcv.dump(nusc_submissions, res_path)
+        return res_path
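The merge above relies on the convention that the six camera images of one nuScenes frame occupy consecutive sample ids. A toy sketch of that grouping logic, under the same assumption:

# Toy sketch of the grouping convention assumed above: six consecutive
# sample ids belong to one frame, and the per-frame buffer is flushed
# when the last view of the frame arrives.
CAM_NUM = 6
buffer = []
for sample_id in range(12):  # two frames' worth of images
    buffer.append(f'det_{sample_id}')
    if (sample_id + 1) % CAM_NUM != 0:
        continue
    frame_start = sample_id + 1 - CAM_NUM
    print(f'frame starting at {frame_start}: merge {buffer}')
    buffer = []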
+    def _format_lidar_bbox(self,
+                           results: List[dict],
+                           sample_id_list: List[int],
+                           classes: List[str] = None,
+                           jsonfile_prefix: str = None) -> str:
         """Convert the results to the standard format.

         Args:
...
@@ -389,27 +515,59 @@ def output_to_nusc_box(detection: dict) -> List[NuScenesBox]:
     bbox3d = detection['bboxes_3d']
     scores = detection['scores_3d'].numpy()
     labels = detection['labels_3d'].numpy()
+    attrs = None
+    if 'attr_labels' in detection:
+        attrs = detection['attr_labels'].numpy()

     box_gravity_center = bbox3d.gravity_center.numpy()
     box_dims = bbox3d.dims.numpy()
     box_yaw = bbox3d.yaw.numpy()

-    # our LiDAR coordinate system -> nuScenes box coordinate system
-    nus_box_dims = box_dims[:, [1, 0, 2]]
-
     box_list = []
-    for i in range(len(bbox3d)):
-        quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
-        velocity = (*bbox3d.tensor[i, 7:9], 0.0)
-        # velo_val = np.linalg.norm(box3d[i, 7:9])
-        # velo_ori = box3d[i, 6]
-        # velocity = (
-        #     velo_val * np.cos(velo_ori), velo_val * np.sin(velo_ori), 0.0)
-        box = NuScenesBox(
-            box_gravity_center[i],
-            nus_box_dims[i],
-            quat,
-            label=labels[i],
-            score=scores[i],
-            velocity=velocity)
-        box_list.append(box)
-    return box_list
+
+    if type(bbox3d) == LiDARInstance3DBoxes:
+        # our LiDAR coordinate system -> nuScenes box coordinate system
+        nus_box_dims = box_dims[:, [1, 0, 2]]
+        for i in range(len(bbox3d)):
+            quat = pyquaternion.Quaternion(axis=[0, 0, 1], radians=box_yaw[i])
+            velocity = (*bbox3d.tensor[i, 7:9], 0.0)
+            box = NuScenesBox(
+                box_gravity_center[i],
+                nus_box_dims[i],
+                quat,
+                label=labels[i],
+                score=scores[i],
+                velocity=velocity)
+            box_list.append(box)
+    elif type(bbox3d) == CameraInstance3DBoxes:
+        # our Camera coordinate system -> nuScenes box coordinate system
+        # convert the dim/rot to nuscbox convention
+        nus_box_dims = box_dims[:, [2, 0, 1]]
+        nus_box_yaw = -box_yaw
+        for i in range(len(bbox3d)):
+            q1 = pyquaternion.Quaternion(
+                axis=[0, 0, 1], radians=nus_box_yaw[i])
+            q2 = pyquaternion.Quaternion(axis=[1, 0, 0], radians=np.pi / 2)
+            quat = q2 * q1
+            velocity = (bbox3d.tensor[i, 7], 0.0, bbox3d.tensor[i, 8])
+            box = NuScenesBox(
+                box_gravity_center[i],
+                nus_box_dims[i],
+                quat,
+                label=labels[i],
+                score=scores[i],
+                velocity=velocity)
+            box_list.append(box)
+    else:
+        raise NotImplementedError(
+            f'Do not support convert {type(bbox3d)} bboxes '
+            'to standard NuScenesBoxes.')
+
+    return box_list, attrs
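The camera branch composes two rotations: the negated yaw about z, then a 90-degree rotation about x that moves the camera frame onto the nuScenes box frame. A standalone check of that composition:

import numpy as np
import pyquaternion

# Compose the two rotations used for CameraInstance3DBoxes above.
yaw_cam = 0.3
q1 = pyquaternion.Quaternion(axis=[0, 0, 1], radians=-yaw_cam)
q2 = pyquaternion.Quaternion(axis=[1, 0, 0], radians=np.pi / 2)
quat = q2 * q1  # q1 is applied first, then q2
print(np.round(quat.rotation_matrix, 3))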
def lidar_nusc_box_to_global(
...
@@ -448,3 +606,117 @@ def lidar_nusc_box_to_global(
         box.translate(ego2global[:3, 3])
         box_list.append(box)
     return box_list
+
+
+def cam_nusc_box_to_global(
+        info: dict, boxes: List[NuScenesBox], attrs: List[str],
+        camera_type: str, classes: List[str],
+        eval_configs: DetectionConfig) -> List[NuScenesBox]:
+    """Convert the box from camera to global coordinate.
+
+    Args:
+        info (dict): Info for a specific sample data, including the
+            calibration information.
+        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
+        attrs (list[str]): List of attributes.
+        camera_type (str): Type of camera.
+        classes (list[str]): Mapped classes in the evaluation.
+        eval_configs (object): Evaluation configuration object.
+
+    Returns:
+        list: List of standard NuScenesBoxes in the global coordinate.
+    """
+    box_list = []
+    attr_list = []
+    for (box, attr) in zip(boxes, attrs):
+        # Move box to ego vehicle coord system
+        cam2ego = np.array(info['images'][camera_type]['cam2ego'])
+        box.rotate(
+            pyquaternion.Quaternion(matrix=cam2ego, rtol=1e-05, atol=1e-07))
+        box.translate(cam2ego[:3, 3])
+        # filter det in ego.
+        cls_range_map = eval_configs.class_range
+        radius = np.linalg.norm(box.center[:2], 2)
+        det_range = cls_range_map[classes[box.label]]
+        if radius > det_range:
+            continue
+        # Move box to global coord system
+        ego2global = np.array(info['ego2global'])
+        box.rotate(
+            pyquaternion.Quaternion(
+                matrix=ego2global, rtol=1e-05, atol=1e-07))
+        box.translate(ego2global[:3, 3])
+        box_list.append(box)
+        attr_list.append(attr)
+    return box_list, attr_list
+
+
+def global_nusc_box_to_cam(
+        info: dict, boxes: List[NuScenesBox], classes: List[str],
+        eval_configs: DetectionConfig) -> List[NuScenesBox]:
+    """Convert the box from global to camera coordinate.
+
+    Args:
+        info (dict): Info for a specific sample data, including the
+            calibration information.
+        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
+        classes (list[str]): Mapped classes in the evaluation.
+        eval_configs (object): Evaluation configuration object.
+
+    Returns:
+        list: List of standard NuScenesBoxes in the camera coordinate.
+    """
+    box_list = []
+    for box in boxes:
+        # Move box to ego vehicle coord system
+        ego2global = np.array(info['ego2global'])
+        box.translate(-ego2global[:3, 3])
+        box.rotate(
+            pyquaternion.Quaternion(
+                matrix=ego2global, rtol=1e-05, atol=1e-07).inverse)
+        # filter det in ego.
+        cls_range_map = eval_configs.class_range
+        radius = np.linalg.norm(box.center[:2], 2)
+        det_range = cls_range_map[classes[box.label]]
+        if radius > det_range:
+            continue
+        # Move box to camera coord system
+        cam2ego = np.array(info['images']['CAM_FRONT']['cam2ego'])
+        box.translate(-cam2ego[:3, 3])
+        box.rotate(
+            pyquaternion.Quaternion(
+                matrix=cam2ego, rtol=1e-05, atol=1e-07).inverse)
+        box_list.append(box)
+    return box_list
+
+
+def nusc_box_to_cam_box3d(boxes: List[NuScenesBox]):
+    """Convert boxes from :obj:`NuScenesBox` to :obj:`CameraInstance3DBoxes`.
+
+    Args:
+        boxes (list[:obj:`NuScenesBox`]): List of predicted NuScenesBoxes.
+
+    Returns:
+        tuple (:obj:`CameraInstance3DBoxes` | torch.Tensor | torch.Tensor):
+            Converted 3D bounding boxes, scores and labels.
+    """
+    locs = torch.Tensor([b.center for b in boxes]).view(-1, 3)
+    dims = torch.Tensor([b.wlh for b in boxes]).view(-1, 3)
+    rots = torch.Tensor([b.orientation.yaw_pitch_roll[0]
+                         for b in boxes]).view(-1, 1)
+    velocity = torch.Tensor([b.velocity[0::2] for b in boxes]).view(-1, 2)
+
+    # convert nusbox to cambox convention
+    dims[:, [0, 1, 2]] = dims[:, [1, 2, 0]]
+    rots = -rots
+
+    boxes_3d = torch.cat([locs, dims, rots, velocity], dim=1).cuda()
+    cam_boxes3d = CameraInstance3DBoxes(
+        boxes_3d, box_dim=9, origin=(0.5, 0.5, 0.5))
+    scores = torch.Tensor([b.score for b in boxes]).cuda()
+    labels = torch.LongTensor([b.label for b in boxes]).cuda()
+    nms_scores = scores.new_zeros(scores.shape[0], 10 + 1)
+    indices = labels.new_tensor(list(range(scores.shape[0])))
+    nms_scores[indices, labels] = scores
+    return cam_boxes3d, nms_scores, labels
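`nusc_box_to_cam_box3d` scatters each box score into a per-class score matrix (10 classes plus a background column) so that `box3d_multiclass_nms` can operate per class. The same construction in isolation:

import torch

scores = torch.tensor([0.9, 0.4, 0.7])
labels = torch.tensor([2, 0, 2])
num_classes = 10

# (N, num_classes + 1): column c holds the score only for boxes of class c.
nms_scores = scores.new_zeros(scores.shape[0], num_classes + 1)
nms_scores[torch.arange(scores.shape[0]), labels] = scores
print(nms_scores[:, 2])  # tensor([0.9000, 0.0000, 0.7000])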
mmdet3d/models/data_preprocessors/data_preprocessor.py
View file @ b496f579
...
@@ -106,8 +106,7 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
         if 'points' in inputs_dict[0].keys():
             points = [input['points'] for input in inputs_dict]
         else:
-            raise KeyError(
-                "Model input dict needs to include the 'points' key.")
+            points = None

         if 'img' in inputs_dict[0].keys():
...
View file @
b496f579
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) OpenMMLab. All rights reserved.
from
abc
import
abstractmethod
from
abc
import
abstractmethod
from
typing
import
Any
,
List
,
Sequence
,
Tuple
,
Union
import
torch
import
torch
from
mmcv.cnn
import
ConvModule
,
bias_init_with_prob
,
normal_init
from
mmcv.cnn
import
ConvModule
,
bias_init_with_prob
,
normal_init
from
mmcv.runner
import
force_fp32
from
torch
import
Tensor
from
torch
import
nn
as
nn
from
torch
import
nn
as
nn
from
mmdet3d.core.utils
import
ConfigType
,
InstanceList
,
OptConfigType
from
mmdet3d.registry
import
MODELS
from
mmdet3d.registry
import
MODELS
from
mmdet.core
import
multi_apply
from
mmdet.core
import
multi_apply
from
..builder
import
build_loss
from
.base_mono3d_dense_head
import
BaseMono3DDenseHead
from
.base_mono3d_dense_head
import
BaseMono3DDenseHead
...
@@ -20,39 +21,41 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
         num_classes (int): Number of categories excluding the background
             category.
         in_channels (int): Number of channels in the input feature map.
-        feat_channels (int, optional): Number of hidden channels.
+        feat_channels (int): Number of hidden channels.
             Used in child classes. Defaults to 256.
-        stacked_convs (int, optional): Number of stacking convs of the head.
-        strides (tuple, optional): Downsample factor of each feature map.
-        dcn_on_last_conv (bool, optional): If true, use dcn in the last
+        stacked_convs (int): Number of stacking convs of the head.
+        strides (Sequence[int] or Sequence[Tuple[int, int]]): Downsample
+            factor of each feature map.
+        dcn_on_last_conv (bool): If true, use dcn in the last
             layer of towers. Default: False.
-        conv_bias (bool | str, optional): If specified as `auto`, it will be
+        conv_bias (bool or str): If specified as `auto`, it will be
             decided by the norm_cfg. Bias of conv will be set as True
             if `norm_cfg` is None, otherwise False. Default: 'auto'.
-        background_label (int, optional): Label ID of background,
+        background_label (bool, Optional): Label ID of background,
             set as 0 for RPN and num_classes for other heads.
             It will automatically set as `num_classes` if None is given.
-        use_direction_classifier (bool, optional):
+        use_direction_classifier (bool):
             Whether to add a direction classifier.
-        diff_rad_by_sin (bool, optional): Whether to change the difference
+        diff_rad_by_sin (bool): Whether to change the difference
            into sin difference for box regression loss. Defaults to True.
-        dir_offset (float, optional): Parameter used in direction
+        dir_offset (float): Parameter used in direction
            classification. Defaults to 0.
-        dir_limit_offset (float, optional): Parameter used in direction
+        dir_limit_offset (float): Parameter used in direction
            classification. Defaults to 0.
-        loss_cls (dict, optional): Config of classification loss.
-        loss_bbox (dict, optional): Config of localization loss.
-        loss_dir (dict, optional): Config of direction classifier loss.
-        loss_attr (dict, optional): Config of attribute classifier loss,
-            which is only active when `pred_attrs=True`.
-        bbox_code_size (int, optional): Dimensions of predicted bounding boxes.
-        pred_attrs (bool, optional): Whether to predict attributes.
+        loss_cls (:obj:`ConfigDict` or dict): Config of classification loss.
+        loss_bbox (:obj:`ConfigDict` or dict): Config of localization loss.
+        loss_dir (:obj:`ConfigDict` or dict): Config of direction classifier
+            loss.
+        loss_attr (:obj:`ConfigDict` or dict): Config of attribute classifier
+            loss, which is only active when `pred_attrs=True`.
+        bbox_code_size (int): Dimensions of predicted bounding boxes.
+        pred_attrs (bool): Whether to predict attributes.
             Defaults to False.
-        num_attrs (int, optional): The number of attributes to be predicted.
+        num_attrs (int): The number of attributes to be predicted.
             Default: 9.
-        pred_velo (bool, optional): Whether to predict velocity.
+        pred_velo (bool): Whether to predict velocity.
             Defaults to False.
-        pred_bbox2d (bool, optional): Whether to predict 2D boxes.
+        pred_bbox2d (bool): Whether to predict 2D boxes.
             Defaults to False.
         group_reg_dims (tuple[int], optional): The dimension of each regression
             target group. Default: (2, 1, 3, 1, 2).
...
@@ -66,68 +69,77 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
             (64, ),  # rot
             ()  # velo
         ),
-        dir_branch (tuple[int], optional): Channels for direction
+        dir_branch (Sequence[int]): Channels for direction
             classification branch. Default: (64, ).
-        attr_branch (tuple[int], optional): Channels for classification branch.
+        attr_branch (Sequence[int]): Channels for classification branch.
             Default: (64, ).
-        conv_cfg (dict, optional): Config dict for convolution layer.
-            Default: None.
-        norm_cfg (dict, optional): Config dict for normalization layer.
-            Default: None.
-        train_cfg (dict, optional): Training config of anchor head.
-        test_cfg (dict, optional): Testing config of anchor head.
+        conv_cfg (:obj:`ConfigDict` or dict, Optional): Config dict for
+            convolution layer. Default: None.
+        norm_cfg (:obj:`ConfigDict` or dict, Optional): Config dict for
+            normalization layer. Default: None.
+        train_cfg (:obj:`ConfigDict` or dict, Optional): Training config
+            of anchor head.
+        test_cfg (:obj:`ConfigDict` or dict, Optional): Testing config of
+            anchor head.
+        init_cfg (:obj:`ConfigDict` or dict or list[:obj:`ConfigDict` or \
+            dict]): Initialization config dict.
     """  # noqa: W605

     _version = 1

     def __init__(
             self,
-            num_classes,
-            in_channels,
-            feat_channels=256,
-            stacked_convs=4,
-            strides=(4, 8, 16, 32, 64),
-            dcn_on_last_conv=False,
-            conv_bias='auto',
-            background_label=None,
-            use_direction_classifier=True,
-            diff_rad_by_sin=True,
-            dir_offset=0,
-            dir_limit_offset=0,
-            loss_cls=dict(
-                type='FocalLoss',
-                use_sigmoid=True,
-                gamma=2.0,
-                alpha=0.25,
-                loss_weight=1.0),
-            loss_bbox=dict(
-                type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
-            loss_dir=dict(
-                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
-            loss_attr=dict(
-                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
-            bbox_code_size=9,  # For nuscenes
-            pred_attrs=False,
-            num_attrs=9,  # For nuscenes
-            pred_velo=False,
-            pred_bbox2d=False,
-            group_reg_dims=(2, 1, 3, 1, 2),  # offset, depth, size, rot, velo,
-            cls_branch=(128, 64),
-            reg_branch=(
+            num_classes: int,
+            in_channels: int,
+            feat_channels: int = 256,
+            stacked_convs: int = 4,
+            strides: Sequence[int] = (4, 8, 16, 32, 64),
+            dcn_on_last_conv: bool = False,
+            conv_bias: Union[bool, str] = 'auto',
+            background_label: bool = None,
+            use_direction_classifier: bool = True,
+            diff_rad_by_sin: bool = True,
+            dir_offset: int = 0,
+            dir_limit_offset: int = 0,
+            loss_cls: ConfigType = dict(
+                type='mmdet.FocalLoss',
+                use_sigmoid=True,
+                gamma=2.0,
+                alpha=0.25,
+                loss_weight=1.0),
+            loss_bbox: ConfigType = dict(
+                type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
+            loss_dir: ConfigType = dict(
+                type='mmdet.CrossEntropyLoss',
+                use_sigmoid=False,
+                loss_weight=1.0),
+            loss_attr: ConfigType = dict(
+                type='mmdet.CrossEntropyLoss',
+                use_sigmoid=False,
+                loss_weight=1.0),
+            bbox_code_size: int = 9,  # For nuscenes
+            pred_attrs: bool = False,
+            num_attrs: int = 9,  # For nuscenes
+            pred_velo: bool = False,
+            pred_bbox2d: bool = False,
+            group_reg_dims: Sequence[int] = (
+                2, 1, 3, 1, 2),  # offset, depth, size, rot, velo,
+            cls_branch: Sequence[int] = (128, 64),
+            reg_branch: Sequence[Tuple[int, int]] = (
                 (128, 64),  # offset
                 (128, 64),  # depth
                 (64, ),  # size
                 (64, ),  # rot
                 ()  # velo
             ),
-            dir_branch=(64, ),
-            attr_branch=(64, ),
-            conv_cfg=None,
-            norm_cfg=None,
-            train_cfg=None,
-            test_cfg=None,
-            init_cfg=None):
-        super(AnchorFreeMono3DHead, self).__init__(init_cfg=init_cfg)
+            dir_branch: Sequence[int] = (64, ),
+            attr_branch: Sequence[int] = (64, ),
+            conv_cfg: OptConfigType = None,
+            norm_cfg: OptConfigType = None,
+            train_cfg: OptConfigType = None,
+            test_cfg: OptConfigType = None,
+            init_cfg: OptConfigType = None) -> None:
+        super().__init__(init_cfg=init_cfg)
         self.num_classes = num_classes
         self.cls_out_channels = num_classes
         self.in_channels = in_channels
...
...
@@ -141,9 +153,9 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
self
.
diff_rad_by_sin
=
diff_rad_by_sin
self
.
diff_rad_by_sin
=
diff_rad_by_sin
self
.
dir_offset
=
dir_offset
self
.
dir_offset
=
dir_offset
self
.
dir_limit_offset
=
dir_limit_offset
self
.
dir_limit_offset
=
dir_limit_offset
self
.
loss_cls
=
build
_loss
(
loss_cls
)
self
.
loss_cls
=
MODELS
.
build
(
loss_cls
)
self
.
loss_bbox
=
build
_loss
(
loss_bbox
)
self
.
loss_bbox
=
MODELS
.
build
(
loss_bbox
)
self
.
loss_dir
=
build
_loss
(
loss_dir
)
self
.
loss_dir
=
MODELS
.
build
(
loss_dir
)
self
.
bbox_code_size
=
bbox_code_size
self
.
bbox_code_size
=
bbox_code_size
self
.
group_reg_dims
=
list
(
group_reg_dims
)
self
.
group_reg_dims
=
list
(
group_reg_dims
)
self
.
cls_branch
=
cls_branch
self
.
cls_branch
=
cls_branch
...
@@ -174,7 +186,7 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
...
@@ -174,7 +186,7 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
self
.
num_attrs
=
num_attrs
self
.
num_attrs
=
num_attrs
if
self
.
pred_attrs
:
if
self
.
pred_attrs
:
self
.
attr_background_label
=
num_attrs
self
.
attr_background_label
=
num_attrs
self
.
loss_attr
=
build
_loss
(
loss_attr
)
self
.
loss_attr
=
MODELS
.
build
(
loss_attr
)
self
.
attr_branch
=
attr_branch
self
.
attr_branch
=
attr_branch
self
.
_init_layers
()
self
.
_init_layers
()
...
@@ -316,11 +328,13 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
         if self.pred_attrs:
             normal_init(self.conv_attr, std=0.01, bias=bias_cls)

-    def forward(self, feats):
+    def forward(
+        self, x: Tuple[Tensor]
+    ) -> Tuple[List[Tensor], List[Tensor], List[Tensor], List[Tensor]]:
         """Forward features from the upstream network.

         Args:
-            feats (tuple[Tensor]): Features from the upstream network, each is
+            x (tuple[Tensor]): Features from the upstream network, each is
                 a 4D-tensor.

         Returns:
...
@@ -339,9 +353,9 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
                 level, each is a 4D-tensor, the channel number is
                 num_points * num_attrs.
         """
-        return multi_apply(self.forward_single, feats)[:5]
+        return multi_apply(self.forward_single, x)[:5]

-    def forward_single(self, x):
+    def forward_single(self, x: Tensor) -> Tuple[Tensor, ...]:
         """Forward features of a single scale level.

         Args:
...
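`multi_apply` maps `forward_single` over every feature level and regroups the per-level output tuples into per-output lists. A minimal stand-in that mirrors mmdet's helper (the toy head names are for illustration):

# Minimal stand-in for mmdet's multi_apply: map a function over inputs
# and regroup the per-input result tuples into per-field lists.
from functools import partial

def multi_apply(func, *args, **kwargs):
    pfunc = partial(func, **kwargs) if kwargs else func
    map_results = map(pfunc, *args)
    return tuple(map(list, zip(*map_results)))

def head(level_feat):
    return f'cls({level_feat})', f'bbox({level_feat})'

cls_scores, bbox_preds = multi_apply(head, ['p3', 'p4', 'p5'])
print(cls_scores)  # ['cls(p3)', 'cls(p4)', 'cls(p5)']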
@@ -394,77 +408,8 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
             reg_feat

     @abstractmethod
-    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))
-    def loss(self, cls_scores, bbox_preds, dir_cls_preds, attr_preds,
-             batch_gt_instances_3d, batch_img_metas,
-             batch_gt_instances_ignore=None):
-        """Compute loss of the head.
-
-        Args:
-            cls_scores (list[Tensor]): Box scores for each scale level,
-                each is a 4D-tensor, the channel number is
-                num_points * num_classes.
-            bbox_preds (list[Tensor]): Box energies / deltas for each scale
-                level, each is a 4D-tensor, the channel number is
-                num_points * bbox_code_size.
-            dir_cls_preds (list[Tensor]): Box scores for direction class
-                predictions on each scale level, each is a 4D-tensor,
-                the channel number is num_points * 2. (bin = 2)
-            attr_preds (list[Tensor]): Box scores for each scale level,
-                each is a 4D-tensor, the channel number is
-                num_points * num_attrs.
-            batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
-                gt_instance_3d. It usually includes ``bboxes``、``labels``
-                、``bboxes_3d``、``labels3d``、``depths``、``centers2d`` and
-                attributes.
-            batch_img_metas (list[dict]): Meta information of each image,
-                e.g., image size, scaling factor, etc.
-            batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional):
-                Batch of gt_instances_ignore. It includes ``bboxes`` attribute
-                data that is ignored during training and testing.
-                Defaults to None.
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds'))
-    def get_results(self, cls_scores, bbox_preds, dir_cls_preds, attr_preds,
-                    batch_img_metas, cfg=None, rescale=None):
-        """Transform network output for a batch into bbox predictions.
-
-        Args:
-            cls_scores (list[Tensor]): Box scores for each scale level
-                Has shape (N, num_points * num_classes, H, W)
-            bbox_preds (list[Tensor]): Box energies / deltas for each scale
-                level with shape (N, num_points * bbox_code_size, H, W)
-            dir_cls_preds (list[Tensor]): Box scores for direction class
-                predictions on each scale level, each is a 4D-tensor,
-                the channel number is num_points * 2. (bin = 2)
-            attr_preds (list[Tensor]): Attribute scores for each scale level
-                Has shape (N, num_points * num_attrs, H, W)
-            batch_img_metas (list[dict]): Meta information of each image,
-                e.g., image size, scaling factor, etc.
-            cfg (mmcv.Config): Test / postprocessing configuration,
-                if None, test_cfg would be used
-            rescale (bool): If True, return boxes in original image space
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    def get_targets(self, points, batch_gt_instances_3d):
+    def get_targets(self, points: List[Tensor],
+                    batch_gt_instances_3d: InstanceList) -> Any:
         """Compute regression, classification and centerness targets for
         points in multiple images.
...
@@ -473,18 +418,32 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
                 (num_points, 2).
             batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
                 gt_instance_3d. It usually includes ``bboxes``、``labels``
-                、``bboxes_3d``、``labels3d``、``depths``、``centers2d`` and
-                attributes.
+                、``bboxes_3d``、``labels_3d``、``depths``、``centers_2d``
+                and attributes.
         """
         raise NotImplementedError

+    # TODO: Refactor using MlvlPointGenerator in MMDet.
     def _get_points_single(self,
-                           featmap_size,
-                           stride,
-                           dtype,
-                           device,
-                           flatten=False):
-        """Get points of a single scale level."""
+                           featmap_size: Tuple[int],
+                           stride: int,
+                           dtype: torch.dtype,
+                           device: torch.device,
+                           flatten: bool = False) -> Tuple[Tensor, Tensor]:
+        """Get points of a single scale level.
+
+        Args:
+            featmap_size (tuple[int]): Single scale level feature map size.
+            stride (int): Downsample factor of the feature map.
+            dtype (torch.dtype): Type of points.
+            device (torch.device): Device of points.
+            flatten (bool): Whether to flatten the tensor.
+                Defaults to False.
+
+        Returns:
+            tuple: points of each image.
+        """
         h, w = featmap_size
         x_range = torch.arange(w, dtype=dtype, device=device)
         y_range = torch.arange(h, dtype=dtype, device=device)
...
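`_get_points_single` enumerates the pixel grid of one feature level. A self-contained sketch; note the real method returns raw `y, x` and leaves the stride shift to subclasses, which is folded in here only to make the example complete:

import torch

def get_points_single(featmap_size, stride, flatten=False):
    """Sketch: grid point coordinates for one feature level."""
    h, w = featmap_size
    x_range = torch.arange(w, dtype=torch.float32)
    y_range = torch.arange(h, dtype=torch.float32)
    y, x = torch.meshgrid(y_range, x_range, indexing='ij')
    if flatten:
        y, x = y.flatten(), x.flatten()
    # Scale to input resolution and shift to the cell center.
    points = torch.stack((x * stride + stride // 2,
                          y * stride + stride // 2), dim=-1)
    return points

print(get_points_single((2, 3), 8, flatten=True))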
@@ -494,16 +453,23 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
             x = x.flatten()
         return y, x

-    def get_points(self, featmap_sizes, dtype, device, flatten=False):
+    # TODO: Refactor using MlvlPointGenerator in MMDet.
+    def get_points(self, featmap_sizes: List[Tuple[int]], dtype: torch.dtype,
+                   device: torch.device,
+                   flatten: bool = False) -> List[Tuple[Tensor, Tensor]]:
         """Get points according to feature map sizes.

         Args:
             featmap_sizes (list[tuple]): Multi-level feature map sizes.
             dtype (torch.dtype): Type of points.
             device (torch.device): Device of points.
+            flatten (bool): Whether to flatten the tensor.
+                Defaults to False.

         Returns:
-            tuple: points of each image.
+            list[tuple]: points of each image.
         """
         mlvl_points = []
         for i in range(len(featmap_sizes)):
...
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment