Commit 7aa442d5 authored by raojy

raw_mmdetection

parent 9c03eaa8
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
from torch.optim.sgd import SGD
# training schedule for 1x
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=12, val_interval=1)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)
# learning rate
param_scheduler = [
dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=500),
dict(
type=MultiStepLR,
begin=0,
end=12,
by_epoch=True,
milestones=[8, 11],
gamma=0.1)
]
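# A sketch of the resulting schedule (base lr = 0.02 from the SGD optimizer
# below; iteration counts depend on dataset length):
#   iters 0-500:   linear warmup from 0.02 * 0.001 = 2e-5 up to 0.02
#   epochs 0-8:    lr = 0.02
#   epochs 8-11:   lr = 0.02 * 0.1 = 2e-3
#   epochs 11-12:  lr = 0.02 * 0.01 = 2e-4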
# optimizer
optim_wrapper = dict(
type=OptimWrapper,
optimizer=dict(type=SGD, lr=0.02, momentum=0.9, weight_decay=0.0001))
# Default setting for scaling LR automatically
# - `enable` means whether to scale the LR automatically by default.
# - `base_batch_size` = (8 GPUs) x (2 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=16)
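# For reference, mmengine's automatic scaling follows the linear scaling rule
# (a sketch; it only takes effect when auto-scaling is enabled, e.g.
# `enable=True`):
#   scaled_lr = base_lr * (num_gpus * samples_per_gpu) / base_batch_size
#   e.g. 4 GPUs x 2 samples per GPU: 0.02 * 8 / 16 = 0.01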
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
from torch.optim.adamw import AdamW
# optimizer
# This schedule is mainly used by models on the nuScenes dataset
lr = 0.001
optim_wrapper = dict(
type=OptimWrapper,
optimizer=dict(type=AdamW, lr=lr, weight_decay=0.01),
# max_norm=10 is better for SECOND
clip_grad=dict(max_norm=35, norm_type=2))
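# The `clip_grad` dict above is forwarded to PyTorch's gradient clipping;
# a rough sketch of the equivalent call (assuming OptimWrapper's default
# norm-based clipping):
#   torch.nn.utils.clip_grad_norm_(params, max_norm=35, norm_type=2)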
# training schedule for 2x
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=24, val_interval=24)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)
# learning rate
param_scheduler = [
dict(
type=LinearLR,
start_factor=1.0 / 1000,
by_epoch=False,
begin=0,
end=1000),
dict(
type=MultiStepLR,
begin=0,
end=24,
by_epoch=True,
milestones=[20, 23],
gamma=0.1)
]
# Default setting for scaling LR automatically
# - `enable` means whether to scale the LR automatically by default.
# - `base_batch_size` = (8 GPUs) x (4 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=32)
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import MultiStepLR
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
from torch.optim.adamw import AdamW
# optimizer
# This schedule is mainly used by models on indoor datasets,
# e.g., VoteNet on SUNRGBD and ScanNet
lr = 0.008 # max learning rate
optim_wrapper = dict(
type=OptimWrapper,
optimizer=dict(type=AdamW, lr=lr, weight_decay=0.01),
clip_grad=dict(max_norm=10, norm_type=2),
)
# training schedule for 3x
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=36, val_interval=1)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)
# learning rate
param_scheduler = [
dict(
type=MultiStepLR,
begin=0,
end=36,
by_epoch=True,
milestones=[24, 32],
gamma=0.1)
]
# Default setting for scaling LR automatically
# - `enable` means whether to scale the LR automatically by default.
# - `base_batch_size` = (4 GPUs) x (8 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=32)
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR
from torch.optim.sgd import SGD
# optimizer
# This schedule is mainly used on the S3DIS dataset for the segmentation task
optim_wrapper = dict(
type=OptimWrapper,
optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.001),
clip_grad=None)
param_scheduler = [
dict(
type=CosineAnnealingLR,
T_max=100,
eta_min=1e-5,
by_epoch=True,
begin=0,
end=100)
]
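# With the settings above, the LR follows the standard cosine annealing
# curve (t in epochs, base lr = 0.1 from the SGD optimizer):
#   lr(t) = eta_min + (0.1 - eta_min) * (1 + cos(pi * t / 100)) / 2
# i.e. it decays smoothly from 0.1 at epoch 0 to 1e-5 at epoch 100.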
# runtime settings
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# Default setting for scaling LR automatically
# - `enable` means whether to scale the LR automatically by default.
# - `base_batch_size` = (4 GPUs) x (32 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=128)
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR
from torch.optim.sgd import SGD
# optimizer
# This schedule is mainly used on the S3DIS dataset for the segmentation task
optim_wrapper = dict(
type=OptimWrapper,
optimizer=dict(type=SGD, lr=0.2, momentum=0.9, weight_decay=0.0001),
clip_grad=None)
param_scheduler = [
dict(
type=CosineAnnealingLR,
T_max=150,
eta_min=0.002,
by_epoch=True,
begin=0,
end=150)
]
# runtime settings
train_cfg = dict(by_epoch=True, max_epochs=150, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# Default setting for scaling LR automatically
# - `enable` means whether to scale the LR automatically by default.
# - `base_batch_size` = (8 GPUs) x (8 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=64)
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR
from torch.optim.adam import Adam
# optimizer
# This schedule is mainly used on the S3DIS dataset for the segmentation task
optim_wrapper = dict(
type=OptimWrapper,
optimizer=dict(type=Adam, lr=0.001, weight_decay=0.01),
clip_grad=None)
param_scheduler = [
dict(
type=CosineAnnealingLR,
T_max=200,
eta_min=1e-5,
by_epoch=True,
begin=0,
end=200)
]
# runtime settings
train_cfg = dict(by_epoch=True, max_epochs=200, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# Default setting for scaling LR automatically
# - `enable` means whether to scale the LR automatically by default.
# - `base_batch_size` = (2 GPUs) x (16 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=32)
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR
from torch.optim.adam import Adam
# optimizer
# This schedule is mainly used on the S3DIS dataset for the segmentation task
optim_wrapper = dict(
type=OptimWrapper,
optimizer=dict(type=Adam, lr=0.001, weight_decay=0.001),
clip_grad=None)
param_scheduler = [
dict(
type=CosineAnnealingLR,
T_max=50,
eta_min=1e-5,
by_epoch=True,
begin=0,
end=50)
]
# runtime settings
train_cfg = dict(by_epoch=True, max_epochs=50, val_interval=1)
val_cfg = dict()
test_cfg = dict()
# Default setting for scaling LR automatically
# - `enable` means whether to scale the LR automatically by default.
# - `base_batch_size` = (2 GPUs) x (16 samples per GPU).
auto_scale_lr = dict(enable=False, base_batch_size=32)
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine.config import read_base
with read_base():
from .._base_.datasets.nus_3d import *
from .._base_.models.centerpoint_pillar02_second_secfpn_nus import *
from .._base_.schedules.cyclic_20e import *
from .._base_.default_runtime import *
from mmengine.dataset.sampler import DefaultSampler
from mmdet3d.datasets.dataset_wrappers import CBGSDataset
from mmdet3d.datasets.nuscenes_dataset import NuScenesDataset
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadPointsFromFile,
LoadPointsFromMultiSweeps)
from mmdet3d.datasets.transforms.test_time_aug import MultiScaleFlipAug3D
from mmdet3d.datasets.transforms.transforms_3d import ( # noqa
GlobalRotScaleTrans, ObjectNameFilter, ObjectRangeFilter, ObjectSample,
PointShuffle, PointsRangeFilter, RandomFlip3D)
# If the point cloud range is changed, the models should also change their
# point cloud range accordingly
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
# Using calibration info to convert the LiDAR-coordinate point cloud range
# to the ego-coordinate range can bring a small improvement on nuScenes.
# point_cloud_range = [-51.2, -52, -5.0, 51.2, 50.4, 3.0]
# For nuScenes we usually do 10-class detection
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
data_prefix.update(
dict(pts='samples/LIDAR_TOP', img='', sweeps='sweeps/LIDAR_TOP'))
model.update(
dict(
data_preprocessor=dict(
voxel_layer=dict(point_cloud_range=point_cloud_range)),
pts_voxel_encoder=dict(point_cloud_range=point_cloud_range),
pts_bbox_head=dict(bbox_coder=dict(pc_range=point_cloud_range[:2])),
# model training and testing settings
train_cfg=dict(pts=dict(point_cloud_range=point_cloud_range)),
test_cfg=dict(pts=dict(pc_range=point_cloud_range[:2]))))
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
backend_args = None
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(
car=5,
truck=5,
bus=5,
trailer=5,
construction_vehicle=5,
traffic_cone=5,
barrier=5,
motorcycle=5,
bicycle=5,
pedestrian=5)),
classes=class_names,
sample_groups=dict(
car=2,
truck=3,
construction_vehicle=7,
bus=4,
trailer=6,
barrier=2,
motorcycle=6,
bicycle=6,
pedestrian=2,
traffic_cone=2),
points_loader=dict(
type=LoadPointsFromFile,
coord_type='LIDAR',
load_dim=5,
use_dim=[0, 1, 2, 3, 4],
backend_args=backend_args),
backend_args=backend_args)
train_pipeline = [
dict(
type=LoadPointsFromFile,
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type=LoadPointsFromMultiSweeps,
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True,
backend_args=backend_args),
dict(type=LoadAnnotations3D, with_bbox_3d=True, with_label_3d=True),
dict(type=ObjectSample, db_sampler=db_sampler),
dict(
type=GlobalRotScaleTrans,
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(
type=RandomFlip3D,
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range),
dict(type=ObjectRangeFilter, point_cloud_range=point_cloud_range),
dict(type=ObjectNameFilter, classes=class_names),
dict(type=PointShuffle),
dict(
type=Pack3DDetInputs, keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type=LoadPointsFromFile,
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type=LoadPointsFromMultiSweeps,
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True,
backend_args=backend_args),
dict(
type=MultiScaleFlipAug3D,
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type=GlobalRotScaleTrans,
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type=RandomFlip3D)
]),
dict(type=Pack3DDetInputs, keys=['points'])
]
train_dataloader.merge(
dict(
_delete_=True,
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type=DefaultSampler, shuffle=True),
dataset=dict(
type=CBGSDataset,
dataset=dict(
type=NuScenesDataset,
data_root=data_root,
ann_file='nuscenes_infos_train.pkl',
pipeline=train_pipeline,
metainfo=dict(classes=class_names),
test_mode=False,
data_prefix=data_prefix,
use_valid_flag=True,
            # we use box_type_3d='LiDAR' in the KITTI and nuScenes datasets
            # and box_type_3d='Depth' in the SUN RGB-D and ScanNet datasets.
box_type_3d='LiDAR',
backend_args=backend_args))))
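# Note: `_delete_=True` tells `merge` to discard the `train_dataloader`
# inherited from the base config and use this dict as-is, rather than
# recursively merging the two dicts key by key.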
test_dataloader.update(
dict(
dataset=dict(
pipeline=test_pipeline, metainfo=dict(classes=class_names))))
val_dataloader.update(
dict(
dataset=dict(
pipeline=test_pipeline, metainfo=dict(classes=class_names))))
train_cfg.update(dict(val_interval=20))
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine import read_base
with read_base():
from .._base_.datasets.nus_3d import *
from .._base_.models.centerpoint_voxel01_second_secfpn_nus import *
from .._base_.schedules.cyclic_20e import *
from .._base_.default_runtime import *
from mmengine.dataset.sampler import DefaultSampler
from mmdet3d.datasets.dataset_wrappers import CBGSDataset
from mmdet3d.datasets.nuscenes_dataset import NuScenesDataset
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadPointsFromFile,
LoadPointsFromMultiSweeps)
from mmdet3d.datasets.transforms.test_time_aug import MultiScaleFlipAug3D
from mmdet3d.datasets.transforms.transforms_3d import ( # noqa
GlobalRotScaleTrans, ObjectNameFilter, ObjectRangeFilter, ObjectSample,
PointShuffle, PointsRangeFilter, RandomFlip3D)
# If the point cloud range is changed, the models should also change their
# point cloud range accordingly
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
# Using calibration info to convert the LiDAR-coordinate point cloud range
# to the ego-coordinate range can bring a small improvement on nuScenes.
# point_cloud_range = [-51.2, -52, -5.0, 51.2, 50.4, 3.0]
# For nuScenes we usually do 10-class detection
class_names = [
'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
data_prefix.update(
dict(pts='samples/LIDAR_TOP', img='', sweeps='sweeps/LIDAR_TOP'))
model.update(
dict(
data_preprocessor=dict(
voxel_layer=dict(point_cloud_range=point_cloud_range)),
pts_bbox_head=dict(bbox_coder=dict(pc_range=point_cloud_range[:2])),
# model training and testing settings
train_cfg=dict(pts=dict(point_cloud_range=point_cloud_range)),
test_cfg=dict(pts=dict(pc_range=point_cloud_range[:2]))))
dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
backend_args = None
db_sampler = dict(
data_root=data_root,
info_path=data_root + 'nuscenes_dbinfos_train.pkl',
rate=1.0,
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(
car=5,
truck=5,
bus=5,
trailer=5,
construction_vehicle=5,
traffic_cone=5,
barrier=5,
motorcycle=5,
bicycle=5,
pedestrian=5)),
classes=class_names,
sample_groups=dict(
car=2,
truck=3,
construction_vehicle=7,
bus=4,
trailer=6,
barrier=2,
motorcycle=6,
bicycle=6,
pedestrian=2,
traffic_cone=2),
points_loader=dict(
type=LoadPointsFromFile,
coord_type='LIDAR',
load_dim=5,
use_dim=[0, 1, 2, 3, 4],
backend_args=backend_args),
backend_args=backend_args)
train_pipeline = [
dict(
type=LoadPointsFromFile,
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type=LoadPointsFromMultiSweeps,
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True,
backend_args=backend_args),
dict(type=LoadAnnotations3D, with_bbox_3d=True, with_label_3d=True),
dict(type=ObjectSample, db_sampler=db_sampler),
dict(
type=GlobalRotScaleTrans,
rot_range=[-0.3925, 0.3925],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0]),
dict(
type=RandomFlip3D,
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range),
dict(type=ObjectRangeFilter, point_cloud_range=point_cloud_range),
dict(type=ObjectNameFilter, classes=class_names),
dict(type=PointShuffle),
dict(
type=Pack3DDetInputs, keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(
type=LoadPointsFromFile,
coord_type='LIDAR',
load_dim=5,
use_dim=5,
backend_args=backend_args),
dict(
type=LoadPointsFromMultiSweeps,
sweeps_num=9,
use_dim=[0, 1, 2, 3, 4],
pad_empty_sweeps=True,
remove_close=True,
backend_args=backend_args),
dict(
type=MultiScaleFlipAug3D,
img_scale=(1333, 800),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(
type=GlobalRotScaleTrans,
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type=RandomFlip3D),
dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range)
]),
dict(type=Pack3DDetInputs, keys=['points'])
]
train_dataloader.merge(
dict(
_delete_=True,
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type=DefaultSampler, shuffle=True),
dataset=dict(
type=CBGSDataset,
dataset=dict(
type=NuScenesDataset,
data_root=data_root,
ann_file='nuscenes_infos_train.pkl',
pipeline=train_pipeline,
metainfo=dict(classes=class_names),
test_mode=False,
data_prefix=data_prefix,
use_valid_flag=True,
            # we use box_type_3d='LiDAR' in the KITTI and nuScenes datasets
            # and box_type_3d='Depth' in the SUN RGB-D and ScanNet datasets.
box_type_3d='LiDAR',
backend_args=backend_args))))
test_dataloader.update(
dict(
dataset=dict(
pipeline=test_pipeline, metainfo=dict(classes=class_names))))
val_dataloader.update(
dict(
dataset=dict(
pipeline=test_pipeline, metainfo=dict(classes=class_names))))
train_cfg.update(dict(val_interval=20))
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine import read_base
with read_base():
from .._base_.datasets.semantickitti import *
from .._base_.models.cylinder3d import *
from .._base_.default_runtime import *
from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmengine.runner.loops import EpochBasedTrainLoop, TestLoop, ValLoop
from torch.optim import AdamW
# optimizer
lr = 0.001
optim_wrapper = dict(
type=OptimWrapper, optimizer=dict(type=AdamW, lr=lr, weight_decay=0.01))
train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=36, val_interval=1)
val_cfg = dict(type=ValLoop)
test_cfg = dict(type=TestLoop)
# learning rate
param_scheduler = [
dict(type=LinearLR, start_factor=0.001, by_epoch=False, begin=0, end=1000),
dict(
type=MultiStepLR,
begin=0,
end=36,
by_epoch=True,
milestones=[30],
gamma=0.1)
]
train_dataloader.update(dict(batch_size=4))
# Default setting for scaling LR automatically
# - `enable` means whether to scale the LR automatically by default.
# - `base_batch_size` = (8 GPUs) x (4 samples per GPU).
# auto_scale_lr = dict(enable=False, base_batch_size=32)
default_hooks.update(dict(checkpoint=dict(type=CheckpointHook, interval=5)))
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine import read_base
with read_base():
from .._base_.datasets.semantickitti import *
from .._base_.default_runtime import *
from .._base_.models.cylinder3d import *
from .._base_.schedules.schedule_3x import *
from mmcv.transforms.wrappers import RandomChoice
from mmdet3d.datasets.transforms.transforms_3d import LaserMix, PolarMix
train_pipeline = [
dict(type=LoadPointsFromFile, coord_type='LIDAR', load_dim=4, use_dim=4),
dict(
type=LoadAnnotations3D,
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti'),
dict(type=PointSegClassMapping),
dict(
type=RandomChoice,
transforms=[
[
dict(
type=LaserMix,
num_areas=[3, 4, 5, 6],
pitch_angles=[-25, 3],
pre_transform=[
dict(
type=LoadPointsFromFile,
coord_type='LIDAR',
load_dim=4,
use_dim=4),
dict(
type=LoadAnnotations3D,
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti'),
dict(type=PointSegClassMapping)
],
prob=1)
],
[
dict(
type=PolarMix,
instance_classes=[0, 1, 2, 3, 4, 5, 6, 7],
swap_ratio=0.5,
rotate_paste_ratio=1.0,
pre_transform=[
dict(
type=LoadPointsFromFile,
coord_type='LIDAR',
load_dim=4,
use_dim=4),
dict(
type=LoadAnnotations3D,
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti'),
dict(type=PointSegClassMapping)
],
prob=1)
],
],
prob=[0.5, 0.5]),
dict(
type=GlobalRotScaleTrans,
rot_range=[0., 6.28318531],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0],
),
dict(type=Pack3DDetInputs, keys=['points', 'pts_semantic_mask'])
]
train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))
default_hooks.update(dict(checkpoint=dict(type=CheckpointHook, interval=1)))
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine import read_base
with read_base():
from .._base_.datasets.semantickitti import *
from .._base_.models.minkunet import *
from .._base_.schedules.schedule_3x import *
from .._base_.default_runtime import *
from mmcv.transforms.wrappers import RandomChoice
from mmengine.hooks.checkpoint_hook import CheckpointHook
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadPointsFromFile,
PointSegClassMapping)
from mmdet3d.datasets.transforms.transforms_3d import (GlobalRotScaleTrans,
LaserMix, PolarMix)
model.update(
dict(
data_preprocessor=dict(max_voxels=None),
backbone=dict(encoder_blocks=[2, 3, 4, 6])))
train_pipeline = [
dict(type=LoadPointsFromFile, coord_type='LIDAR', load_dim=4, use_dim=4),
dict(
type=LoadAnnotations3D,
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti'),
dict(type=PointSegClassMapping),
dict(
type=RandomChoice,
transforms=[
[
dict(
type=LaserMix,
num_areas=[3, 4, 5, 6],
pitch_angles=[-25, 3],
pre_transform=[
dict(
type=LoadPointsFromFile,
coord_type='LIDAR',
load_dim=4,
use_dim=4),
dict(
type=LoadAnnotations3D,
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti'),
dict(type=PointSegClassMapping)
],
prob=1)
],
[
dict(
type=PolarMix,
instance_classes=[0, 1, 2, 3, 4, 5, 6, 7],
swap_ratio=0.5,
rotate_paste_ratio=1.0,
pre_transform=[
dict(
type=LoadPointsFromFile,
coord_type='LIDAR',
load_dim=4,
use_dim=4),
dict(
type=LoadAnnotations3D,
with_bbox_3d=False,
with_label_3d=False,
with_seg_3d=True,
seg_3d_dtype='np.int32',
seg_offset=2**16,
dataset_type='semantickitti'),
dict(type=PointSegClassMapping)
],
prob=1)
],
],
prob=[0.5, 0.5]),
dict(
type=GlobalRotScaleTrans,
rot_range=[0., 6.28318531],
scale_ratio_range=[0.95, 1.05],
translation_std=[0, 0, 0],
),
dict(type=Pack3DDetInputs, keys=['points', 'pts_semantic_mask'])
]
train_dataloader.update(dict(dataset=dict(pipeline=train_pipeline)))
default_hooks.update(dict(checkpoint=dict(type=CheckpointHook, interval=1)))
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine import read_base
with read_base():
from .._base_.schedules.cosine import *
from .._base_.default_runtime import *
from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize, Resize
from mmengine.dataset.dataset_wrapper import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler
from mmengine.visualization.vis_backend import LocalVisBackend
from mmdet3d.datasets.kitti_dataset import KittiDataset
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadPointsFromFile)
from mmdet3d.datasets.transforms.test_time_aug import MultiScaleFlipAug3D
from mmdet3d.datasets.transforms.transforms_3d import (GlobalRotScaleTrans,
ObjectRangeFilter,
PointShuffle,
PointsRangeFilter,
RandomFlip3D)
from mmdet3d.evaluation.metrics.kitti_metric import KittiMetric
from mmdet3d.models.backbones.second import SECOND
from mmdet3d.models.data_preprocessors.data_preprocessor import \
Det3DDataPreprocessor
from mmdet3d.models.dense_heads.anchor3d_head import Anchor3DHead
from mmdet3d.models.detectors.mvx_faster_rcnn import DynamicMVXFasterRCNN
from mmdet3d.models.layers.fusion_layers.point_fusion import PointFusion
from mmdet3d.models.middle_encoders.sparse_encoder import SparseEncoder
from mmdet3d.models.necks.second_fpn import SECONDFPN
from mmdet3d.models.task_modules.anchor.anchor_3d_generator import \
Anchor3DRangeGenerator
from mmdet3d.models.task_modules.assigners.max_3d_iou_assigner import \
Max3DIoUAssigner
from mmdet3d.models.task_modules.coders.delta_xyzwhlr_bbox_coder import \
DeltaXYZWLHRBBoxCoder
from mmdet3d.models.voxel_encoders.voxel_encoder import DynamicVFE
from mmdet3d.structures.ops.iou3d_calculator import BboxOverlapsNearest3D
from mmdet3d.visualization.local_visualizer import Det3DLocalVisualizer
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]
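# The SparseEncoder `sparse_shape` below ([41, 1600, 1408]) follows from
# these two settings (the extra +1 in z is the conventional padding):
#   z: (1 - (-3)) / 0.1 = 40 voxels (+1) -> 41
#   y: (40 - (-40)) / 0.05 = 1600
#   x: (70.4 - 0) / 0.05 = 1408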
model = dict(
type=DynamicMVXFasterRCNN,
data_preprocessor=dict(
type=Det3DDataPreprocessor,
voxel=True,
voxel_type='dynamic',
voxel_layer=dict(
max_num_points=-1,
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
max_voxels=(-1, -1)),
mean=[102.9801, 115.9465, 122.7717],
std=[1.0, 1.0, 1.0],
bgr_to_rgb=False,
pad_size_divisor=32),
img_backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='caffe'),
img_neck=dict(
type='mmdet.FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
        # make the image features more numerically stable to avoid NaN loss
norm_cfg=dict(type='BN', requires_grad=False),
num_outs=5),
pts_voxel_encoder=dict(
type=DynamicVFE,
in_channels=4,
feat_channels=[64, 64],
with_distance=False,
voxel_size=voxel_size,
with_cluster_center=True,
with_voxel_center=True,
point_cloud_range=point_cloud_range,
fusion_layer=dict(
type=PointFusion,
img_channels=256,
pts_channels=64,
mid_channels=128,
out_channels=128,
img_levels=[0, 1, 2, 3, 4],
align_corners=False,
activate_out=True,
fuse_out=False)),
pts_middle_encoder=dict(
type=SparseEncoder,
in_channels=128,
sparse_shape=[41, 1600, 1408],
order=('conv', 'norm', 'act')),
pts_backbone=dict(
type=SECOND,
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256]),
pts_neck=dict(
type=SECONDFPN,
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256]),
pts_bbox_head=dict(
type=Anchor3DHead,
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
anchor_generator=dict(
type=Anchor3DRangeGenerator,
ranges=[
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -0.6, 70.4, 40.0, -0.6],
[0, -40.0, -1.78, 70.4, 40.0, -1.78],
],
sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
assigner_per_size=True,
diff_rad_by_sin=True,
assign_per_class=True,
bbox_coder=dict(type=DeltaXYZWLHRBBoxCoder),
loss_cls=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='mmdet.CrossEntropyLoss', use_sigmoid=False,
loss_weight=0.2)),
# model training and testing settings
train_cfg=dict(
pts=dict(
assigner=[
dict( # for Pedestrian
type=Max3DIoUAssigner,
iou_calculator=dict(type=BboxOverlapsNearest3D),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Cyclist
type=Max3DIoUAssigner,
iou_calculator=dict(type=BboxOverlapsNearest3D),
pos_iou_thr=0.35,
neg_iou_thr=0.2,
min_pos_iou=0.2,
ignore_iof_thr=-1),
dict( # for Car
type=Max3DIoUAssigner,
iou_calculator=dict(type=BboxOverlapsNearest3D),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
],
allowed_border=0,
pos_weight=-1,
debug=False)),
test_cfg=dict(
pts=dict(
use_rotate_nms=True,
nms_across_levels=False,
nms_thr=0.01,
score_thr=0.1,
min_bbox_size=0,
nms_pre=100,
max_num=50)))
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
input_modality = dict(use_lidar=True, use_camera=True)
backend_args = None
train_pipeline = [
dict(
type=LoadPointsFromFile,
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(type=LoadAnnotations3D, with_bbox_3d=True, with_label_3d=True),
dict(type=RandomResize, scale=[(640, 192), (2560, 768)], keep_ratio=True),
dict(
type=GlobalRotScaleTrans,
rot_range=[-0.78539816, 0.78539816],
scale_ratio_range=[0.95, 1.05],
translation_std=[0.2, 0.2, 0.2]),
dict(type=RandomFlip3D, flip_ratio_bev_horizontal=0.5),
dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range),
dict(type=ObjectRangeFilter, point_cloud_range=point_cloud_range),
dict(type=PointShuffle),
dict(
type=Pack3DDetInputs,
keys=[
'points', 'img', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_bboxes',
'gt_labels'
])
]
test_pipeline = [
dict(
type=LoadPointsFromFile,
coord_type='LIDAR',
load_dim=4,
use_dim=4,
backend_args=backend_args),
dict(type=LoadImageFromFile, backend_args=backend_args),
dict(
type=MultiScaleFlipAug3D,
img_scale=(1280, 384),
pts_scale_ratio=1,
flip=False,
transforms=[
            # Temporary solution; fix this after the aug-test refactor
dict(type=Resize, scale=0, keep_ratio=True),
dict(
type=GlobalRotScaleTrans,
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(type=RandomFlip3D),
dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range),
]),
dict(type=Pack3DDetInputs, keys=['points', 'img'])
]
modality = dict(use_lidar=True, use_camera=True)
train_dataloader = dict(
batch_size=2,
num_workers=2,
sampler=dict(type=DefaultSampler, shuffle=True),
dataset=dict(
type=RepeatDataset,
times=2,
dataset=dict(
type=KittiDataset,
data_root=data_root,
modality=modality,
ann_file='kitti_infos_train.pkl',
data_prefix=dict(
pts='training/velodyne_reduced', img='training/image_2'),
pipeline=train_pipeline,
filter_empty_gt=False,
metainfo=metainfo,
            # we use box_type_3d='LiDAR' in the KITTI and nuScenes datasets
            # and box_type_3d='Depth' in the SUN RGB-D and ScanNet datasets.
box_type_3d='LiDAR',
backend_args=backend_args)))
val_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type=DefaultSampler, shuffle=False),
dataset=dict(
type=KittiDataset,
data_root=data_root,
modality=modality,
ann_file='kitti_infos_val.pkl',
data_prefix=dict(
pts='training/velodyne_reduced', img='training/image_2'),
pipeline=test_pipeline,
metainfo=metainfo,
test_mode=True,
box_type_3d='LiDAR',
backend_args=backend_args))
test_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type=DefaultSampler, shuffle=False),
dataset=dict(
type=KittiDataset,
data_root=data_root,
ann_file='kitti_infos_val.pkl',
modality=modality,
data_prefix=dict(
pts='training/velodyne_reduced', img='training/image_2'),
pipeline=test_pipeline,
metainfo=metainfo,
test_mode=True,
box_type_3d='LiDAR',
backend_args=backend_args))
optim_wrapper.update(
dict(
optimizer=dict(weight_decay=0.01),
clip_grad=dict(max_norm=35, norm_type=2),
))
val_evaluator = dict(
type=KittiMetric, ann_file='data/kitti/kitti_infos_val.pkl')
test_evaluator = val_evaluator
vis_backends = [dict(type=LocalVisBackend)]
visualizer = dict(
type=Det3DLocalVisualizer, vis_backends=vis_backends, name='visualizer')
# You may need to download the model first if the network is unstable
load_from = 'https://download.openmmlab.com/mmdetection3d/pretrain_models/mvx_faster_rcnn_detectron2-caffe_20e_coco-pretrain_gt-sample_kitti-3-class_moderate-79.3_20200207-a4a6a3c7.pth' # noqa
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine import read_base
with read_base():
from .._base_.datasets.kitti_mono3d import *
from .._base_.models.pgd import *
from .._base_.schedules.mmdet_schedule_1x import *
from .._base_.default_runtime import *
from mmcv.transforms.processing import Resize
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
LoadImageFromFileMono3D)
from mmdet3d.datasets.transforms.transforms_3d import RandomFlip3D
from mmdet3d.models.data_preprocessors.data_preprocessor import \
Det3DDataPreprocessor
from mmdet3d.models.losses.uncertain_smooth_l1_loss import \
UncertainSmoothL1Loss
from mmdet3d.models.task_modules.coders.pgd_bbox_coder import PGDBBoxCoder
# model settings
model.update(
dict(
data_preprocessor=dict(
type=Det3DDataPreprocessor,
mean=[103.530, 116.280, 123.675],
std=[1.0, 1.0, 1.0],
bgr_to_rgb=False,
pad_size_divisor=32),
backbone=dict(frozen_stages=0),
neck=dict(start_level=0, num_outs=4),
bbox_head=dict(
num_classes=3,
bbox_code_size=7,
pred_attrs=False,
pred_velo=False,
pred_bbox2d=True,
use_onlyreg_proj=True,
strides=(4, 8, 16, 32),
regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 1e8)),
group_reg_dims=(2, 1, 3, 1, 16,
4), # offset, depth, size, rot, kpts, bbox2d
reg_branch=(
(256, ), # offset
(256, ), # depth
(256, ), # size
(256, ), # rot
(256, ), # kpts
(256, ) # bbox2d
),
centerness_branch=(256, ),
loss_cls=dict(
type='mmdet.FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(
type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
loss_dir=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=False,
loss_weight=1.0),
loss_centerness=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
loss_weight=1.0),
use_depth_classifier=True,
depth_branch=(256, ),
depth_range=(0, 70),
depth_unit=10,
division='uniform',
depth_bins=8,
pred_keypoints=True,
weight_dim=1,
loss_depth=dict(
type=UncertainSmoothL1Loss,
alpha=1.0,
beta=3.0,
loss_weight=1.0),
bbox_coder=dict(
type=PGDBBoxCoder,
base_depths=((28.01, 16.32), ),
base_dims=((0.8, 1.73, 0.6), (1.76, 1.73, 0.6), (3.9, 1.56,
1.6)),
code_size=7)),
# set weight 1.0 for base 7 dims (offset, depth, size, rot)
# 0.2 for 16-dim keypoint offsets and 1.0 for 4-dim 2D distance targets
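        # layout check: (7 x 1.0) + (16 x 0.2) + (4 x 1.0) = 27 weights,
        # matching sum(group_reg_dims) = 2 + 1 + 3 + 1 + 16 + 4 = 27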
train_cfg=dict(code_weight=[
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1.0, 1.0, 1.0,
1.0
]),
test_cfg=dict(
nms_pre=100, nms_thr=0.05, score_thr=0.001, max_per_img=20)))
backend_args = None
train_pipeline = [
dict(type=LoadImageFromFileMono3D, backend_args=backend_args),
dict(
type=LoadAnnotations3D,
with_bbox=True,
with_label=True,
with_attr_label=False,
with_bbox_3d=True,
with_label_3d=True,
with_bbox_depth=True),
dict(type=Resize, scale=(1242, 375), keep_ratio=True),
dict(type=RandomFlip3D, flip_ratio_bev_horizontal=0.5),
dict(
type=Pack3DDetInputs,
keys=[
'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
'gt_labels_3d', 'centers_2d', 'depths'
]),
]
test_pipeline = [
dict(type=LoadImageFromFileMono3D, backend_args=backend_args),
dict(type=Resize, scale_factor=1.0),
dict(type=Pack3DDetInputs, keys=['img'])
]
train_dataloader.update(
dict(batch_size=3, num_workers=3, dataset=dict(pipeline=train_pipeline)))
test_dataloader.update(dict(dataset=dict(pipeline=test_pipeline)))
val_dataloader.update(dict(dataset=dict(pipeline=test_pipeline)))
# optimizer
optim_wrapper.update(
dict(
optimizer=dict(lr=0.001),
paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
clip_grad=dict(max_norm=35, norm_type=2)))
# learning rate
param_scheduler = [
dict(
type=LinearLR, start_factor=1.0 / 3, by_epoch=False, begin=0, end=500),
dict(
type=MultiStepLR,
begin=0,
end=48,
by_epoch=True,
milestones=[32, 44],
gamma=0.1)
]
train_cfg.update(dict(max_epochs=48, val_interval=2))
auto_scale_lr.update(dict(base_batch_size=12))
# Copyright (c) OpenMMLab. All rights reserved.
from mmengine import read_base
with read_base():
from .._base_.datasets.scannet_3d import *
from .._base_.models.votenet import *
from .._base_.schedules.schedule_3x import *
from .._base_.default_runtime import *
from mmengine.hooks.logger_hook import LoggerHook
from mmdet3d.models.task_modules.coders.partial_bin_based_bbox_coder import \
PartialBinBasedBBoxCoder
# model settings
model.update(
dict(
bbox_head=dict(
num_classes=18,
bbox_coder=dict(
type=PartialBinBasedBBoxCoder,
num_sizes=18,
num_dir_bins=1,
with_rot=False,
mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
[1.876858, 1.8425595, 1.1931566],
[0.61328, 0.6148609, 0.7182701],
[1.3955007, 1.5121545, 0.83443564],
[0.97949594, 1.0675149, 0.6329687],
[0.531663, 0.5955577, 1.7500148],
[0.9624706, 0.72462326, 1.1481868],
[0.83221924, 1.0490936, 1.6875663],
[0.21132214, 0.4206159, 0.5372846],
[1.4440073, 1.8970833, 0.26985747],
[1.0294262, 1.4040797, 0.87554324],
[1.3766412, 0.65521795, 1.6813129],
[0.6650819, 0.71111923, 1.298853],
[0.41999173, 0.37906948, 1.7513971],
[0.59359556, 0.5912492, 0.73919016],
[0.50867593, 0.50656086, 0.30136237],
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]]))))
default_hooks.update(dict(logger=dict(type=LoggerHook, interval=30)))
# Default setting for scaling LR automatically
# - `enable` means whether to scale the LR automatically by default.
# - `base_batch_size` = (8 GPUs) x (8 samples per GPU).
auto_scale_lr.update(dict(enable=False, base_batch_size=64))
# Copyright (c) OpenMMLab. All rights reserved.
from .dataset_wrappers import CBGSDataset
from .det3d_dataset import Det3DDataset
from .kitti_dataset import KittiDataset
from .lyft_dataset import LyftDataset
from .nuscenes_dataset import NuScenesDataset
# yapf: enable
from .s3dis_dataset import S3DISDataset, S3DISSegDataset
from .scannet_dataset import (ScanNetDataset, ScanNetInstanceSegDataset,
ScanNetSegDataset)
from .seg3d_dataset import Seg3DDataset
from .semantickitti_dataset import SemanticKittiDataset
from .sunrgbd_dataset import SUNRGBDDataset
# yapf: disable
from .transforms import (AffineResize, BackgroundPointsFilter, GlobalAlignment,
GlobalRotScaleTrans, IndoorPatchPointSample,
IndoorPointSample, LoadAnnotations3D,
LoadPointsFromDict, LoadPointsFromFile,
LoadPointsFromMultiSweeps, NormalizePointsColor,
ObjectNameFilter, ObjectNoise, ObjectRangeFilter,
ObjectSample, PointSample, PointShuffle,
PointsRangeFilter, RandomDropPointsColor,
RandomFlip3D, RandomJitterPoints, RandomResize3D,
RandomShiftScale, Resize3D, VoxelBasedPointSampler)
from .utils import get_loading_pipeline
from .waymo_dataset import WaymoDataset
__all__ = [
'KittiDataset', 'CBGSDataset', 'NuScenesDataset', 'LyftDataset',
'ObjectSample', 'RandomFlip3D', 'ObjectNoise', 'GlobalRotScaleTrans',
'PointShuffle', 'ObjectRangeFilter', 'PointsRangeFilter',
'LoadPointsFromFile', 'S3DISSegDataset', 'S3DISDataset',
'NormalizePointsColor', 'IndoorPatchPointSample', 'IndoorPointSample',
'PointSample', 'LoadAnnotations3D', 'GlobalAlignment', 'SUNRGBDDataset',
'ScanNetDataset', 'ScanNetSegDataset', 'ScanNetInstanceSegDataset',
'SemanticKittiDataset', 'Det3DDataset', 'Seg3DDataset',
'LoadPointsFromMultiSweeps', 'WaymoDataset', 'BackgroundPointsFilter',
'VoxelBasedPointSampler', 'get_loading_pipeline', 'RandomDropPointsColor',
'RandomJitterPoints', 'ObjectNameFilter', 'AffineResize',
'RandomShiftScale', 'LoadPointsFromDict', 'Resize3D', 'RandomResize3D',
]
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import warnings
from typing import List, Optional, Tuple, Union
import numpy as np
from nuscenes import NuScenes
from nuscenes.utils.geometry_utils import view_points
from pyquaternion import Quaternion
from shapely.geometry import MultiPoint, box
from shapely.geometry.polygon import Polygon
from mmdet3d.structures import Box3DMode, CameraInstance3DBoxes, points_cam2img
from mmdet3d.structures.ops import box_np_ops
kitti_categories = ('Pedestrian', 'Cyclist', 'Car', 'Van', 'Truck',
'Person_sitting', 'Tram', 'Misc')
waymo_categories = ('Car', 'Pedestrian', 'Cyclist')
nus_categories = ('car', 'truck', 'trailer', 'bus', 'construction_vehicle',
'bicycle', 'motorcycle', 'pedestrian', 'traffic_cone',
'barrier')
nus_attributes = ('cycle.with_rider', 'cycle.without_rider',
'pedestrian.moving', 'pedestrian.standing',
'pedestrian.sitting_lying_down', 'vehicle.moving',
'vehicle.parked', 'vehicle.stopped', 'None')
NuScenesNameMapping = {
'movable_object.barrier': 'barrier',
'vehicle.bicycle': 'bicycle',
'vehicle.bus.bendy': 'bus',
'vehicle.bus.rigid': 'bus',
'vehicle.car': 'car',
'vehicle.construction': 'construction_vehicle',
'vehicle.motorcycle': 'motorcycle',
'human.pedestrian.adult': 'pedestrian',
'human.pedestrian.child': 'pedestrian',
'human.pedestrian.construction_worker': 'pedestrian',
'human.pedestrian.police_officer': 'pedestrian',
'movable_object.trafficcone': 'traffic_cone',
'vehicle.trailer': 'trailer',
'vehicle.truck': 'truck'
}
LyftNameMapping = {
'bicycle': 'bicycle',
'bus': 'bus',
'car': 'car',
'emergency_vehicle': 'emergency_vehicle',
'motorcycle': 'motorcycle',
'other_vehicle': 'other_vehicle',
'pedestrian': 'pedestrian',
'truck': 'truck',
'animal': 'animal'
}
def get_nuscenes_2d_boxes(nusc: NuScenes, sample_data_token: str,
visibilities: List[str]) -> List[dict]:
"""Get the 2d / mono3d annotation records for a given `sample_data_token`
    of the nuScenes dataset.
Args:
nusc (:obj:`NuScenes`): NuScenes class.
sample_data_token (str): Sample data token belonging to a camera
keyframe.
visibilities (List[str]): Visibility filter.
    Returns:
        List[dict]: List of 2d annotation records that belong to the input
        `sample_data_token`.
"""
# Get the sample data and the sample corresponding to that sample data.
sd_rec = nusc.get('sample_data', sample_data_token)
assert sd_rec[
'sensor_modality'] == 'camera', 'Error: get_2d_boxes only works' \
' for camera sample_data!'
if not sd_rec['is_key_frame']:
raise ValueError(
'The 2D re-projections are available only for keyframes.')
s_rec = nusc.get('sample', sd_rec['sample_token'])
# Get the calibrated sensor and ego pose
# record to get the transformation matrices.
cs_rec = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
pose_rec = nusc.get('ego_pose', sd_rec['ego_pose_token'])
camera_intrinsic = np.array(cs_rec['camera_intrinsic'])
    # Get all the annotations with the specified visibilities.
ann_recs = [
nusc.get('sample_annotation', token) for token in s_rec['anns']
]
ann_recs = [
ann_rec for ann_rec in ann_recs
if (ann_rec['visibility_token'] in visibilities)
]
repro_recs = []
for ann_rec in ann_recs:
# Augment sample_annotation with token information.
ann_rec['sample_annotation_token'] = ann_rec['token']
ann_rec['sample_data_token'] = sample_data_token
# Get the box in global coordinates.
box = nusc.get_box(ann_rec['token'])
# Move them to the ego-pose frame.
box.translate(-np.array(pose_rec['translation']))
box.rotate(Quaternion(pose_rec['rotation']).inverse)
# Move them to the calibrated sensor frame.
box.translate(-np.array(cs_rec['translation']))
box.rotate(Quaternion(cs_rec['rotation']).inverse)
# Filter out the corners that are not in front of the calibrated
# sensor.
corners_3d = box.corners()
in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
corners_3d = corners_3d[:, in_front]
# Project 3d box to 2d.
corner_coords = view_points(corners_3d, camera_intrinsic,
True).T[:, :2].tolist()
# Keep only corners that fall within the image.
final_coords = post_process_coords(corner_coords)
# Skip if the convex hull of the re-projected corners
# does not intersect the image canvas.
if final_coords is None:
continue
else:
min_x, min_y, max_x, max_y = final_coords
# Generate dictionary record to be included in the .json file.
repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
'nuscenes')
        # if repro_rec is None, we do not append it into repro_recs
if repro_rec is not None:
loc = box.center.tolist()
dim = box.wlh
dim[[0, 1, 2]] = dim[[1, 2, 0]] # convert wlh to our lhw
dim = dim.tolist()
rot = box.orientation.yaw_pitch_roll[0]
rot = [-rot] # convert the rot to our cam coordinate
global_velo2d = nusc.box_velocity(box.token)[:2]
global_velo3d = np.array([*global_velo2d, 0.0])
e2g_r_mat = Quaternion(pose_rec['rotation']).rotation_matrix
c2e_r_mat = Quaternion(cs_rec['rotation']).rotation_matrix
cam_velo3d = global_velo3d @ np.linalg.inv(
e2g_r_mat).T @ np.linalg.inv(c2e_r_mat).T
velo = cam_velo3d[0::2].tolist()
repro_rec['bbox_3d'] = loc + dim + rot
repro_rec['velocity'] = velo
center_3d = np.array(loc).reshape([1, 3])
center_2d_with_depth = points_cam2img(
center_3d, camera_intrinsic, with_depth=True)
center_2d_with_depth = center_2d_with_depth.squeeze().tolist()
repro_rec['center_2d'] = center_2d_with_depth[:2]
repro_rec['depth'] = center_2d_with_depth[2]
# normalized center2D + depth
            # samples with depth <= 0 will be removed
if repro_rec['depth'] <= 0:
continue
ann_token = nusc.get('sample_annotation',
box.token)['attribute_tokens']
if len(ann_token) == 0:
attr_name = 'None'
else:
attr_name = nusc.get('attribute', ann_token[0])['name']
attr_id = nus_attributes.index(attr_name)
# repro_rec['attribute_name'] = attr_name
repro_rec['attr_label'] = attr_id
repro_recs.append(repro_rec)
return repro_recs
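# A hypothetical usage sketch (the nuScenes visibility bins are tokens
# '1'-'4'; `sample_data_token` must reference a camera keyframe):
#   nusc = NuScenes(version='v1.0-mini', dataroot='data/nuscenes')
#   recs = get_nuscenes_2d_boxes(
#       nusc, sample_data_token, visibilities=['1', '2', '3', '4'])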
def get_kitti_style_2d_boxes(info: dict,
cam_idx: int = 2,
occluded: Tuple[int] = (0, 1, 2, 3),
annos: Optional[dict] = None,
mono3d: bool = True,
dataset: str = 'kitti') -> List[dict]:
"""Get the 2d / mono3d annotation records for a given info.
This function is used to get 2D/Mono3D annotations when loading annotations
    from a KITTI-style dataset class, such as the KITTI and Waymo datasets.
Args:
info (dict): Information of the given sample data.
        cam_idx (int): Camera id of the 2d / mono3d annotations to obtain.
            In KITTI, typically only CAM 2 is used; in Waymo, multiple
            cameras can be used.
Defaults to 2.
occluded (Tuple[int]): Integer (0, 1, 2, 3) indicating occlusion state:
0 = fully visible, 1 = partly occluded, 2 = largely occluded,
3 = unknown, -1 = DontCare.
Defaults to (0, 1, 2, 3).
annos (dict, optional): Original annotations. Defaults to None.
mono3d (bool): Whether to get boxes with mono3d annotation.
Defaults to True.
dataset (str): Dataset name of getting 2d bboxes.
Defaults to 'kitti'.
    Returns:
        List[dict]: List of 2d / mono3d annotation records that
        belong to the input camera id.
"""
# Get calibration information
camera_intrinsic = info['calib'][f'P{cam_idx}']
repro_recs = []
# if no annotations in info (test dataset), then return
if annos is None:
return repro_recs
    # Get all the annotations with the specified visibilities.
# filter the annotation bboxes by occluded attributes
ann_dicts = annos
mask = [(ocld in occluded) for ocld in ann_dicts['occluded']]
for k in ann_dicts.keys():
ann_dicts[k] = ann_dicts[k][mask]
# convert dict of list to list of dict
ann_recs = []
for i in range(len(ann_dicts['occluded'])):
ann_rec = {}
for k in ann_dicts.keys():
ann_rec[k] = ann_dicts[k][i]
ann_recs.append(ann_rec)
for ann_idx, ann_rec in enumerate(ann_recs):
# Augment sample_annotation with token information.
ann_rec['sample_annotation_token'] = \
f"{info['image']['image_idx']}.{ann_idx}"
ann_rec['sample_data_token'] = info['image']['image_idx']
loc = ann_rec['location'][np.newaxis, :]
dim = ann_rec['dimensions'][np.newaxis, :]
rot = ann_rec['rotation_y'][np.newaxis, np.newaxis]
# transform the center from [0.5, 1.0, 0.5] to [0.5, 0.5, 0.5]
dst = np.array([0.5, 0.5, 0.5])
src = np.array([0.5, 1.0, 0.5])
# gravity center
loc_center = loc + dim * (dst - src)
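        # e.g. assuming KITTI-style camera dims ordered (l, h, w) with y
        # pointing down: dst - src = [0, -0.5, 0], so the gravity center
        # sits h / 2 above the annotated bottom center
        # (loc_center_y = loc_y - 0.5 * h)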
gt_bbox_3d = np.concatenate([loc_center, dim, rot],
axis=1).astype(np.float32)
# Filter out the corners that are not in front of the calibrated
# sensor.
corners_3d = box_np_ops.center_to_corner_box3d(
gt_bbox_3d[:, :3],
gt_bbox_3d[:, 3:6],
gt_bbox_3d[:, 6], (0.5, 0.5, 0.5),
axis=1)
corners_3d = corners_3d[0].T # (1, 8, 3) -> (3, 8)
in_front = np.argwhere(corners_3d[2, :] > 0).flatten()
corners_3d = corners_3d[:, in_front]
# Project 3d box to 2d.
corner_coords = view_points(corners_3d, camera_intrinsic,
True).T[:, :2].tolist()
# Keep only corners that fall within the image.
final_coords = post_process_coords(
corner_coords,
imsize=(info['image']['image_shape'][1],
info['image']['image_shape'][0]))
# Skip if the convex hull of the re-projected corners
# does not intersect the image canvas.
if final_coords is None:
continue
else:
min_x, min_y, max_x, max_y = final_coords
# Generate dictionary record to be included in the .json file.
repro_rec = generate_record(ann_rec, min_x, min_y, max_x, max_y,
dataset)
# If mono3d=True, add 3D annotations in camera coordinates
if mono3d and (repro_rec is not None):
# use bottom center to represent the bbox_3d
repro_rec['bbox_3d'] = np.concatenate(
[loc, dim, rot], axis=1).astype(np.float32).squeeze().tolist()
repro_rec['velocity'] = -1 # no velocity in KITTI
center_3d = np.array(loc_center).reshape([1, 3])
center_2d_with_depth = points_cam2img(
center_3d, camera_intrinsic, with_depth=True)
center_2d_with_depth = center_2d_with_depth.squeeze().tolist()
repro_rec['center_2d'] = center_2d_with_depth[:2]
repro_rec['depth'] = center_2d_with_depth[2]
# normalized center2D + depth
            # samples with depth <= 0 will be removed
if repro_rec['depth'] <= 0:
continue
repro_recs.append(repro_rec)
return repro_recs
def convert_annos(info: dict, cam_idx: int) -> dict:
"""Convert front-cam anns to i-th camera (KITTI-style info)."""
rect = info['calib']['R0_rect'].astype(np.float32)
lidar2cam0 = info['calib']['Tr_velo_to_cam'].astype(np.float32)
lidar2cami = info['calib'][f'Tr_velo_to_cam{cam_idx}'].astype(np.float32)
annos = info['annos']
converted_annos = copy.deepcopy(annos)
loc = annos['location']
dims = annos['dimensions']
rots = annos['rotation_y']
gt_bboxes_3d = np.concatenate([loc, dims, rots[..., np.newaxis]],
axis=1).astype(np.float32)
# convert gt_bboxes_3d to velodyne coordinates
gt_bboxes_3d = CameraInstance3DBoxes(gt_bboxes_3d).convert_to(
Box3DMode.LIDAR, np.linalg.inv(rect @ lidar2cam0), correct_yaw=True)
# convert gt_bboxes_3d to cam coordinates
gt_bboxes_3d = gt_bboxes_3d.convert_to(
Box3DMode.CAM, rect @ lidar2cami, correct_yaw=True).numpy()
converted_annos['location'] = gt_bboxes_3d[:, :3]
converted_annos['dimensions'] = gt_bboxes_3d[:, 3:6]
converted_annos['rotation_y'] = gt_bboxes_3d[:, 6]
return converted_annos
def post_process_coords(
corner_coords: List[int], imsize: Tuple[int] = (1600, 900)
) -> Union[Tuple[float], None]:
"""Get the intersection of the convex hull of the reprojected bbox corners
and the image canvas, return None if no intersection.
Args:
corner_coords (List[int]): Corner coordinates of reprojected
bounding box.
imsize (Tuple[int]): Size of the image canvas.
Defaults to (1600, 900).
    Returns:
Tuple[float] or None: Intersection of the convex hull of the 2D box
corners and the image canvas.
"""
polygon_from_2d_box = MultiPoint(corner_coords).convex_hull
img_canvas = box(0, 0, imsize[0], imsize[1])
if polygon_from_2d_box.intersects(img_canvas):
img_intersection = polygon_from_2d_box.intersection(img_canvas)
if isinstance(img_intersection, Polygon):
intersection_coords = np.array(
[coord for coord in img_intersection.exterior.coords])
min_x = min(intersection_coords[:, 0])
min_y = min(intersection_coords[:, 1])
max_x = max(intersection_coords[:, 0])
max_y = max(intersection_coords[:, 1])
return min_x, min_y, max_x, max_y
else:
warnings.warn('img_intersection is not an object of Polygon.')
return None
else:
return None
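# A minimal usage sketch: a box fully inside the default 1600x900 canvas is
# returned unchanged, while one entirely outside yields None:
#   post_process_coords([(10, 20), (300, 20), (300, 200), (10, 200)])
#   -> (10.0, 20.0, 300.0, 200.0)
#   post_process_coords([(-50, -50), (-10, -50), (-10, -10), (-50, -10)])
#   -> None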
def generate_record(ann_rec: dict, x1: float, y1: float, x2: float, y2: float,
dataset: str) -> Union[dict, None]:
"""Generate one 2D annotation record given various information on top of
the 2D bounding box coordinates.
Args:
ann_rec (dict): Original 3d annotation record.
x1 (float): Minimum value of the x coordinate.
y1 (float): Minimum value of the y coordinate.
x2 (float): Maximum value of the x coordinate.
y2 (float): Maximum value of the y coordinate.
dataset (str): Name of dataset.
Returns:
dict or None: A sample 2d annotation record.
- bbox_label (int): 2d box label id
- bbox_label_3d (int): 3d box label id
- bbox (List[float]): left x, top y, right x, bottom y of 2d box
- bbox_3d_isvalid (bool): whether the box is valid
"""
if dataset == 'nuscenes':
cat_name = ann_rec['category_name']
if cat_name not in NuScenesNameMapping:
return None
else:
cat_name = NuScenesNameMapping[cat_name]
categories = nus_categories
else:
if dataset == 'kitti':
categories = kitti_categories
elif dataset == 'waymo':
categories = waymo_categories
else:
raise NotImplementedError('Unsupported dataset!')
cat_name = ann_rec['name']
if cat_name not in categories:
return None
rec = dict()
rec['bbox_label'] = categories.index(cat_name)
rec['bbox_label_3d'] = rec['bbox_label']
rec['bbox'] = [x1, y1, x2, y2]
rec['bbox_3d_isvalid'] = True
return rec
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import warnings
from typing import List, Set, Union
import numpy as np
from mmengine.dataset import BaseDataset, force_full_init
from mmdet3d.registry import DATASETS
@DATASETS.register_module()
class CBGSDataset:
    """A dataset wrapper that performs class-balanced sampling. Implementation
    of the paper `Class-balanced Grouping and Sampling for Point Cloud 3D
    Object Detection <https://arxiv.org/abs/1908.09492>`_.
    It balances the number of scenes under different classes.
Args:
dataset (:obj:`BaseDataset` or dict): The dataset to be class sampled.
        lazy_init (bool): Whether to defer loading annotations until
            ``full_init`` is called. Defaults to False.
"""
def __init__(self,
dataset: Union[BaseDataset, dict],
lazy_init: bool = False) -> None:
self.dataset: BaseDataset
if isinstance(dataset, dict):
self.dataset = DATASETS.build(dataset)
elif isinstance(dataset, BaseDataset):
self.dataset = dataset
else:
raise TypeError(
                'dataset should be a config dict or a '
                f'`BaseDataset` instance, but got {type(dataset)}')
self._metainfo = self.dataset.metainfo
self._fully_initialized = False
if not lazy_init:
self.full_init()
@property
    def metainfo(self) -> dict:
        """Get the meta information of the wrapped dataset.
        Returns:
            dict: The meta information of the wrapped dataset.
"""
return copy.deepcopy(self._metainfo)
    def full_init(self) -> None:
        """Fully initialize the wrapped dataset and build sample indices."""
if self._fully_initialized:
return
self.dataset.full_init()
# Get sample_indices
self.sample_indices = self._get_sample_indices(self.dataset)
self._fully_initialized = True
def _get_sample_indices(self, dataset: BaseDataset) -> List[int]:
"""Load sample indices according to ann_file.
Args:
dataset (:obj:`BaseDataset`): The dataset.
Returns:
            List[int]: List of indices after class sampling.
"""
classes = self.metainfo['classes']
cat2id = {name: i for i, name in enumerate(classes)}
class_sample_idxs = {cat_id: [] for cat_id in cat2id.values()}
for idx in range(len(dataset)):
sample_cat_ids = dataset.get_cat_ids(idx)
for cat_id in sample_cat_ids:
if cat_id != -1:
                    # Filter out categories that we do not care about;
                    # -1 indicates DontCare in MMDet3D.
class_sample_idxs[cat_id].append(idx)
duplicated_samples = sum(
[len(v) for _, v in class_sample_idxs.items()])
class_distribution = {
k: len(v) / duplicated_samples
for k, v in class_sample_idxs.items()
}
sample_indices = []
frac = 1.0 / len(classes)
ratios = [frac / v for v in class_distribution.values()]
for cls_inds, ratio in zip(list(class_sample_idxs.values()), ratios):
sample_indices += np.random.choice(cls_inds,
int(len(cls_inds) *
ratio)).tolist()
return sample_indices
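    # A worked sketch of the balancing math in `_get_sample_indices`: with
    # two classes, where class A occurs in 900 samples and class B in 100,
    # `frac` = 0.5 and the ratios are [0.5 / 0.9, 0.5 / 0.1] ~= [0.56, 5.0],
    # so roughly 500 of the final indices are drawn from each class.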
@force_full_init
def _get_ori_dataset_idx(self, idx: int) -> int:
"""Convert global index to local index.
Args:
idx (int): Global index of ``CBGSDataset``.
Returns:
int: Local index of data.
"""
return self.sample_indices[idx]
@force_full_init
def get_cat_ids(self, idx: int) -> Set[int]:
"""Get category ids of class balanced dataset by index.
Args:
idx (int): Index of data.
Returns:
Set[int]: All categories in the sample of specified index.
"""
sample_idx = self._get_ori_dataset_idx(idx)
return self.dataset.get_cat_ids(sample_idx)
@force_full_init
def get_data_info(self, idx: int) -> dict:
"""Get annotation by index.
Args:
idx (int): Global index of ``CBGSDataset``.
Returns:
dict: The idx-th annotation of the dataset.
"""
sample_idx = self._get_ori_dataset_idx(idx)
return self.dataset.get_data_info(sample_idx)
def __getitem__(self, idx: int) -> dict:
"""Get item from infos according to the given index.
Args:
idx (int): The index of self.sample_indices.
Returns:
dict: Data dictionary of the corresponding index.
"""
if not self._fully_initialized:
            warnings.warn('Please call the `full_init` method manually to '
                          'accelerate data loading.')
self.full_init()
ori_index = self._get_ori_dataset_idx(idx)
return self.dataset[ori_index]
@force_full_init
def __len__(self) -> int:
"""Return the length of data infos.
Returns:
int: Length of data infos.
"""
return len(self.sample_indices)
    def get_subset_(self, indices: Union[List[int], int]) -> None:
        """Not supported in ``CBGSDataset`` because of the ambiguous meaning
        of sub-dataset."""
raise NotImplementedError(
'`CBGSDataset` does not support `get_subset` and '
'`get_subset_` interfaces because this will lead to ambiguous '
'implementation of some methods. If you want to use `get_subset` '
'or `get_subset_` interfaces, please use them in the wrapped '
'dataset first and then use `CBGSDataset`.')
    def get_subset(self, indices: Union[List[int], int]) -> BaseDataset:
        """Not supported in ``CBGSDataset`` because of the ambiguous meaning
        of sub-dataset."""
raise NotImplementedError(
'`CBGSDataset` does not support `get_subset` and '
'`get_subset_` interfaces because this will lead to ambiguous '
'implementation of some methods. If you want to use `get_subset` '
'or `get_subset_` interfaces, please use them in the wrapped '
'dataset first and then use `CBGSDataset`.')
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import os
from os import path as osp
from typing import Callable, List, Optional, Set, Union
import numpy as np
import torch
from mmengine.dataset import BaseDataset
from mmengine.logging import print_log
from terminaltables import AsciiTable
from mmdet3d.registry import DATASETS
from mmdet3d.structures import get_box_type
@DATASETS.register_module()
class Det3DDataset(BaseDataset):
"""Base Class of 3D dataset.
This is the base dataset of SUNRGB-D, ScanNet, nuScenes, and KITTI
dataset.
# TODO: doc link here for the standard data format
Args:
data_root (str, optional): The root directory for ``data_prefix`` and
``ann_file``. Defaults to None.
ann_file (str): Annotation file path. Defaults to ''.
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict): Prefix for training data. Defaults to
dict(pts='velodyne', img='').
pipeline (List[dict]): Pipeline used for data processing.
Defaults to [].
        modality (dict): Modality to specify the sensor data used as input;
            it usually has the following keys:
- use_camera: bool
- use_lidar: bool
Defaults to dict(use_lidar=True, use_camera=False).
default_cam_key (str, optional): The default camera name adopted.
Defaults to None.
box_type_3d (str): Type of 3D box of this dataset.
            Based on the `box_type_3d`, the dataset will encapsulate the box
            in its original format and then convert it to `box_type_3d`.
            Defaults to 'LiDAR' in this dataset. Available options include:
- 'LiDAR': Box in LiDAR coordinates, usually for
outdoor point cloud 3d detection.
- 'Depth': Box in depth coordinates, usually for
indoor point cloud 3d detection.
- 'Camera': Box in camera coordinates, usually
for vision-based 3d detection.
        filter_empty_gt (bool): Whether to filter out data with empty GT.
            If set to True, examples with empty annotations after the
            data pipeline will be dropped and a random example will be
            chosen in `__getitem__`. Defaults to True.
test_mode (bool): Whether the dataset is in test mode.
Defaults to False.
        load_eval_anns (bool): Whether to load annotations in test_mode.
            The annotations will be saved in `eval_ann_info`, which can be
            used by the Evaluator. Defaults to True.
backend_args (dict, optional): Arguments to instantiate the
corresponding backend. Defaults to None.
        show_ins_var (bool): For debugging purposes. Whether to show the
            variation in the number of instances before and after the
            pipeline. Defaults to False.
"""
def __init__(self,
data_root: Optional[str] = None,
ann_file: str = '',
metainfo: Optional[dict] = None,
data_prefix: dict = dict(pts='velodyne', img=''),
pipeline: List[Union[dict, Callable]] = [],
modality: dict = dict(use_lidar=True, use_camera=False),
                 default_cam_key: Optional[str] = None,
                 box_type_3d: str = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
load_eval_anns: bool = True,
backend_args: Optional[dict] = None,
show_ins_var: bool = False,
**kwargs) -> None:
self.backend_args = backend_args
self.filter_empty_gt = filter_empty_gt
self.load_eval_anns = load_eval_anns
_default_modality_keys = ('use_lidar', 'use_camera')
if modality is None:
modality = dict()
        # Defaults to False if not specified
for key in _default_modality_keys:
if key not in modality:
modality[key] = False
self.modality = modality
self.default_cam_key = default_cam_key
        assert self.modality['use_lidar'] or self.modality['use_camera'], (
            'Please specify the `modality` (`use_lidar`, '
            f'`use_camera`) for {self.__class__.__name__}')
self.box_type_3d, self.box_mode_3d = get_box_type(box_type_3d)
if metainfo is not None and 'classes' in metainfo:
            # we allow training on a subset of self.METAINFO['classes']
# map unselected labels to -1
self.label_mapping = {
i: -1
for i in range(len(self.METAINFO['classes']))
}
self.label_mapping[-1] = -1
for label_idx, name in enumerate(metainfo['classes']):
ori_label = self.METAINFO['classes'].index(name)
self.label_mapping[ori_label] = label_idx
self.num_ins_per_cat = [0] * len(metainfo['classes'])
else:
self.label_mapping = {
i: i
for i in range(len(self.METAINFO['classes']))
}
self.label_mapping[-1] = -1
self.num_ins_per_cat = [0] * len(self.METAINFO['classes'])
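        # Illustrative example (hypothetical class names): if
        # METAINFO['classes'] is ('car', 'truck', 'bus') and the user passes
        # metainfo=dict(classes=['car', 'bus']), label_mapping becomes
        # {0: 0, 1: -1, 2: 1, -1: -1}: 'truck' is mapped to the ignored
        # label -1 and 'bus' is remapped from 2 to 1.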
super().__init__(
ann_file=ann_file,
metainfo=metainfo,
data_root=data_root,
data_prefix=data_prefix,
pipeline=pipeline,
test_mode=test_mode,
**kwargs)
        # can be accessed by other components in the runner
self.metainfo['box_type_3d'] = box_type_3d
self.metainfo['label_mapping'] = self.label_mapping
if not kwargs.get('lazy_init', False):
            # used for showing the variation in the number of instances
            # before and after the pipeline
self.show_ins_var = show_ins_var
# show statistics of this dataset
print_log('-' * 30, 'current')
print_log(
f'The length of {"test" if self.test_mode else "training"} dataset: {len(self)}', # noqa: E501
'current')
content_show = [['category', 'number']]
for label, num in enumerate(self.num_ins_per_cat):
cat_name = self.metainfo['classes'][label]
content_show.append([cat_name, num])
table = AsciiTable(content_show)
print_log(
f'The number of instances per category in the dataset:\n{table.table}', # noqa: E501
'current')
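    # The statistics printed above render roughly as follows (illustrative
    # counts only):
    #
    # +------------+--------+
    # | category   | number |
    # +------------+--------+
    # | car        | 13442  |
    # | pedestrian | 2280   |
    # +------------+--------+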
def _remove_dontcare(self, ann_info: dict) -> dict:
"""Remove annotations that do not need to be cared.
-1 indicates dontcare in MMDet3d.
Args:
ann_info (dict): Dict of annotation infos. The
instance with label `-1` will be removed.
Returns:
dict: Annotations after filtering.
"""
img_filtered_annotations = {}
filter_mask = ann_info['gt_labels_3d'] > -1
for key in ann_info.keys():
if key != 'instances':
img_filtered_annotations[key] = (ann_info[key][filter_mask])
else:
img_filtered_annotations[key] = ann_info[key]
return img_filtered_annotations
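    # A sketch of the filtering above (toy values): given
    # ann_info = dict(gt_labels_3d=np.array([0, -1, 2]), ...), filter_mask
    # is [True, False, True], so every per-instance array keeps rows 0 and 2
    # while a raw 'instances' list, if present, is passed through unchanged.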
def get_ann_info(self, index: int) -> dict:
"""Get annotation info according to the given index.
        Use index to get the corresponding annotations, so that the
        eval hook can use this API.
Args:
index (int): Index of the annotation data to get.
Returns:
dict: Annotation information.
"""
data_info = self.get_data_info(index)
        # test mode
if 'ann_info' not in data_info:
ann_info = self.parse_ann_info(data_info)
else:
ann_info = data_info['ann_info']
return ann_info
def parse_ann_info(self, info: dict) -> Union[dict, None]:
"""Process the `instances` in data info to `ann_info`.
        In `Det3DDataset`, we simply concatenate each field in
        `instances` into an `np.ndarray`; dataset-specific processing
        can be done in a subclass. You have to convert `gt_bboxes_3d`
        to different coordinates according to the task.
Args:
info (dict): Info dict.
Returns:
dict or None: Processed `ann_info`.
"""
        # add an 's' suffix or 'gt' prefix to most keys after concatenation
# we only process 3d annotations here, the corresponding
# 2d annotation process is in the `LoadAnnotations3D`
# in `transforms`
name_mapping = {
'bbox_label_3d': 'gt_labels_3d',
'bbox_label': 'gt_bboxes_labels',
'bbox': 'gt_bboxes',
'bbox_3d': 'gt_bboxes_3d',
'depth': 'depths',
'center_2d': 'centers_2d',
'attr_label': 'attr_labels',
'velocity': 'velocities',
}
instances = info['instances']
# empty gt
if len(instances) == 0:
return None
else:
keys = list(instances[0].keys())
ann_info = dict()
for ann_name in keys:
temp_anns = [item[ann_name] for item in instances]
# map the original dataset label to training label
if 'label' in ann_name and ann_name != 'attr_label':
temp_anns = [
self.label_mapping[item] for item in temp_anns
]
if ann_name in name_mapping:
mapped_ann_name = name_mapping[ann_name]
else:
mapped_ann_name = ann_name
if 'label' in ann_name:
temp_anns = np.array(temp_anns).astype(np.int64)
elif ann_name in name_mapping:
temp_anns = np.array(temp_anns).astype(np.float32)
else:
temp_anns = np.array(temp_anns)
ann_info[mapped_ann_name] = temp_anns
ann_info['instances'] = info['instances']
for label in ann_info['gt_labels_3d']:
if label != -1:
self.num_ins_per_cat[label] += 1
return ann_info
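    # A toy example of the conversion above (hypothetical values): for
    # instances = [dict(bbox_label_3d=0, bbox_3d=[0, 0, 0, 1, 1, 1, 0])],
    # the returned ann_info contains gt_labels_3d as an int64 array ([0])
    # and gt_bboxes_3d as a float32 array of shape (1, 7), plus the
    # original 'instances' list under the 'instances' key.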
def parse_data_info(self, info: dict) -> dict:
"""Process the raw data info.
        Convert the relative paths of all required modality data files
        to absolute paths, and process the `instances` field into
        `ann_info` in the training stage.
Args:
info (dict): Raw info dict.
Returns:
            dict: Info dict with `ann_info` in the training stage, and
            with all paths converted to absolute paths.
"""
if self.modality['use_lidar']:
info['lidar_points']['lidar_path'] = \
osp.join(
self.data_prefix.get('pts', ''),
info['lidar_points']['lidar_path'])
info['num_pts_feats'] = info['lidar_points']['num_pts_feats']
info['lidar_path'] = info['lidar_points']['lidar_path']
if 'lidar_sweeps' in info:
for sweep in info['lidar_sweeps']:
file_suffix = sweep['lidar_points']['lidar_path'].split(
os.sep)[-1]
if 'samples' in sweep['lidar_points']['lidar_path']:
sweep['lidar_points']['lidar_path'] = osp.join(
self.data_prefix['pts'], file_suffix)
else:
sweep['lidar_points']['lidar_path'] = osp.join(
self.data_prefix['sweeps'], file_suffix)
if self.modality['use_camera']:
for cam_id, img_info in info['images'].items():
if 'img_path' in img_info:
if cam_id in self.data_prefix:
cam_prefix = self.data_prefix[cam_id]
else:
cam_prefix = self.data_prefix.get('img', '')
img_info['img_path'] = osp.join(cam_prefix,
img_info['img_path'])
if self.default_cam_key is not None:
info['img_path'] = info['images'][
self.default_cam_key]['img_path']
if 'lidar2cam' in info['images'][self.default_cam_key]:
info['lidar2cam'] = np.array(
info['images'][self.default_cam_key]['lidar2cam'])
if 'cam2img' in info['images'][self.default_cam_key]:
info['cam2img'] = np.array(
info['images'][self.default_cam_key]['cam2img'])
if 'lidar2img' in info['images'][self.default_cam_key]:
info['lidar2img'] = np.array(
info['images'][self.default_cam_key]['lidar2img'])
else:
info['lidar2img'] = info['cam2img'] @ info['lidar2cam']
if not self.test_mode:
# used in training
info['ann_info'] = self.parse_ann_info(info)
if self.test_mode and self.load_eval_anns:
info['eval_ann_info'] = self.parse_ann_info(info)
return info
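    # Note on the 'lidar2img' fallback above: when the info file carries no
    # precomputed projection matrix, it is composed as cam2img @ lidar2cam,
    # i.e. a point is first transformed from the LiDAR frame into the camera
    # frame and then projected by the camera intrinsics. This assumes both
    # matrices are stored in compatible (e.g. 4x4 homogeneous) form.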
def _show_ins_var(self, old_labels: np.ndarray,
new_labels: torch.Tensor) -> None:
"""Show variation of the number of instances before and after through
the pipeline.
Args:
            old_labels (np.ndarray): The labels before the pipeline.
            new_labels (torch.Tensor): The labels after the pipeline.
"""
ori_num_per_cat = dict()
for label in old_labels:
if label != -1:
cat_name = self.metainfo['classes'][label]
ori_num_per_cat[cat_name] = ori_num_per_cat.get(cat_name,
0) + 1
new_num_per_cat = dict()
for label in new_labels:
if label != -1:
cat_name = self.metainfo['classes'][label]
new_num_per_cat[cat_name] = new_num_per_cat.get(cat_name,
0) + 1
content_show = [['category', 'new number', 'ori number']]
for cat_name, num in ori_num_per_cat.items():
new_num = new_num_per_cat.get(cat_name, 0)
content_show.append([cat_name, new_num, num])
table = AsciiTable(content_show)
print_log(
            'The number of instances per category after and before '
            f'the pipeline:\n{table.table}', 'current')
def prepare_data(self, index: int) -> Union[dict, None]:
"""Data preparation for both training and testing stage.
Called by `__getitem__` of dataset.
Args:
index (int): Index for accessing the target data.
Returns:
dict or None: Data dict of the corresponding index.
"""
ori_input_dict = self.get_data_info(index)
# deepcopy here to avoid inplace modification in pipeline.
input_dict = copy.deepcopy(ori_input_dict)
# box_type_3d (str): 3D box type.
input_dict['box_type_3d'] = self.box_type_3d
# box_mode_3d (str): 3D box mode.
input_dict['box_mode_3d'] = self.box_mode_3d
        # pre-pipeline: return None so that `__getitem__` randomly
        # samples another index
if not self.test_mode and self.filter_empty_gt:
if len(input_dict['ann_info']['gt_labels_3d']) == 0:
return None
example = self.pipeline(input_dict)
if not self.test_mode and self.filter_empty_gt:
            # after the pipeline, drop examples with empty annotations and
            # return None so that `__getitem__` randomly samples another index
if example is None or len(
example['data_samples'].gt_instances_3d.labels_3d) == 0:
return None
if self.show_ins_var:
if 'ann_info' in ori_input_dict:
self._show_ins_var(
ori_input_dict['ann_info']['gt_labels_3d'],
example['data_samples'].gt_instances_3d.labels_3d)
else:
print_log(
"'ann_info' is not in the input dict. It's probably that "
'the data is not in training mode',
'current',
level=30)
return example
def get_cat_ids(self, idx: int) -> Set[int]:
"""Get category ids by index. Dataset wrapped by ClassBalancedDataset
must implement this method.
The ``CBGSDataset`` or ``ClassBalancedDataset``requires a subclass
which implements this method.
Args:
idx (int): The index of data.
Returns:
set[int]: All categories in the sample of specified index.
"""
info = self.get_data_info(idx)
gt_labels = info['ann_info']['gt_labels_3d'].tolist()
return set(gt_labels)
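

# A minimal subclassing sketch (hypothetical dataset name and paths):
# concrete datasets typically only provide METAINFO and, if needed,
# override parse_ann_info; path handling, label mapping and statistics
# printing are inherited from Det3DDataset.
#
# @DATASETS.register_module()
# class ToyDataset(Det3DDataset):
#     METAINFO = dict(classes=('car', 'pedestrian'))
#
# dataset = ToyDataset(
#     data_root='data/toy/',
#     ann_file='toy_infos_train.pkl',
#     pipeline=[])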