Commit c9ad3605 authored by jshilong's avatar jshilong Committed by ChaimZhu

[Refactor]New version VoteNet

parent db44cc50
# dataset settings
dataset_type = 'ScanNetDataset'
data_root = './data/scannet/'
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
'garbagebin')
metainfo = dict(
CLASSES=('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
'garbagebin'))
train_pipeline = [
dict(
type='LoadPointsFromFile',
......@@ -35,9 +37,8 @@ train_pipeline = [
rot_range=[-0.087266, 0.087266],
scale_ratio_range=[1.0, 1.0],
shift_height=True),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
type='Pack3DDetInputs',
keys=[
'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
'pts_instance_mask'
......@@ -68,61 +69,51 @@ test_pipeline = [
flip_ratio_bev_horizontal=0.5,
flip_ratio_bev_vertical=0.5),
dict(type='PointSample', num_points=40000),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='GlobalAlignment', rotation_axis=2),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
samples_per_gpu=8,
workers_per_gpu=4,
train=dict(
train_dataloader = dict(
batch_size=8,
num_workers=4,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_train.pkl',
ann_file='scannet_infos_train.pkl',
pipeline=train_pipeline,
filter_empty_gt=False,
classes=class_names,
metainfo=metainfo,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Depth')),
val=dict(
box_type_3d='Depth')))
val_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
ann_file='scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'),
test=dict(
box_type_3d='Depth'))
test_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'scannet_infos_val.pkl',
ann_file='scannet_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'))
evaluation = dict(pipeline=eval_pipeline)
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator
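As a quick sanity check, the new-style dataloader config can be consumed directly through the registry. A minimal sketch, assuming mmdet3d dev-1.x with mmengine is installed, the ScanNet info files are prepared under ./data/scannet, and the snippet runs in a Python session where this config has been loaded:

from mmdet3d.registry import DATASETS

# Builds the RepeatDataset wrapper, which in turn builds ScanNetDataset.
train_set = DATASETS.build(train_dataloader['dataset'])
# With times=5, the repeated length is 5 x len(scannet_infos_train.pkl).
print(len(train_set))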
default_scope = 'mmdet3d'
default_hooks = dict(
optimizer=dict(type='OptimizerHook', grad_clip=None),
timer=dict(type='IterTimerHook'),
logger=dict(type='LoggerHook', interval=50),
param_scheduler=dict(type='ParamSchedulerHook'),
......
model = dict(
type='VoteNet',
data_preprocessor=dict(type='Det3DDataPreprocessor'),
backbone=dict(
type='PointNet2SASSG',
in_channels=4,
......@@ -40,10 +41,8 @@ model = dict(
normalize_xyz=True),
pred_layer_cfg=dict(
in_channels=128, shared_conv_channels=(128, 128), bias=True),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
type='mmdet.CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
......@@ -54,20 +53,21 @@ model = dict(
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
type='mmdet.SmoothL1Loss', reduction='sum',
loss_weight=10.0 / 3.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
# model training and testing settings
train_cfg=dict(
pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),
pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mode='vote'),
test_cfg=dict(
sample_mod='seed',
sample_mode='seed',
nms_thr=0.25,
score_thr=0.05,
per_class_proposal=True))
......@@ -3,22 +3,47 @@
# interval to be 20. Please change the interval accordingly if you do not
# use a default schedule.
# optimizer
lr = 1e-4
iter_num_in_epoch = 3712
# This schedule is mainly used by models on nuScenes dataset
optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4,
)
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=lr, weight_decay=0.01),
clip_grad=dict(max_norm=35, norm_type=2))
# learning rate
param_scheduler = [
dict(
type='CosineAnnealingLR',
T_max=8 * iter_num_in_epoch,
eta_min=lr * 10,
by_epoch=False,
begin=0,
end=8 * iter_num_in_epoch),
dict(
type='CosineAnnealingLR',
T_max=12 * iter_num_in_epoch,
eta_min=lr * 1e-4,
by_epoch=False,
begin=8 * iter_num_in_epoch,
end=20 * iter_num_in_epoch),
dict(
type='CosineAnnealingBetas',
T_max=8 * iter_num_in_epoch,
eta_min=0.85 / 0.95,
by_epoch=False,
begin=0,
end=8 * iter_num_in_epoch),
dict(
type='CosineAnnealingBetas',
T_max=12 * iter_num_in_epoch,
eta_min=1,
by_epoch=False,
begin=8 * iter_num_in_epoch,
end=20 * iter_num_in_epoch)
]
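The two CosineAnnealingLR phases above replace the old one-cycle policy: step_ratio_up=0.4 of 20 epochs gives the 8-epoch rising leg (up to 10 * lr) and the 12-epoch falling leg (down to lr * 1e-4). A pure-Python sketch of the boundary values (the actual mmengine schedulers chain recursively, but the endpoints match this closed form):

import math

lr, iters = 1e-4, 3712

def cosine(t, begin, end, start_value, eta_min):
    # cosine annealing from start_value at `begin` to eta_min at `end`
    frac = (t - begin) / (end - begin)
    return eta_min + (start_value - eta_min) * (1 + math.cos(math.pi * frac)) / 2

print(cosine(0, 0, 8 * iters, lr, lr * 10))            # 1e-4, start of the rise
print(cosine(8 * iters, 0, 8 * iters, lr, lr * 10))    # 1e-3, peak at epoch 8
print(cosine(20 * iters, 8 * iters, 20 * iters, lr * 10, lr * 1e-4))  # ~1e-8, end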
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=20)
train_cfg = dict(by_epoch=True, max_epochs=20)
val_cfg = dict(interval=1)
test_cfg = dict()
# The schedule is usually used by models trained on KITTI dataset
# The learning rate set in the cyclic schedule is the initial learning rate
# rather than the max learning rate. Since the target_ratio is (10, 1e-4),
# the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4
lr = 0.0018
iter_num_in_epoch = 3712
# The optimizer follows the setting in SECOND.Pytorch, but here we use
# the official AdamW optimizer implemented by PyTorch.
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
# We use cyclic learning rate and momentum schedule following SECOND.Pytorch
# https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa
# We implement them in mmcv, for more details, please refer to
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa
lr_config = dict(
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4,
)
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4,
)
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01),
clip_grad=dict(max_norm=10, norm_type=2))
# learning rate
param_scheduler = [
dict(
type='CosineAnnealingLR',
T_max=16 * iter_num_in_epoch,
eta_min=lr * 10,
by_epoch=False,
begin=0,
end=16 * iter_num_in_epoch),
dict(
type='CosineAnnealingLR',
T_max=24 * iter_num_in_epoch,
eta_min=lr * 1e-4,
by_epoch=False,
begin=16 * iter_num_in_epoch,
end=40 * iter_num_in_epoch),
dict(
type='CosineAnnealingBetas',
T_max=16 * iter_num_in_epoch,
eta_min=0.85 / 0.95,
by_epoch=False,
begin=0,
end=16 * iter_num_in_epoch),
dict(
type='CosineAnnealingBetas',
T_max=24 * iter_num_in_epoch,
eta_min=1,
by_epoch=False,
begin=16 * iter_num_in_epoch,
end=40 * iter_num_in_epoch)
]
# Runtime settings: training schedule for 40e.
# Although max_epochs is 40, this schedule is usually used with
# RepeatDataset with repeat ratio N, thus the actual max epoch
# number could be N x 40
runner = dict(type='EpochBasedRunner', max_epochs=40)
train_cfg = dict(by_epoch=True, max_epochs=40)
val_cfg = dict(interval=1)
test_cfg = dict()
......@@ -2,8 +2,24 @@
# This schedule is mainly used by models on indoor dataset,
# e.g., VoteNet on SUNRGBD and ScanNet
lr = 0.008 # max learning rate
optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=36)
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='AdamW', lr=lr, weight_decay=0.01),
clip_grad=dict(max_norm=10, norm_type=2),
)
# training schedule for 1x
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=36, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# learning rate
param_scheduler = [
dict(
type='MultiStepLR',
begin=0,
end=36,
by_epoch=True,
milestones=[24, 32],
gamma=0.1)
]
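MultiStepLR above is the mmengine counterpart of the old lr_config = dict(policy='step', step=[24, 32]): the learning rate is multiplied by gamma=0.1 at epochs 24 and 32. A tiny pure-Python sketch of the resulting curve (an illustration, not the mmengine implementation):

def lr_at(epoch, base_lr=0.008, milestones=(24, 32), gamma=0.1):
    # multiply by gamma once for every milestone already passed
    return base_lr * gamma ** sum(epoch >= m for m in milestones)

assert lr_at(0) == 0.008
assert lr_at(24) == 0.0008
assert abs(lr_at(32) - 8e-05) < 1e-12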
......@@ -31,6 +31,4 @@ model = dict(
[1.1511526, 1.0546296, 0.49706793],
[0.47535285, 0.49249494, 0.5802117]])))
# yapf:disable
log_config = dict(interval=30)
# yapf:enable
default_hooks = dict(logger=dict(type='LoggerHook', interval=30))
......@@ -51,6 +51,8 @@ class Det3DDataSample(DetDataSample):
panoptic segmentation.
- ``pred_pts_panoptic_seg``(PixelData): Prediction of point cloud
panoptic segmentation.
- ``eval_ann_info``(dict): Raw annotation, which will be passed to
the evaluator for online evaluation.
Examples:
>>> from mmengine.data import InstanceData, PixelData
......
......@@ -205,7 +205,6 @@ def indoor_eval(gt_annos,
metric,
label2cat,
logger=None,
box_type_3d=None,
box_mode_3d=None):
"""Indoor Evaluation.
......@@ -217,11 +216,11 @@ def indoor_eval(gt_annos,
includes the following keys
- labels_3d (torch.Tensor): Labels of boxes.
- boxes_3d (:obj:`BaseInstance3DBoxes`):
- bboxes_3d (:obj:`BaseInstance3DBoxes`):
3D bounding boxes in Depth coordinate.
- scores_3d (torch.Tensor): Scores of boxes.
metric (list[float]): IoU thresholds for computing average precisions.
label2cat (dict): Map from label to category.
label2cat (tuple): Map from label to category.
logger (logging.Logger | str, optional): The way to print the mAP
summary. See `mmdet.utils.print_log()` for details. Default: None.
......@@ -236,7 +235,7 @@ def indoor_eval(gt_annos,
det_anno = dt_annos[img_id]
for i in range(len(det_anno['labels_3d'])):
label = det_anno['labels_3d'].numpy()[i]
bbox = det_anno['boxes_3d'].convert_to(box_mode_3d)[i]
bbox = det_anno['bboxes_3d'].convert_to(box_mode_3d)[i]
score = det_anno['scores_3d'].numpy()[i]
if label not in pred:
pred[int(label)] = {}
......@@ -250,15 +249,9 @@ def indoor_eval(gt_annos,
# parse gt annotations
gt_anno = gt_annos[img_id]
if gt_anno['gt_num'] != 0:
gt_boxes = box_type_3d(
gt_anno['gt_boxes_upright_depth'],
box_dim=gt_anno['gt_boxes_upright_depth'].shape[-1],
origin=(0.5, 0.5, 0.5)).convert_to(box_mode_3d)
labels_3d = gt_anno['class']
else:
gt_boxes = box_type_3d(np.array([], dtype=np.float32))
labels_3d = np.array([], dtype=np.int64)
gt_boxes = gt_anno['gt_bboxes_3d']
labels_3d = gt_anno['gt_labels_3d']
for i in range(len(labels_3d)):
label = labels_3d[i]
......
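With this change, indoor_eval consumes ready-made box structures produced by the pipeline instead of rebuilding them from raw 'gt_boxes_upright_depth' arrays. A sketch of the expected per-sample gt dict, with synthetic values and an import path assumed from mmdet3d at this point of the refactor:

import numpy as np
from mmdet3d.core.bbox import DepthInstance3DBoxes  # assumed import path

gt_anno = dict(
    # one (x, y, z, dx, dy, dz, yaw) box, already in Depth coordinates
    gt_bboxes_3d=DepthInstance3DBoxes(
        np.array([[0., 0., 0., 1., 1., 1., 0.]], dtype=np.float32)),
    gt_labels_3d=np.array([2], dtype=np.int64),
)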
......@@ -51,7 +51,7 @@ def merge_aug_bboxes_3d(aug_results, aug_batch_input_metas, test_cfg):
aug_labels = torch.cat(recovered_labels, dim=0)
# TODO: use a more elegent way to deal with nms
if test_cfg.use_rotate_nms:
if test_cfg.get('use_rotate_nms', False):
nms_func = nms_bev
else:
nms_func = nms_normal_bev
......@@ -83,7 +83,7 @@ def merge_aug_bboxes_3d(aug_results, aug_batch_input_metas, test_cfg):
merged_labels = torch.cat(merged_labels, dim=0)
_, order = merged_scores.sort(0, descending=True)
num = min(test_cfg.max_num, len(aug_bboxes))
num = min(test_cfg.get('max_num', 500), len(aug_bboxes))
order = order[:num]
merged_bboxes = merged_bboxes[order]
......
......@@ -47,10 +47,15 @@ class Det3DDataset(BaseDataset):
- 'Camera': Box in camera coordinates, usually
for vision-based 3d detection.
filter_empty_gt (bool, optional): Whether to filter the data with
filter_empty_gt (bool): Whether to filter the data with
empty GT. Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
test_mode (bool): Whether the dataset is in test mode.
Defaults to False.
load_eval_anns (bool): Whether to load annotations in test_mode.
The annotations will be saved in `eval_ann_infos`, which can be
used by the Evaluator. Defaults to True.
file_client_args (dict): Configuration of file client.
Defaults to `dict(backend='disk')`.
"""
def __init__(self,
......@@ -63,11 +68,13 @@ class Det3DDataset(BaseDataset):
box_type_3d: dict = 'LiDAR',
filter_empty_gt: bool = True,
test_mode: bool = False,
load_eval_anns=True,
file_client_args: dict = dict(backend='disk'),
**kwargs):
# init file client
self.file_client = mmcv.FileClient(**file_client_args)
self.filter_empty_gt = filter_empty_gt
self.load_eval_anns = load_eval_anns
_default_modality_keys = ('use_lidar', 'use_camera')
if modality is None:
modality = dict()
......@@ -82,7 +89,6 @@ class Det3DDataset(BaseDataset):
f', `use_camera`) for {self.__class__.__name__}')
self.box_type_3d, self.box_mode_3d = get_box_type(box_type_3d)
if metainfo is not None and 'CLASSES' in metainfo:
# we allow to train on subset of self.METAINFO['CLASSES']
# map unselected labels to -1
......@@ -101,6 +107,10 @@ class Det3DDataset(BaseDataset):
}
self.label_mapping[-1] = -1
# can be accessed by other component in runner
metainfo['box_type_3d'] = box_type_3d
metainfo['label_mapping'] = self.label_mapping
super().__init__(
ann_file=ann_file,
metainfo=metainfo,
......@@ -221,7 +231,10 @@ class Det3DDataset(BaseDataset):
self.data_prefix.get('img', ''), img_info['img_path'])
if not self.test_mode:
# used in training
info['ann_info'] = self.parse_ann_info(info)
if self.test_mode and self.load_eval_anns:
info['eval_ann_info'] = self.parse_ann_info(info)
return info
......
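The label_mapping mentioned above lets users train on a subset of METAINFO['CLASSES'] by mapping unselected labels to -1. A self-contained sketch of that mapping rule (illustrative class names, not the real METAINFO):

all_classes = ('cabinet', 'bed', 'chair')  # stands in for METAINFO['CLASSES']
selected = ('chair', )                     # user-selected subset
label_mapping = {
    i: selected.index(name) if name in selected else -1
    for i, name in enumerate(all_classes)
}
label_mapping[-1] = -1
print(label_mapping)  # {0: -1, 1: -1, 2: 0, -1: -1}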
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Union
import numpy as np
from mmcv import BaseTransform
from mmcv.transforms import to_tensor
......@@ -45,14 +47,16 @@ class Pack3DDetInputs(BaseTransform):
key = key[3:]
return key
def transform(self, results: dict) -> dict:
"""Method to pack the input data.
def transform(self, results: Union[dict,
List[dict]]) -> Union[dict, List[dict]]:
"""Method to pack the input data. when the value in this dict is a
list, it usually is in Augmentations Testing.
Args:
results (dict): Result dict from the data pipeline.
results (dict | list[dict]): Result dict from the data pipeline.
Returns:
dict:
dict | List[dict]:
- 'inputs' (dict): The forward data of models. It usually contains
following keys:
......@@ -63,12 +67,41 @@ class Pack3DDetInputs(BaseTransform):
- 'data_sample' (obj:`Det3DDataSample`): The annotation info of the
sample.
"""
packed_results = dict()
# augtest
if isinstance(results, list):
pack_results = []
for single_result in results:
pack_results.append(self.pack_single_results(single_result))
return pack_results
# norm training and simple testing
elif isinstance(results, dict):
return self.pack_single_results(results)
else:
raise NotImplementedError
def pack_single_results(self, results):
"""Method to pack the single input data. when the value in this dict is
a list, it usually is in Augmentations Testing.
Args:
results (dict): Result dict from the data pipeline.
Returns:
dict: A dict contains
- 'inputs' (dict): The forward data of models. It usually contains
following keys:
- points
- img
- 'data_sample' (obj:`Det3DDataSample`): The annotation info of the
sample.
"""
# Format 3D data
if 'points' in results:
assert isinstance(results['points'], BasePoints)
results['points'] = results['points'].tensor
if isinstance(results['points'], BasePoints):
results['points'] = results['points'].tensor
if 'img' in results:
if isinstance(results['img'], list):
......@@ -134,6 +167,12 @@ class Pack3DDetInputs(BaseTransform):
data_sample.gt_instances_3d = gt_instances_3d
data_sample.gt_instances = gt_instances
data_sample.seg_data = seg_data
if 'eval_ann_info' in results:
data_sample.eval_ann_info = results['eval_ann_info']
else:
data_sample.eval_ann_info = None
packed_results = dict()
packed_results['data_sample'] = data_sample
packed_results['inputs'] = inputs
......
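The dispatch rule added to Pack3DDetInputs.transform is easy to see in isolation: a dict is one sample, a list of dicts is one sample per augmented view. A standalone toy re-implementation of just that branching (not the mmdet3d class itself):

def transform(results, pack_single):
    if isinstance(results, list):        # test-time augmentation: N views
        return [pack_single(r) for r in results]
    elif isinstance(results, dict):      # normal training / simple testing
        return pack_single(results)
    raise NotImplementedError

pack = lambda r: ('packed', r)
print(transform(dict(points=0), pack))                    # single sample
print(transform([dict(points=0), dict(points=1)], pack))  # two augmented views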
......@@ -684,6 +684,9 @@ class LoadAnnotations3D(LoadAnnotations):
pts_instance_mask_path, dtype=np.int64)
results['pts_instance_mask'] = pts_instance_mask
# 'eval_ann_info' will be passed to evaluator
if 'eval_ann_info' in results:
results['eval_ann_info']['pts_instance_mask'] = pts_instance_mask
return results
def _load_semantic_seg_3d(self, results: dict) -> dict:
......@@ -710,6 +713,9 @@ class LoadAnnotations3D(LoadAnnotations):
pts_semantic_mask_path, dtype=np.int64)
results['pts_semantic_mask'] = pts_semantic_mask
# 'eval_ann_info' will be passed to evaluator
if 'eval_ann_info' in results:
results['eval_ann_info']['pts_semantic_mask'] = pts_semantic_mask
return results
def transform(self, results: dict) -> dict:
......
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from copy import deepcopy
from typing import Dict, List, Optional, Tuple, Union
import mmcv
from mmcv import BaseTransform
from mmengine.dataset import Compose
from mmdet3d.registry import TRANSFORMS
from .compose import Compose
@TRANSFORMS.register_module()
class MultiScaleFlipAug3D(object):
class MultiScaleFlipAug3D(BaseTransform):
"""Test-time augmentation with multiple scales and flipping.
Args:
......@@ -33,13 +35,13 @@ class MultiScaleFlipAug3D(object):
"""
def __init__(self,
transforms,
img_scale,
pts_scale_ratio,
flip=False,
flip_direction='horizontal',
pcd_horizontal_flip=False,
pcd_vertical_flip=False):
transforms: List[dict],
img_scale: Optional[Union[Tuple[int], List[Tuple[int]]]],
pts_scale_ratio: Union[float, List[float]],
flip: bool = False,
flip_direction: str = 'horizontal',
pcd_horizontal_flip: bool = False,
pcd_vertical_flip: bool = False) -> None:
self.transforms = Compose(transforms)
self.img_scale = img_scale if isinstance(img_scale,
list) else [img_scale]
......@@ -65,17 +67,17 @@ class MultiScaleFlipAug3D(object):
warnings.warn(
'flip has no effect when RandomFlip is not in transforms')
def __call__(self, results):
def transform(self, results: Dict) -> List[Dict]:
"""Call function to augment common fields in results.
Args:
results (dict): Result dict contains the data to augment.
Returns:
dict: The result dict contains the data that is augmented with
List[dict]: The list contains the data that is augmented with
different scales and flips.
"""
aug_data = []
aug_data_list = []
# modified from `flip_aug = [False, True] if self.flip else [False]`
# to reduce unnecessary scenes when using double flip augmentation
......@@ -104,13 +106,9 @@ class MultiScaleFlipAug3D(object):
_results['pcd_vertical_flip'] = \
pcd_vertical_flip
data = self.transforms(_results)
aug_data.append(data)
# list of dict to dict of list
aug_data_dict = {key: [] for key in aug_data[0]}
for data in aug_data:
for key, val in data.items():
aug_data_dict[key].append(val)
return aug_data_dict
aug_data_list.append(data)
return aug_data_list
def __repr__(self):
"""str: Return a string that describes the module."""
......
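The return type of MultiScaleFlipAug3D changes here from a dict of lists to a plain list of dicts; each augmented view is now packed individually by Pack3DDetInputs. For comparison, the removed conversion amounted to:

aug_data_list = [dict(points='view0'), dict(points='view1')]
# old behaviour (deleted above), shown only for comparison:
aug_data_dict = {k: [d[k] for d in aug_data_list] for k in aug_data_list[0]}
print(aug_data_dict)  # {'points': ['view0', 'view1']}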
# Copyright (c) OpenMMLab. All rights reserved.
from .indoor_metric import IndoorMetric # noqa: F401,F403
from .kitti_metric import KittiMetric # noqa: F401,F403
__all_ = ['KittiMetric']
__all__ = ['KittiMetric', 'IndoorMetric']
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Sequence
from mmengine.evaluator import BaseMetric
from mmengine.logging import MMLogger
from mmdet3d.core import get_box_type, indoor_eval
from mmdet3d.registry import METRICS
@METRICS.register_module()
class IndoorMetric(BaseMetric):
"""Kitti evaluation metric.
Args:
iou_thr (list[float]): List of IoU thresholds used when calculating
the metric. Defaults to [0.25, 0.5].
collect_device (str, optional): Device name used for collecting
results from different ranks during distributed training.
Must be 'cpu' or 'gpu'. Defaults to 'cpu'.
prefix (str): The prefix that will be added in the metric
names to disambiguate homonymous metrics of different evaluators.
If prefix is not provided in the argument, self.default_prefix
will be used instead. Default: None
"""
def __init__(self,
iou_thr: List[float] = [0.25, 0.5],
collect_device: str = 'cpu',
prefix: Optional[str] = None,
**kwargs):
super(IndoorMetric, self).__init__(
prefix=prefix, collect_device=collect_device)
self.iou_thr = iou_thr
def process(self, data_batch: Sequence[dict],
predictions: Sequence[dict]) -> None:
"""Process one batch of data samples and predictions.
The processed results should be stored in ``self.results``,
which will be used to compute the metrics when all batches
have been processed.
Args:
data_batch (Sequence[dict]): A batch of data
from the dataloader.
predictions (Sequence[dict]): A batch of outputs from
the model.
"""
batch_eval_anns = [
item['data_sample']['eval_ann_info'] for item in data_batch
]
for eval_ann, pred_dict in zip(batch_eval_anns, predictions):
pred_3d = pred_dict['pred_instances_3d']
cpu_pred_3d = dict()
for k, v in pred_3d.items():
if hasattr(v, 'to'):
cpu_pred_3d[k] = v.to('cpu')
else:
cpu_pred_3d[k] = v
self.results.append((eval_ann, cpu_pred_3d))
def compute_metrics(self, results: list) -> Dict[str, float]:
"""Compute the metrics from processed results.
Args:
results (list): The processed results of each batch.
Returns:
Dict[str, float]: The computed metrics. The keys are the names of
the metrics, and the values are corresponding results.
"""
logger: MMLogger = MMLogger.get_current_instance()
ann_infos = []
pred_results = []
for eval_ann, single_pred_results in results:
ann_infos.append(eval_ann)
pred_results.append(single_pred_results)
box_type_3d, box_mode_3d = get_box_type(
self.dataset_meta['box_type_3d'])
ret_dict = indoor_eval(
ann_infos,
pred_results,
self.iou_thr,
self.dataset_meta['CLASSES'],
logger=logger,
box_mode_3d=box_mode_3d)
return ret_dict
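Usage of the new metric is config-driven, but it can also be built from the registry for a quick check. A minimal sketch, assuming mmdet3d dev-1.x is installed (the metric still needs dataset_meta and processed results before compute_metrics is meaningful):

from mmdet3d.registry import METRICS

metric = METRICS.build(dict(type='IndoorMetric', iou_thr=[0.25, 0.5]))
print(type(metric).__name__)  # IndoorMetric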
# Copyright (c) OpenMMLab. All rights reserved.
from numbers import Number
from typing import Dict, List, Optional, Sequence, Tuple, Union
from typing import List, Optional, Sequence, Tuple, Union
import numpy as np
from mmengine.data import BaseDataElement
......@@ -66,19 +66,41 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
batch_augments=batch_augments)
def forward(self,
data: Sequence[dict],
training: bool = False) -> Tuple[Dict, Optional[list]]:
data: List[Union[dict, List[dict]]],
training: bool = False
) -> Tuple[Union[dict, List[dict]], Optional[list]]:
"""Perform normalization、padding and bgr2rgb conversion based on
``BaseDataPreprocessor``.
Args:
data (Sequence[dict]): data sampled from dataloader.
data (List[dict] | List[List[dict]]): Data from the dataloader.
The outer list always represents the batch dimension; when it
is a list[list[dict]], the inner list indicates test-time
augmentation.
training (bool): Whether to enable training time augmentation.
Returns:
Tuple[Dict, Optional[list]]: Data in the same format as the
model input.
Tuple[Dict, Optional[list]] |
Tuple[List[Dict], Optional[list[list]]]:
Data in the same format as the model input.
"""
if isinstance(data[0], list):
num_augs = len(data[0])
aug_batch_data = []
aug_batch_data_sample = []
for aug_id in range(num_augs):
single_aug_batch_data, \
single_aug_batch_data_sample = self.simple_process(
[item[aug_id] for item in data], training)
aug_batch_data.append(single_aug_batch_data)
aug_batch_data_sample.append(single_aug_batch_data_sample)
return aug_batch_data, aug_batch_data_sample
else:
return self.simple_process(data, training)
def simple_process(self, data: Sequence[dict], training: bool = False):
inputs_dict, batch_data_samples = self.collate_data(data)
if 'points' in inputs_dict[0].keys():
......
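The TTA branch in forward regroups the nested batch: data arrives as batch[sample][aug] and is re-sliced into aug[batch], so every augmented view is processed as an ordinary batch. The indexing in isolation (a toy, with strings instead of real samples):

data = [['s0a0', 's0a1'], ['s1a0', 's1a1']]   # 2 samples x 2 augmentations
num_augs = len(data[0])
per_aug = [[item[aug_id] for item in data] for aug_id in range(num_augs)]
print(per_aug)  # [['s0a0', 's1a0'], ['s0a1', 's1a1']]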
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Dict, List, Optional, Union
import numpy as np
import torch
from mmcv.ops import furthest_point_sample
from mmcv.runner import BaseModule, force_fp32
from mmengine import ConfigDict, InstanceData
from torch.nn import functional as F
from mmdet3d.core.post_processing import aligned_3d_nms
from mmdet3d.models.builder import build_loss
from mmdet3d.models.losses import chamfer_distance
from mmdet3d.models.model_utils import VoteModule
from mmdet3d.ops import build_sa_module
from mmdet3d.registry import MODELS
from mmdet.core import build_bbox_coder, multi_apply
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet.core.utils import multi_apply
from ...core import Det3DDataSample
from .base_conv_bbox_head import BaseConvBboxHead
......@@ -21,66 +24,76 @@ class VoteHead(BaseModule):
Args:
num_classes (int): The number of classes.
bbox_coder (:obj:`BaseBBoxCoder`): Bbox coder for encoding and
decoding boxes.
train_cfg (dict): Config for training.
test_cfg (dict): Config for testing.
vote_module_cfg (dict): Config of VoteModule for point-wise votes.
vote_aggregation_cfg (dict): Config of vote aggregation layer.
pred_layer_cfg (dict): Config of classfication and regression
prediction layers.
conv_cfg (dict): Config of convolution in prediction layer.
norm_cfg (dict): Config of BN in prediction layer.
objectness_loss (dict): Config of objectness loss.
center_loss (dict): Config of center loss.
dir_class_loss (dict): Config of direction classification loss.
dir_res_loss (dict): Config of direction residual regression loss.
size_class_loss (dict): Config of size classification loss.
size_res_loss (dict): Config of size residual regression loss.
semantic_loss (dict): Config of point-wise semantic segmentation loss.
bbox_coder (ConfigDict, dict): Bbox coder for encoding and
decoding boxes. Defaults to None.
train_cfg (dict, optional): Config for training. Defaults to None.
test_cfg (dict, optional): Config for testing. Defaults to None.
vote_module_cfg (dict, optional): Config of VoteModule for
point-wise votes. Defaults to None.
vote_aggregation_cfg (dict, optional): Config of vote
aggregation layer. Defaults to None.
pred_layer_cfg (dict, optional): Config of classification
and regression prediction layers. Defaults to None.
objectness_loss (dict, optional): Config of objectness loss.
Defaults to None.
center_loss (dict, optional): Config of center loss.
Defaults to None.
dir_class_loss (dict, optional): Config of direction
classification loss. Defaults to None.
dir_res_loss (dict, optional): Config of direction
residual regression loss. Defaults to None.
size_class_loss (dict, optional): Config of size
classification loss. Defaults to None.
size_res_loss (dict, optional): Config of size
residual regression loss. Defaults to None.
semantic_loss (dict, optional): Config of point-wise
semantic segmentation loss. Defaults to None.
iou_loss (dict, optional): Config of IOU loss for
regression. Defaults to None.
init_cfg (dict, optional): Config of model weight
initialization. Defaults to None.
"""
def __init__(self,
num_classes,
bbox_coder,
train_cfg=None,
test_cfg=None,
vote_module_cfg=None,
vote_aggregation_cfg=None,
pred_layer_cfg=None,
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=None,
center_loss=None,
dir_class_loss=None,
dir_res_loss=None,
size_class_loss=None,
size_res_loss=None,
semantic_loss=None,
iou_loss=None,
init_cfg=None):
num_classes: int,
bbox_coder: Union[ConfigDict, dict],
train_cfg: Optional[dict] = None,
test_cfg: Optional[dict] = None,
vote_module_cfg: Optional[dict] = None,
vote_aggregation_cfg: Optional[dict] = None,
pred_layer_cfg: Optional[dict] = None,
objectness_loss: Optional[dict] = None,
center_loss: Optional[dict] = None,
dir_class_loss: Optional[dict] = None,
dir_res_loss: Optional[dict] = None,
size_class_loss: Optional[dict] = None,
size_res_loss: Optional[dict] = None,
semantic_loss: Optional[dict] = None,
iou_loss: Optional[dict] = None,
init_cfg: Optional[dict] = None):
super(VoteHead, self).__init__(init_cfg=init_cfg)
self.num_classes = num_classes
self.train_cfg = train_cfg
self.test_cfg = test_cfg
self.gt_per_seed = vote_module_cfg['gt_per_seed']
self.num_proposal = vote_aggregation_cfg['num_point']
self.objectness_loss = build_loss(objectness_loss)
self.center_loss = build_loss(center_loss)
self.dir_res_loss = build_loss(dir_res_loss)
self.dir_class_loss = build_loss(dir_class_loss)
self.size_res_loss = build_loss(size_res_loss)
self.loss_objectness = MODELS.build(objectness_loss)
self.loss_center = MODELS.build(center_loss)
self.loss_dir_res = MODELS.build(dir_res_loss)
self.loss_dir_class = MODELS.build(dir_class_loss)
self.loss_size_res = MODELS.build(size_res_loss)
if size_class_loss is not None:
self.size_class_loss = build_loss(size_class_loss)
self.size_class_loss = MODELS.build(size_class_loss)
if semantic_loss is not None:
self.semantic_loss = build_loss(semantic_loss)
self.semantic_loss = MODELS.build(semantic_loss)
if iou_loss is not None:
self.iou_loss = build_loss(iou_loss)
self.iou_loss = MODELS.build(iou_loss)
else:
self.iou_loss = None
self.bbox_coder = build_bbox_coder(bbox_coder)
self.bbox_coder = TASK_UTILS.build(bbox_coder)
self.num_sizes = self.bbox_coder.num_sizes
self.num_dir_bins = self.bbox_coder.num_dir_bins
......@@ -94,6 +107,15 @@ class VoteHead(BaseModule):
num_cls_out_channels=self._get_cls_out_channels(),
num_reg_out_channels=self._get_reg_out_channels())
@property
def sample_mode(self):
"""str: Sample mode for the vote aggregation layer, taken from
train_cfg during training and from test_cfg otherwise."""
if self.training:
sample_mode = self.train_cfg.sample_mode
else:
sample_mode = self.test_cfg.sample_mode
assert sample_mode in ['vote', 'seed', 'random', 'spec']
return sample_mode
def _get_cls_out_channels(self):
"""Return the channel number of classification outputs."""
# Class numbers (k) + objectness (2)
......@@ -106,16 +128,18 @@ class VoteHead(BaseModule):
# size class+residual(num_sizes*4)
return 3 + self.num_dir_bins * 2 + self.num_sizes * 4
def _extract_input(self, feat_dict):
def _extract_input(self, feat_dict: dict) -> tuple:
"""Extract inputs from features dictionary.
Args:
feat_dict (dict): Feature dict from backbone.
Returns:
torch.Tensor: Coordinates of input points.
torch.Tensor: Features of input points.
torch.Tensor: Indices of input points.
tuple[Tensor]: Arranged as the following three tensors.
- Coordinates of input points.
- Features of input points.
- Indices of input points.
"""
# for imvotenet
......@@ -133,7 +157,77 @@ class VoteHead(BaseModule):
return seed_points, seed_features, seed_indices
def forward(self, feat_dict, sample_mod):
def predict(self,
points: List[torch.Tensor],
feats_dict: Dict[str, torch.Tensor],
batch_data_samples: List[Det3DDataSample],
rescale=True,
**kwargs) -> List[InstanceData]:
"""
Args:
points (list[tensor]): Point clouds of multiple samples.
feats_dict (dict): Features from FPN or backbone.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes meta information of data.
rescale (bool): Whether to rescale the results to
the original scale.
Returns:
list[:obj:`InstanceData`]: List of processed predictions. Each
InstanceData contains 3d Bounding boxes and corresponding
scores and labels.
"""
preds_dict = self(feats_dict)
batch_size = len(batch_data_samples)
batch_input_metas = []
for batch_index in range(batch_size):
metainfo = batch_data_samples[batch_index].metainfo
batch_input_metas.append(metainfo)
results_list = self.predict_by_feat(
points, preds_dict, batch_input_metas, rescale=rescale, **kwargs)
return results_list
def loss(self, points: List[torch.Tensor], feats_dict: Dict[str,
torch.Tensor],
batch_data_samples: List[Det3DDataSample], **kwargs) -> dict:
"""
Args:
points (list[tensor]): Point clouds of multiple samples.
feats_dict (dict): Predictions from backbone or FPN.
batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
contains the meta information of each sample and
corresponding annotations.
Returns:
dict: A dictionary of loss components.
"""
preds_dict = self.forward(feats_dict)
batch_gt_instance_3d = []
batch_gt_instances_ignore = []
batch_input_metas = []
batch_pts_semantic_mask = []
batch_pts_instance_mask = []
for data_sample in batch_data_samples:
batch_input_metas.append(data_sample.metainfo)
batch_gt_instance_3d.append(data_sample.gt_instances_3d)
batch_gt_instances_ignore.append(
data_sample.get('ignored_instances', None))
batch_pts_semantic_mask.append(
data_sample.seg_data.get('pts_semantic_mask', None))
batch_pts_instance_mask.append(
data_sample.seg_data.get('pts_instance_mask', None))
loss_inputs = (points, preds_dict, batch_gt_instance_3d)
losses = self.loss_by_feat(
*loss_inputs,
batch_pts_semantic_mask=batch_pts_semantic_mask,
batch_pts_instance_mask=batch_pts_instance_mask,
batch_input_metas=batch_input_metas,
batch_gt_instances_ignore=batch_gt_instances_ignore)
return losses
def forward(self, feat_dict: dict) -> dict:
"""Forward pass.
Note:
......@@ -146,13 +240,10 @@ class VoteHead(BaseModule):
Args:
feat_dict (dict): Feature dict from backbone.
sample_mod (str): Sample mode for vote aggregation layer.
valid modes are "vote", "seed", "random" and "spec".
Returns:
dict: Predictions of vote head.
"""
assert sample_mod in ['vote', 'seed', 'random', 'spec']
seed_points, seed_features, seed_indices = self._extract_input(
feat_dict)
......@@ -168,11 +259,11 @@ class VoteHead(BaseModule):
vote_offset=vote_offset)
# 2. aggregate vote_points
if sample_mod == 'vote':
if self.sample_mode == 'vote':
# use fps in vote_aggregation
aggregation_inputs = dict(
points_xyz=vote_points, features=vote_features)
elif sample_mod == 'seed':
elif self.sample_mode == 'seed':
# FPS on seed and choose the votes corresponding to the seeds
sample_indices = furthest_point_sample(seed_points,
self.num_proposal)
......@@ -180,7 +271,7 @@ class VoteHead(BaseModule):
points_xyz=vote_points,
features=vote_features,
indices=sample_indices)
elif sample_mod == 'random':
elif self.sample_mode == 'random':
# Random sampling from the votes
batch_size, num_seed = seed_points.shape[:2]
sample_indices = seed_points.new_tensor(
......@@ -190,7 +281,7 @@ class VoteHead(BaseModule):
points_xyz=vote_points,
features=vote_features,
indices=sample_indices)
elif sample_mod == 'spec':
elif self.sample_mode == 'spec':
# Specify the new center in vote_aggregation
aggregation_inputs = dict(
points_xyz=seed_points,
......@@ -198,7 +289,7 @@ class VoteHead(BaseModule):
target_xyz=vote_points)
else:
raise NotImplementedError(
f'Sample mode {sample_mod} is not supported!')
f'Sample mode {self.sample_mode} is not supported!')
vote_aggregation_ret = self.vote_aggregation(**aggregation_inputs)
aggregated_points, features, aggregated_indices = vote_aggregation_ret
......@@ -214,45 +305,42 @@ class VoteHead(BaseModule):
decode_res = self.bbox_coder.split_pred(cls_predictions,
reg_predictions,
aggregated_points)
results.update(decode_res)
return results
@force_fp32(apply_to=('bbox_preds', ))
def loss(self,
bbox_preds,
points,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None,
img_metas=None,
gt_bboxes_ignore=None,
ret_target=False):
def loss_by_feat(
self,
points: List[torch.Tensor],
bbox_preds_dict: dict,
batch_gt_instances_3d: List[InstanceData],
batch_pts_semantic_mask: Optional[List[torch.Tensor]] = None,
batch_pts_instance_mask: Optional[List[torch.Tensor]] = None,
ret_target: bool = False,
**kwargs) -> dict:
"""Compute loss.
Args:
bbox_preds (dict): Predictions from forward of vote head.
points (list[torch.Tensor]): Input points.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each sample.
gt_labels_3d (list[torch.Tensor]): Labels of each sample.
pts_semantic_mask (list[torch.Tensor]): Point-wise
semantic mask.
pts_instance_mask (list[torch.Tensor]): Point-wise
instance mask.
img_metas (list[dict]): Contain pcd and img's meta info.
gt_bboxes_ignore (list[torch.Tensor]): Specify
which bounding.
ret_target (Bool): Return targets or not.
bbox_preds_dict (dict): Predictions from forward of vote head.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes`` and ``labels``
attributes.
batch_pts_semantic_mask (list[tensor]): Semantic mask
of point clouds. Defaults to None.
batch_pts_instance_mask (list[tensor]): Instance mask
of point clouds. Defaults to None.
batch_input_metas (list[dict]): Contains pcd and img's meta info.
ret_target (bool): Return targets or not.
Returns:
dict: Losses of Votenet.
"""
targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d,
pts_semantic_mask, pts_instance_mask,
bbox_preds)
targets = self.get_targets(points, bbox_preds_dict,
batch_gt_instances_3d,
batch_pts_semantic_mask,
batch_pts_instance_mask)
(vote_targets, vote_target_masks, size_class_targets, size_res_targets,
dir_class_targets, dir_res_targets, center_targets,
assigned_center_targets, mask_targets, valid_gt_masks,
......@@ -260,28 +348,28 @@ class VoteHead(BaseModule):
valid_gt_weights) = targets
# calculate vote loss
vote_loss = self.vote_module.get_loss(bbox_preds['seed_points'],
bbox_preds['vote_points'],
bbox_preds['seed_indices'],
vote_loss = self.vote_module.get_loss(bbox_preds_dict['seed_points'],
bbox_preds_dict['vote_points'],
bbox_preds_dict['seed_indices'],
vote_target_masks, vote_targets)
# calculate objectness loss
objectness_loss = self.objectness_loss(
bbox_preds['obj_scores'].transpose(2, 1),
objectness_loss = self.loss_objectness(
bbox_preds_dict['obj_scores'].transpose(2, 1),
objectness_targets,
weight=objectness_weights)
# calculate center loss
source2target_loss, target2source_loss = self.center_loss(
bbox_preds['center'],
source2target_loss, target2source_loss = self.loss_center(
bbox_preds_dict['center'],
center_targets,
src_weight=box_loss_weights,
dst_weight=valid_gt_weights)
center_loss = source2target_loss + target2source_loss
# calculate direction class loss
dir_class_loss = self.dir_class_loss(
bbox_preds['dir_class'].transpose(2, 1),
dir_class_loss = self.loss_dir_class(
bbox_preds_dict['dir_class'].transpose(2, 1),
dir_class_targets,
weight=box_loss_weights)
......@@ -291,13 +379,13 @@ class VoteHead(BaseModule):
(batch_size, proposal_num, self.num_dir_bins))
heading_label_one_hot.scatter_(2, dir_class_targets.unsqueeze(-1), 1)
dir_res_norm = torch.sum(
bbox_preds['dir_res_norm'] * heading_label_one_hot, -1)
dir_res_loss = self.dir_res_loss(
bbox_preds_dict['dir_res_norm'] * heading_label_one_hot, -1)
dir_res_loss = self.loss_dir_res(
dir_res_norm, dir_res_targets, weight=box_loss_weights)
# calculate size class loss
size_class_loss = self.size_class_loss(
bbox_preds['size_class'].transpose(2, 1),
bbox_preds_dict['size_class'].transpose(2, 1),
size_class_targets,
weight=box_loss_weights)
......@@ -308,17 +396,17 @@ class VoteHead(BaseModule):
one_hot_size_targets_expand = one_hot_size_targets.unsqueeze(
-1).repeat(1, 1, 1, 3).contiguous()
size_residual_norm = torch.sum(
bbox_preds['size_res_norm'] * one_hot_size_targets_expand, 2)
bbox_preds_dict['size_res_norm'] * one_hot_size_targets_expand, 2)
box_loss_weights_expand = box_loss_weights.unsqueeze(-1).repeat(
1, 1, 3)
size_res_loss = self.size_res_loss(
size_res_loss = self.loss_size_res(
size_residual_norm,
size_res_targets,
weight=box_loss_weights_expand)
# calculate semantic loss
semantic_loss = self.semantic_loss(
bbox_preds['sem_scores'].transpose(2, 1),
bbox_preds_dict['sem_scores'].transpose(2, 1),
mask_targets,
weight=box_loss_weights)
......@@ -334,7 +422,7 @@ class VoteHead(BaseModule):
if self.iou_loss:
corners_pred = self.bbox_coder.decode_corners(
bbox_preds['center'], size_residual_norm,
bbox_preds_dict['center'], size_residual_norm,
one_hot_size_targets_expand)
corners_target = self.bbox_coder.decode_corners(
assigned_center_targets, size_res_targets,
......@@ -348,25 +436,26 @@ class VoteHead(BaseModule):
return losses
def get_targets(self,
points,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None,
bbox_preds=None):
def get_targets(
self,
points,
bbox_preds: dict = None,
batch_gt_instances_3d: List[InstanceData] = None,
batch_pts_semantic_mask: List[torch.Tensor] = None,
batch_pts_instance_mask: List[torch.Tensor] = None,
):
"""Generate targets of vote head.
Args:
points (list[torch.Tensor]): Points of each batch.
gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
bboxes of each batch.
gt_labels_3d (list[torch.Tensor]): Labels of each batch.
pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
label of each batch.
pts_instance_mask (list[torch.Tensor]): Point-wise instance
label of each batch.
bbox_preds (torch.Tensor): Bounding box predictions of vote head.
batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
gt_instances. It usually includes ``bboxes`` and ``labels``
attributes.
batch_pts_semantic_mask (list[tensor]): Semantic gt mask for
multiple samples.
batch_pts_instance_mask (list[tensor]): Instance gt mask for
multiple samples.
Returns:
tuple[torch.Tensor]: Targets of vote head.
......@@ -374,40 +463,46 @@ class VoteHead(BaseModule):
# find empty example
valid_gt_masks = list()
gt_num = list()
for index in range(len(gt_labels_3d)):
if len(gt_labels_3d[index]) == 0:
fake_box = gt_bboxes_3d[index].tensor.new_zeros(
1, gt_bboxes_3d[index].tensor.shape[-1])
gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)
gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)
valid_gt_masks.append(gt_labels_3d[index].new_zeros(1))
batch_gt_labels_3d = [
gt_instances_3d.labels_3d
for gt_instances_3d in batch_gt_instances_3d
]
batch_gt_bboxes_3d = [
gt_instances_3d.bboxes_3d
for gt_instances_3d in batch_gt_instances_3d
]
for index in range(len(batch_gt_labels_3d)):
if len(batch_gt_labels_3d[index]) == 0:
fake_box = batch_gt_bboxes_3d[index].tensor.new_zeros(
1, batch_gt_bboxes_3d[index].tensor.shape[-1])
batch_gt_bboxes_3d[index] = batch_gt_bboxes_3d[index].new_box(
fake_box)
batch_gt_labels_3d[index] = batch_gt_labels_3d[
index].new_zeros(1)
valid_gt_masks.append(batch_gt_labels_3d[index].new_zeros(1))
gt_num.append(1)
else:
valid_gt_masks.append(gt_labels_3d[index].new_ones(
gt_labels_3d[index].shape))
gt_num.append(gt_labels_3d[index].shape[0])
valid_gt_masks.append(batch_gt_labels_3d[index].new_ones(
batch_gt_labels_3d[index].shape))
gt_num.append(batch_gt_labels_3d[index].shape[0])
max_gt_num = max(gt_num)
if pts_semantic_mask is None:
pts_semantic_mask = [None for i in range(len(gt_labels_3d))]
pts_instance_mask = [None for i in range(len(gt_labels_3d))]
aggregated_points = [
bbox_preds['aggregated_points'][i]
for i in range(len(gt_labels_3d))
for i in range(len(batch_gt_labels_3d))
]
(vote_targets, vote_target_masks, size_class_targets, size_res_targets,
dir_class_targets, dir_res_targets, center_targets,
assigned_center_targets, mask_targets, objectness_targets,
objectness_masks) = multi_apply(self.get_targets_single, points,
gt_bboxes_3d, gt_labels_3d,
pts_semantic_mask, pts_instance_mask,
aggregated_points)
assigned_center_targets, mask_targets,
objectness_targets, objectness_masks) = multi_apply(
self._get_targets_single, points, batch_gt_bboxes_3d,
batch_gt_labels_3d, batch_pts_semantic_mask,
batch_pts_instance_mask, aggregated_points)
# pad targets as original code of votenet.
for index in range(len(gt_labels_3d)):
pad_num = max_gt_num - gt_labels_3d[index].shape[0]
for index in range(len(batch_gt_labels_3d)):
pad_num = max_gt_num - batch_gt_labels_3d[index].shape[0]
center_targets[index] = F.pad(center_targets[index],
(0, 0, 0, pad_num))
valid_gt_masks[index] = F.pad(valid_gt_masks[index], (0, pad_num))
......@@ -437,13 +532,13 @@ class VoteHead(BaseModule):
valid_gt_masks, objectness_targets, objectness_weights,
box_loss_weights, valid_gt_weights)
def get_targets_single(self,
points,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None,
aggregated_points=None):
def _get_targets_single(self,
points,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None,
aggregated_points=None):
"""Generate targets of vote head for single batch.
Args:
......@@ -501,7 +596,6 @@ class VoteHead(BaseModule):
vote_targets = points.new_zeros([num_points, 3])
vote_target_masks = points.new_zeros([num_points],
dtype=torch.long)
for i in torch.unique(pts_instance_mask):
indices = torch.nonzero(
pts_instance_mask == i, as_tuple=False).squeeze(-1)
......@@ -561,47 +655,63 @@ class VoteHead(BaseModule):
dir_res_targets, center_targets, assigned_center_targets,
mask_targets.long(), objectness_targets, objectness_masks)
def get_bboxes(self,
points,
bbox_preds,
input_metas,
rescale=False,
use_nms=True):
def predict_by_feat(self,
points: List[torch.Tensor],
bbox_preds_dict: dict,
batch_input_metas: List[dict],
use_nms: bool = True,
**kwargs) -> List[InstanceData]:
"""Generate bboxes from vote head predictions.
Args:
points (torch.Tensor): Input points.
bbox_preds (dict): Predictions from vote head.
input_metas (list[dict]): Point cloud and image's meta info.
rescale (bool): Whether to rescale bboxes.
points (List[torch.Tensor]): Input points of multiple samples.
bbox_preds_dict (dict): Predictions from vote head.
batch_input_metas (list[dict]): Each item
contains the meta information of each sample.
use_nms (bool): Whether to apply NMS. NMS postprocessing is
skipped when the vote head is used in an RPN stage.
Returns:
list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.
list[:obj:`InstanceData`]: List of processed predictions. Each
InstanceData contains 3D bounding boxes and corresponding
scores and labels.
"""
# decode boxes
obj_scores = F.softmax(bbox_preds['obj_scores'], dim=-1)[..., -1]
sem_scores = F.softmax(bbox_preds['sem_scores'], dim=-1)
bbox3d = self.bbox_coder.decode(bbox_preds)
if use_nms:
batch_size = bbox3d.shape[0]
results = list()
for b in range(batch_size):
stack_points = torch.stack(points)
obj_scores = F.softmax(bbox_preds_dict['obj_scores'], dim=-1)[..., -1]
sem_scores = F.softmax(bbox_preds_dict['sem_scores'], dim=-1)
bbox3d = self.bbox_coder.decode(bbox_preds_dict)
batch_size = bbox3d.shape[0]
results_list = list()
for b in range(batch_size):
temp_results = InstanceData()
if use_nms:
bbox_selected, score_selected, labels = \
self.multiclass_nms_single(obj_scores[b], sem_scores[b],
bbox3d[b], points[b, ..., :3],
input_metas[b])
bbox = input_metas[b]['box_type_3d'](
self.multiclass_nms_single(obj_scores[b],
sem_scores[b],
bbox3d[b],
stack_points[b, ..., :3],
batch_input_metas[b])
bbox = batch_input_metas[b]['box_type_3d'](
bbox_selected,
box_dim=bbox_selected.shape[-1],
with_yaw=self.bbox_coder.with_rot)
temp_results.bboxes_3d = bbox
temp_results.scores_3d = score_selected
temp_results.labels_3d = labels
results_list.append(temp_results)
else:
bbox = batch_input_metas[b]['box_type_3d'](
bbox3d[b],
box_dim=bbox3d[b].shape[-1],
with_yaw=self.bbox_coder.with_rot)
results.append((bbox, score_selected, labels))
temp_results.bboxes_3d = bbox
temp_results.obj_scores_3d = obj_scores[b]
temp_results.sem_scores_3d = sem_scores[b]
results_list.append(temp_results)
return results
else:
return bbox3d
return results_list
def multiclass_nms_single(self, obj_scores, sem_scores, bbox, points,
input_meta):
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Union
from mmdet3d.core import Det3DDataSample
from mmdet3d.core.utils import (ForwardResults, InstanceList, OptConfigType,
OptMultiConfig, OptSampleList, SampleList)
......@@ -24,8 +26,8 @@ class Base3DDetector(BaseDetector):
super().__init__(data_preprocessor=data_processor, init_cfg=init_cfg)
def forward(self,
batch_inputs_dict: dict,
batch_data_samples: OptSampleList = None,
inputs: Union[dict, List[dict]],
data_samples: OptSampleList = None,
mode: str = 'tensor',
**kwargs) -> ForwardResults:
"""The unified entry for a forward process in both training and test.
......@@ -43,10 +45,19 @@ class Base3DDetector(BaseDetector):
optimizer updating, which are done in the :meth:`train_step`.
Args:
batch_inputs (torch.Tensor): The input tensor with shape
(N, C, ...) in general.
batch_data_samples (list[:obj:`DetDataSample`], optional): The
annotation data of every samples. Defaults to None.
inputs (dict | list[dict]): When it is a list[dict], the
outer list indicates the test-time augmentation. Each
dict contains batch inputs
which include 'points' and 'imgs' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
- imgs (torch.Tensor): Image tensor with shape (B, C, H, W).
data_samples (list[:obj:`DetDataSample`],
list[list[:obj:`DetDataSample`]], optional): The
annotation data of every sample. When it is a list[list], the
outer list indicates the test-time augmentation, and the
inner list indicates the batch. Otherwise, the list simply
indicates the batch. Defaults to None.
mode (str): Return what kind of value. Defaults to 'tensor'.
Returns:
......@@ -57,13 +68,20 @@ class Base3DDetector(BaseDetector):
- If ``mode="loss"``, return a dict of tensor.
"""
if mode == 'loss':
return self.loss(batch_inputs_dict, batch_data_samples, **kwargs)
return self.loss(inputs, data_samples, **kwargs)
elif mode == 'predict':
return self.predict(batch_inputs_dict, batch_data_samples,
**kwargs)
if isinstance(data_samples[0], list):
# aug test
assert len(data_samples[0]) == 1, 'Only support ' \
'batch_size 1 in mmdet3d when doing the ' \
'test-time augmentation.'
return self.aug_test(inputs, data_samples, **kwargs)
else:
return self.predict(inputs, data_samples, **kwargs)
elif mode == 'tensor':
return self._forward(batch_inputs_dict, batch_data_samples,
**kwargs)
return self._forward(inputs, data_samples, **kwargs)
else:
raise RuntimeError(f'Invalid mode "{mode}". '
'Only supports loss, predict and tensor mode')
......
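The mode dispatch above, including the new TTA detection via the nesting of data_samples, reduces to a small pattern. A standalone toy of that control flow (not the Base3DDetector implementation):

def forward(inputs, data_samples=None, mode='tensor'):
    if mode == 'loss':
        return 'loss dict'
    elif mode == 'predict':
        if isinstance(data_samples[0], list):
            return 'aug_test'    # nested list => test-time augmentation
        return 'predict'
    elif mode == 'tensor':
        return 'raw tensors'
    raise RuntimeError(f'Invalid mode "{mode}".')

assert forward(None, [[object()]], mode='predict') == 'aug_test'
assert forward(None, [object()], mode='predict') == 'predict'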
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from typing import Dict, List, Optional, Union
from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d
from mmengine import InstanceData
from torch import Tensor
from mmdet3d.core import Det3DDataSample, merge_aug_bboxes_3d
from mmdet3d.registry import MODELS
from .single_stage import SingleStage3DDetector
@MODELS.register_module()
class VoteNet(SingleStage3DDetector):
r"""`VoteNet <https://arxiv.org/pdf/1904.09664.pdf>`_ for 3D detection."""
r"""`VoteNet <https://arxiv.org/pdf/1904.09664.pdf>`_ for 3D detection.
Args:
backbone (dict): Config dict of detector's backbone.
bbox_head (dict, optional): Config dict of box head. Defaults to None.
train_cfg (dict, optional): Config dict of training hyper-parameters.
Defaults to None.
test_cfg (dict, optional): Config dict of test hyper-parameters.
Defaults to None.
init_cfg (dict, optional): The config to control the
initialization. Defaults to None.
data_preprocessor (dict or ConfigDict, optional): The pre-process
config of :class:`BaseDataPreprocessor`. It usually includes
``pad_size_divisor``, ``pad_value``, ``mean`` and ``std``.
"""
def __init__(self,
backbone,
bbox_head=None,
train_cfg=None,
test_cfg=None,
init_cfg=None,
pretrained=None):
backbone: dict,
bbox_head: Optional[dict] = None,
train_cfg: Optional[dict] = None,
test_cfg: Optional[dict] = None,
init_cfg: Optional[dict] = None,
data_preprocessor: Optional[dict] = None,
**kwargs):
super(VoteNet, self).__init__(
backbone=backbone,
bbox_head=bbox_head,
train_cfg=train_cfg,
test_cfg=test_cfg,
init_cfg=None,
pretrained=pretrained)
def forward_train(self,
points,
img_metas,
gt_bboxes_3d,
gt_labels_3d,
pts_semantic_mask=None,
pts_instance_mask=None,
gt_bboxes_ignore=None):
"""Forward of training.
init_cfg=init_cfg,
data_preprocessor=data_preprocessor,
**kwargs)
def loss(self, batch_inputs_dict: Dict[str, Union[List, Tensor]],
batch_data_samples: List[Det3DDataSample],
**kwargs) -> dict:
"""
Args:
points (list[torch.Tensor]): Points of each batch.
img_metas (list): Image metas.
gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.
gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.
pts_semantic_mask (list[torch.Tensor]): point-wise semantic
label of each batch.
pts_instance_mask (list[torch.Tensor]): point-wise instance
label of each batch.
gt_bboxes_ignore (list[torch.Tensor]): Specify
which bounding.
batch_inputs_dict (dict): The model input dict which include
'points' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance_3d`.
Returns:
dict: Losses.
dict[str, Tensor]: A dictionary of loss components.
"""
points_cat = torch.stack(points)
x = self.extract_feat(points_cat)
bbox_preds = self.bbox_head(x, self.train_cfg.sample_mod)
loss_inputs = (points, gt_bboxes_3d, gt_labels_3d, pts_semantic_mask,
pts_instance_mask, img_metas)
losses = self.bbox_head.loss(
bbox_preds, *loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
feat_dict = self.extract_feat(batch_inputs_dict)
points = batch_inputs_dict['points']
losses = self.bbox_head.loss(points, feat_dict, batch_data_samples,
**kwargs)
return losses
def simple_test(self, points, img_metas, imgs=None, rescale=False):
def predict(self, batch_inputs_dict: Dict[str, Optional[Tensor]],
batch_data_samples: List[Det3DDataSample],
**kwargs) -> List[Det3DDataSample]:
"""Forward of testing.
Args:
points (list[torch.Tensor]): Points of each sample.
img_metas (list): Image metas.
rescale (bool): Whether to rescale results.
batch_inputs_dict (dict): The model input dict which include
'points' keys.
- points (list[torch.Tensor]): Point cloud of each sample.
batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
Samples. It usually includes information such as
`gt_instance_3d`.
Returns:
list: Predicted 3d boxes.
list[:obj:`Det3DDataSample`]: Detection results of the
input sample. Each Det3DDataSample usually contain
'pred_instances_3d'. And the ``pred_instances_3d`` usually
contains following keys.
- scores_3d (Tensor): Classification scores, has a shape
(num_instances, )
- labels_3d (Tensor): Labels of bboxes, has a shape
(num_instances, ).
- bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
contains a tensor with shape (num_instances, 7).
"""
points_cat = torch.stack(points)
x = self.extract_feat(points_cat)
bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)
bbox_list = self.bbox_head.get_bboxes(
points_cat, bbox_preds, img_metas, rescale=rescale)
bbox_results = [
bbox3d2result(bboxes, scores, labels)
for bboxes, scores, labels in bbox_list
]
return bbox_results
def aug_test(self, points, img_metas, imgs=None, rescale=False):
"""Test with augmentation."""
points_cat = [torch.stack(pts) for pts in points]
feats = self.extract_feats(points_cat, img_metas)
# only support aug_test for one sample
aug_bboxes = []
for x, pts_cat, img_meta in zip(feats, points_cat, img_metas):
bbox_preds = self.bbox_head(x, self.test_cfg.sample_mod)
bbox_list = self.bbox_head.get_bboxes(
pts_cat, bbox_preds, img_meta, rescale=rescale)
bbox_list = [
dict(boxes_3d=bboxes, scores_3d=scores, labels_3d=labels)
for bboxes, scores, labels in bbox_list
]
aug_bboxes.append(bbox_list[0])
feats_dict = self.extract_feat(batch_inputs_dict)
points = batch_inputs_dict['points']
results_list = self.bbox_head.predict(points, feats_dict,
batch_data_samples, **kwargs)
data_3d_samples = self.convert_to_datasample(results_list)
return data_3d_samples
def aug_test(self, aug_inputs_list: List[dict],
aug_data_samples: List[List[dict]], **kwargs):
"""Test with augmentation.
Batch size is always 1 when doing the aug test.
Args:
aug_inputs_list (List[dict]): The list indicates the same data
under different augmentations.
aug_data_samples (List[List[dict]]): The outer list
indicates different augmentations, and the inner
list indicates the batch size.
"""
num_augs = len(aug_inputs_list)
if num_augs == 1:
return self.predict(aug_inputs_list[0], aug_data_samples[0])
batch_size = len(aug_data_samples[0])
assert batch_size == 1
multi_aug_results = []
for aug_id in range(num_augs):
batch_inputs_dict = aug_inputs_list[aug_id]
batch_data_samples = aug_data_samples[aug_id]
feats_dict = self.extract_feat(batch_inputs_dict)
points = batch_inputs_dict['points']
results_list = self.bbox_head.predict(points, feats_dict,
batch_data_samples, **kwargs)
multi_aug_results.append(results_list[0])
aug_input_metas_list = []
for aug_index in range(num_augs):
metainfo = aug_data_samples[aug_index][0].metainfo
aug_input_metas_list.append(metainfo)
aug_results_list = [item.to_dict() for item in multi_aug_results]
# after merging, bboxes will be rescaled to the original image size
merged_bboxes = merge_aug_bboxes_3d(aug_bboxes, img_metas,
self.bbox_head.test_cfg)
merged_results_dict = merge_aug_bboxes_3d(aug_results_list,
aug_input_metas_list,
self.bbox_head.test_cfg)
return [merged_bboxes]
merged_results = InstanceData(**merged_results_dict)
data_3d_samples = self.convert_to_datasample([merged_results])
return data_3d_samples