Unverified Commit 86f6183d authored by ChaimZhu, committed by GitHub

[Refactor] move voxelization to data_preprocessor and fix new ut bugs (#1671)

* mv voxelization

* update

* update full

* fix configs

* improve docstring of data_preprocessor

* fix dynamic voxel config

* remove default voxel_type in config

* fix typos

* add docstring

* fix ut

* update

* fix docstring
parent a50c71dd
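
The heart of this refactor, before reading the hunks: per-detector voxelization modules (`pts_voxel_layer` / `voxel_layer`) are removed, and their config moves onto `Det3DDataPreprocessor`. A minimal before/after sketch of the migration (field names come from the hunks below; the surrounding values are illustrative):

```python
# Before: voxelization was configured on the detector itself.
model = dict(
    type='DynamicMVXFasterRCNN',
    pts_voxel_layer=dict(
        max_num_points=-1,  # -1 means a dynamic number of points per voxel
        point_cloud_range=point_cloud_range,
        voxel_size=voxel_size,
        max_voxels=(-1, -1)))

# After: voxelization is configured on the data preprocessor.
model = dict(
    type='DynamicMVXFasterRCNN',
    data_preprocessor=dict(
        type='Det3DDataPreprocessor',
        voxel=True,
        voxel_type='dynamic',  # or 'hard' for hard voxelization
        voxel_layer=dict(
            max_num_points=-1,
            point_cloud_range=point_cloud_range,
            voxel_size=voxel_size,
            max_voxels=(-1, -1))))
```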
@@ -8,6 +8,13 @@ model = dict(
     type='DynamicMVXFasterRCNN',
     data_preprocessor=dict(
         type='Det3DDataPreprocessor',
+        voxel=True,
+        voxel_type='dynamic',
+        voxel_layer=dict(
+            max_num_points=-1,
+            point_cloud_range=point_cloud_range,
+            voxel_size=voxel_size,
+            max_voxels=(-1, -1)),
         mean=[102.9801, 115.9465, 122.7717],
         std=[1.0, 1.0, 1.0],
         bgr_to_rgb=False,
@@ -26,12 +33,6 @@ model = dict(
         in_channels=[256, 512, 1024, 2048],
         out_channels=256,
         num_outs=5),
-    pts_voxel_layer=dict(
-        max_num_points=-1,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(-1, -1),
-    ),
     pts_voxel_encoder=dict(
         type='DynamicVFE',
         in_channels=4,
...
@@ -38,8 +38,10 @@ db_sampler = dict(
     info_path=data_root + 'kitti_dbinfos_train.pkl',
     rate=1.0,
     prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
+    classes=class_names,
     sample_groups=dict(Car=15),
-    classes=class_names)
+    points_loader=dict(
+        type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4))
 train_pipeline = [
     dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
...
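
For orientation, the `db_sampler` that results from this hunk, assembled in one place (`data_root` and `class_names` are defined earlier in the config; only the fields visible in the hunk are guaranteed):

```python
db_sampler = dict(
    info_path=data_root + 'kitti_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
    classes=class_names,
    sample_groups=dict(Car=15),
    # New: the sampler now declares explicitly how GT-database points are loaded.
    points_loader=dict(
        type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4))
```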
@@ -25,7 +25,7 @@ model = dict(
         _delete_=True,
         pts=dict(
             assigner=dict(
-                type='MaxIoUAssigner',
+                type='Max3DIoUAssigner',
                 iou_calculator=dict(type='BboxOverlapsNearest3D'),
                 pos_iou_thr=0.55,
                 neg_iou_thr=0.4,
...
@@ -22,7 +22,7 @@ model = dict(
         _delete_=True,
         pts=dict(
             assigner=dict(
-                type='MaxIoUAssigner',
+                type='Max3DIoUAssigner',
                 iou_calculator=dict(type='BboxOverlapsNearest3D'),
                 pos_iou_thr=0.55,
                 neg_iou_thr=0.4,
...
@@ -60,7 +60,8 @@ val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
 # model settings
 model = dict(
-    pts_voxel_layer=dict(point_cloud_range=[-100, -100, -5, 100, 100, 3]),
+    data_preprocessor=dict(
+        voxel_layer=dict(point_cloud_range=[-100, -100, -5, 100, 100, 3])),
     pts_voxel_encoder=dict(
         feat_channels=[32, 64],
         point_cloud_range=[-100, -100, -5, 100, 100, 3]),
...
@@ -64,7 +64,7 @@ val_dataloader = dict(
 # model settings
 model = dict(
-    pts_voxel_layer=dict(max_num_points=20),
+    data_preprocessor=dict(voxel_layer=dict(max_num_points=20)),
     pts_voxel_encoder=dict(feat_channels=[64, 64]),
     pts_neck=dict(
         _delete_=True,
...
@@ -422,7 +422,7 @@ In MMDetection3D, for example, to change the FPN neck of PointPillars with the f
 ```python
 model = dict(
     type='MVXFasterRCNN',
-    pts_voxel_layer=dict(...),
+    data_preprocessor=dict(voxel_layer=dict(...)),
     pts_voxel_encoder=dict(...),
     pts_middle_encoder=dict(...),
     pts_backbone=dict(...),
...
@@ -423,7 +423,7 @@ gpu_ids = range(0, 1)  # IDs of the GPUs in use
 ```python
 model = dict(
     type='MVXFasterRCNN',
-    pts_voxel_layer=dict(...),
+    data_preprocessor=dict(voxel_layer=dict(...)),
     pts_voxel_encoder=dict(...),
     pts_middle_encoder=dict(...),
     pts_backbone=dict(...),
...
@@ -250,9 +250,9 @@ class Seg3DDataset(BaseDataset):
                 osp.join(self.data_prefix.get('pts_semantic_mask', ''),
                          info['pts_semantic_mask_path'])
-        # Add label_mapping to input dict for directly
-        # use it in PointSegClassMapping pipeline
-        info['label_mapping'] = self.label_mapping
+        # Only used by `PointSegClassMapping` in the pipeline
+        # to map original semantic classes to valid category ids.
+        info['seg_label_mapping'] = self.seg_label_mapping
         # 'eval_ann_info' will be updated in loading transforms
         if self.test_mode and self.load_eval_anns:
...
@@ -64,6 +64,7 @@ class KittiMetric(BaseMetric):
         self.pklfile_prefix = pklfile_prefix
         self.submission_prefix = submission_prefix
         self.pred_box_type_3d = pred_box_type_3d
+        self.default_cam_key = default_cam_key
         self.file_client_args = file_client_args
         allowed_metrics = ['bbox', 'img_bbox', 'mAP']
@@ -284,15 +285,17 @@ class KittiMetric(BaseMetric):
                 pklfile_prefix_ = osp.join(pklfile_prefix, name) + '.pkl'
             else:
                 pklfile_prefix_ = None
-            if 'pred_instances' in name and '3d' in name and name[0] != '_':
+            if 'pred_instances' in name and '3d' in name and name[
+                    0] != '_' and results[0][name]:
                 net_outputs = [result[name] for result in results]
                 result_list_ = self.bbox2result_kitti(net_outputs,
                                                       sample_id_list, classes,
                                                       pklfile_prefix_,
                                                       submission_prefix_)
                 result_dict[name] = result_list_
-            elif name == 'pred_instances' and name[0] != '_':
-                net_outputs = [info[name] for info in results]
+            elif name == 'pred_instances' and name[0] != '_' and results[0][
+                    name]:
+                net_outputs = [result[name] for result in results]
                 result_list_ = self.bbox2result_kitti2d(
                     net_outputs, sample_id_list, classes, pklfile_prefix_,
                     submission_prefix_)
...
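
In plainer terms, the metric now also requires a prediction field to be non-empty in the first result before formatting it. A condensed, hypothetical restatement of the new guard (not the literal code above):

```python
# Skip private fields and fields that carry no predictions.
for name in results[0]:
    if name.startswith('_') or not results[0][name]:
        continue
    # ... format 3D or 2D predictions as before ...
```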
 # Copyright (c) OpenMMLab. All rights reserved.
 from numbers import Number
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import Dict, List, Optional, Sequence, Tuple, Union
 import numpy as np
+import torch
+from mmcv.ops import Voxelization
 from mmengine.data import BaseDataElement
 from mmengine.model import stack_batch
+from torch.nn import functional as F
 from mmdet3d.registry import MODELS
+from mmdet3d.utils import OptConfigType
 from mmdet.models import DetDataPreprocessor
 @MODELS.register_module()
 class Det3DDataPreprocessor(DetDataPreprocessor):
-    """Points (Image) pre-processor for point clouds / multi-modality 3D
-    detection tasks.
+    """Points / Image pre-processor for point clouds / vision-only / multi-
+    modality 3D detection tasks.
     It provides the data pre-processing as follows:
-    - Collate and move data to the target device.
+    - Collate and move image and point cloud data to the target device.
+    - 1) For image data:
     - Pad images in inputs to the maximum size of current batch with defined
       ``pad_value``. The padding size can be divisible by a defined
       ``pad_size_divisor``
@@ -25,8 +31,20 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
     - Convert images in inputs from bgr to rgb if the shape of input is
       (3, H, W).
     - Normalize images in inputs with defined std and mean.
+    - Do batch augmentations during training.
+    - 2) For point cloud data:
+    - If no voxelization is applied, directly return the list of point
+      cloud data.
+    - If voxelization is applied, voxelize the point cloud according to
+      ``voxel_type`` and obtain ``voxels``.
     Args:
+        voxel (bool): Whether to apply voxelization to the point cloud.
+        voxel_type (str): Voxelization type. Two voxelization types are
+            provided: 'hard' and 'dynamic', respectively for hard
+            voxelization and dynamic voxelization. Defaults to 'hard'.
+        voxel_layer (:obj:`ConfigDict`, optional): Voxelization layer
+            config. Defaults to None.
         mean (Sequence[Number], optional): The pixel mean of R, G, B channels.
             Defaults to None.
         std (Sequence[Number], optional): The pixel standard deviation of
@@ -38,9 +56,13 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
             Defaults to False.
         rgb_to_bgr (bool): Whether to convert images from RGB to BGR.
             Defaults to False.
+        batch_augments (list[dict], optional): Batch-level augmentations.
     """
     def __init__(self,
+                 voxel: bool = False,
+                 voxel_type: str = 'hard',
+                 voxel_layer: OptConfigType = None,
                  mean: Sequence[Number] = None,
                  std: Sequence[Number] = None,
                  pad_size_divisor: int = 1,
@@ -64,6 +86,10 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
             bgr_to_rgb=bgr_to_rgb,
             rgb_to_bgr=rgb_to_bgr,
             batch_augments=batch_augments)
+        self.voxel = voxel
+        self.voxel_type = voxel_type
+        if voxel:
+            self.voxel_layer = Voxelization(**voxel_layer)
     def forward(self,
                 data: List[Union[dict, List[dict]]],
@@ -152,6 +178,10 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
             'imgs': batch_imgs if imgs is not None else None
         }
+        if self.voxel:
+            voxel_dict = self.voxelize(points)
+            batch_inputs_dict['voxels'] = voxel_dict
         return batch_inputs_dict, batch_data_samples
     def collate_data(
@@ -203,3 +233,66 @@ class Det3DDataPreprocessor(DetDataPreprocessor):
                              self.pad_size_divisor)) * self.pad_size_divisor
             batch_pad_shape.append((pad_h, pad_w))
         return batch_pad_shape
+    @torch.no_grad()
+    def voxelize(self, points: List[torch.Tensor]) -> Dict:
+        """Apply voxelization to the point cloud.
+
+        Args:
+            points (List[Tensor]): Point clouds of one data batch.
+
+        Returns:
+            dict[str, Tensor]: Voxelization information.
+
+            - voxels (Tensor): Features of voxels, shape is MxNxC for hard
+              voxelization, NxC for dynamic voxelization.
+            - coors (Tensor): Coordinates of voxels, shape is Nx(1+NDim),
+              where 1 represents the batch index.
+            - num_points (Tensor, optional): Number of points in each voxel.
+            - voxel_centers (Tensor, optional): Centers of voxels.
+        """
+        voxel_dict = dict()
+        if self.voxel_type == 'hard':
+            voxels, coors, num_points, voxel_centers = [], [], [], []
+            for res in points:
+                res_voxels, res_coors, res_num_points = self.voxel_layer(res)
+                res_voxel_centers = (
+                    res_coors[:, [2, 1, 0]] + 0.5) * res_voxels.new_tensor(
+                        self.voxel_layer.voxel_size) + res_voxels.new_tensor(
+                            self.voxel_layer.point_cloud_range[0:3])
+                voxels.append(res_voxels)
+                coors.append(res_coors)
+                num_points.append(res_num_points)
+                voxel_centers.append(res_voxel_centers)
+            voxels = torch.cat(voxels, dim=0)
+            num_points = torch.cat(num_points, dim=0)
+            voxel_centers = torch.cat(voxel_centers, dim=0)
+            coors_batch = []
+            for i, coor in enumerate(coors):
+                coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
+                coors_batch.append(coor_pad)
+            coors_batch = torch.cat(coors_batch, dim=0)
+            voxel_dict['num_points'] = num_points
+            voxel_dict['voxel_centers'] = voxel_centers
+        elif self.voxel_type == 'dynamic':
+            coors = []
+            # dynamic voxelization only provides a coors mapping
+            for res in points:
+                res_coors = self.voxel_layer(res)
+                coors.append(res_coors)
+            voxels = torch.cat(points, dim=0)
+            coors_batch = []
+            for i, coor in enumerate(coors):
+                coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
+                coors_batch.append(coor_pad)
+            coors_batch = torch.cat(coors_batch, dim=0)
+        else:
+            raise ValueError(f'Invalid voxelization type {self.voxel_type}')
+        voxel_dict['voxels'] = voxels
+        voxel_dict['coors'] = coors_batch
+        return voxel_dict
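
To make the new contract concrete, here is a minimal smoke test of the preprocessor's voxelization; this is a sketch, assuming the import path below and an mmcv build that ships the `Voxelization` op, with illustrative KITTI-style `voxel_layer` values:

```python
import torch

# Assumed import path for the class defined above.
from mmdet3d.models.data_preprocessors import Det3DDataPreprocessor

preprocessor = Det3DDataPreprocessor(
    voxel=True,
    voxel_type='hard',
    voxel_layer=dict(
        max_num_points=32,                            # points kept per voxel
        point_cloud_range=[0, -40, -3, 70.4, 40, 1],  # illustrative KITTI range
        voxel_size=[0.05, 0.05, 0.1],
        max_voxels=(16000, 40000)))

# Two samples of (x, y, z, intensity) points.
points = [torch.rand(1000, 4) * 10, torch.rand(800, 4) * 10]
voxel_dict = preprocessor.voxelize(points)

# Hard voxelization: voxels is (M, max_num_points, C); coors is (M, 1 + 3)
# with the batch index prepended by F.pad; num_points and voxel_centers
# are only present for the 'hard' branch.
print(voxel_dict['voxels'].shape, voxel_dict['coors'].shape)
```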
 # Copyright (c) OpenMMLab. All rights reserved.
 from typing import List, Optional, Union
+from mmengine import InstanceData
 from mmdet3d.registry import MODELS
 from mmdet3d.structures import Det3DDataSample
 from mmdet3d.structures.det3d_data_sample import (ForwardResults,
@@ -131,11 +133,17 @@ class Base3DDetector(BaseDetector):
             (results_list_3d is not None),\
             'please pass at least one type of results_list'
+        if results_list_2d is None:
+            results_list_2d = [
+                InstanceData() for _ in range(len(results_list_3d))
+            ]
+        if results_list_3d is None:
+            results_list_3d = [
+                InstanceData() for _ in range(len(results_list_2d))
+            ]
         for i in range(len(results_list_3d)):
             result = Det3DDataSample()
-            if results_list_3d is not None:
-                result.pred_instances_3d = results_list_3d[i]
-            if results_list_2d is not None:
-                result.pred_instances = results_list_2d[i]
+            result.pred_instances_3d = results_list_3d[i]
+            result.pred_instances = results_list_2d[i]
             data_sample_list.append(result)
         return data_sample_list
@@ -10,8 +10,6 @@ class CenterPoint(MVXTwoStageDetector):
     """Base class of Multi-modality VoxelNet.
     Args:
-        pts_voxel_layer (dict, optional): Point cloud voxelization
-            layer. Defaults to None.
         pts_voxel_encoder (dict, optional): Point voxelization
             encoder layer. Defaults to None.
         pts_middle_encoder (dict, optional): Middle encoder layer
@@ -43,7 +41,6 @@ class CenterPoint(MVXTwoStageDetector):
     """
     def __init__(self,
-                 pts_voxel_layer: Optional[dict] = None,
                  pts_voxel_encoder: Optional[dict] = None,
                  pts_middle_encoder: Optional[dict] = None,
                  pts_fusion_layer: Optional[dict] = None,
@@ -61,9 +58,8 @@ class CenterPoint(MVXTwoStageDetector):
                  **kwargs):
         super(CenterPoint,
-              self).__init__(pts_voxel_layer, pts_voxel_encoder,
-                             pts_middle_encoder, pts_fusion_layer,
-                             img_backbone, pts_backbone, img_neck, pts_neck,
-                             pts_bbox_head, img_roi_head, img_rpn_head,
-                             train_cfg, test_cfg, init_cfg, data_preprocessor,
-                             **kwargs)
+              self).__init__(pts_voxel_encoder, pts_middle_encoder,
+                             pts_fusion_layer, img_backbone, pts_backbone,
+                             img_neck, pts_neck, pts_bbox_head, img_roi_head,
+                             img_rpn_head, train_cfg, test_cfg, init_cfg,
+                             data_preprocessor, **kwargs)
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import List, Tuple
+from typing import Tuple
-import torch
-from mmcv.runner import force_fp32
 from torch import Tensor
-from torch.nn import functional as F
 from mmdet3d.registry import MODELS
 from mmdet3d.utils import ConfigType, OptConfigType, OptMultiConfig
@@ -17,7 +14,6 @@ class DynamicVoxelNet(VoxelNet):
     """
     def __init__(self,
-                 voxel_layer: ConfigType,
                  voxel_encoder: ConfigType,
                  middle_encoder: ConfigType,
                  backbone: ConfigType,
@@ -28,7 +24,6 @@ class DynamicVoxelNet(VoxelNet):
                  data_preprocessor: OptConfigType = None,
                  init_cfg: OptMultiConfig = None) -> None:
         super().__init__(
-            voxel_layer=voxel_layer,
             voxel_encoder=voxel_encoder,
             middle_encoder=middle_encoder,
             backbone=backbone,
@@ -39,37 +34,12 @@ class DynamicVoxelNet(VoxelNet):
             data_preprocessor=data_preprocessor,
             init_cfg=init_cfg)
-    @torch.no_grad()
-    @force_fp32()
-    def voxelize(self, points: List[torch.Tensor]) -> tuple:
-        """Apply dynamic voxelization to points.
-
-        Args:
-            points (list[Tensor]): Points of each sample.
-
-        Returns:
-            tuple[Tensor]: Concatenated points and coordinates.
-        """
-        coors = []
-        # dynamic voxelization only provide a coors mapping
-        for res in points:
-            res_coors = self.voxel_layer(res)
-            coors.append(res_coors)
-        points = torch.cat(points, dim=0)
-        coors_batch = []
-        for i, coor in enumerate(coors):
-            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
-            coors_batch.append(coor_pad)
-        coors_batch = torch.cat(coors_batch, dim=0)
-        return points, coors_batch
     def extract_feat(self, batch_inputs_dict: dict) -> Tuple[Tensor]:
         """Extract features from points."""
-        # TODO: Remove voxelization to datapreprocessor
-        points = batch_inputs_dict['points']
-        voxels, coors = self.voxelize(points)
-        voxel_features, feature_coors = self.voxel_encoder(voxels, coors)
-        batch_size = coors[-1, 0].item() + 1
+        voxel_dict = batch_inputs_dict['voxels']
+        voxel_features, feature_coors = self.voxel_encoder(
+            voxel_dict['voxels'], voxel_dict['coors'])
+        batch_size = voxel_dict['coors'][-1, 0].item() + 1
         x = self.middle_encoder(voxel_features, feature_coors, batch_size)
         x = self.backbone(x)
         if self.with_neck:
...
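
Since detectors no longer voxelize for themselves, every refactored `extract_feat` reads the same structure from its inputs. A hedged sketch of the contract (key names come from the diffs; the tensors are normally produced by `Det3DDataPreprocessor`):

```python
# Sketch: batch_inputs_dict as consumed by the refactored extract_feat().
batch_inputs_dict = dict(
    points=points_list,  # raw per-sample point clouds, still available
    voxels=dict(         # produced by Det3DDataPreprocessor.voxelize()
        voxels=voxels,   # (M, max_num_points, C) hard / (N, C) dynamic
        coors=coors,     # (num_voxels, 1 + 3), batch index first
        num_points=num_points,          # hard voxelization only
        voxel_centers=voxel_centers))   # hard voxelization only
feats = detector.extract_feat(batch_inputs_dict)
```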
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import List, Optional, Sequence
+from typing import Dict, List, Optional, Sequence
-import torch
 from torch import Tensor
-from torch.nn import functional as F
 from mmdet3d.registry import MODELS
 from .mvx_two_stage import MVXTwoStageDetector
@@ -24,39 +22,18 @@ class DynamicMVXFasterRCNN(MVXTwoStageDetector):
     def __init__(self, **kwargs):
         super(DynamicMVXFasterRCNN, self).__init__(**kwargs)
-    @torch.no_grad()
-    def voxelize(self, points):
-        """Apply dynamic voxelization to points.
-
-        Args:
-            points (list[torch.Tensor]): Points of each sample.
-
-        Returns:
-            tuple[torch.Tensor]: Concatenated points and coordinates.
-        """
-        coors = []
-        # dynamic voxelization only provide a coors mapping
-        for res in points:
-            res_coors = self.pts_voxel_layer(res)
-            coors.append(res_coors)
-        points = torch.cat(points, dim=0)
-        coors_batch = []
-        for i, coor in enumerate(coors):
-            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
-            coors_batch.append(coor_pad)
-        coors_batch = torch.cat(coors_batch, dim=0)
-        return points, coors_batch
     def extract_pts_feat(
             self,
-            points: List[Tensor],
+            voxel_dict: Dict[str, Tensor],
+            points: Optional[List[Tensor]] = None,
             img_feats: Optional[Sequence[Tensor]] = None,
             batch_input_metas: Optional[List[dict]] = None
     ) -> Sequence[Tensor]:
         """Extract features of points.
         Args:
-            points (List[tensor]): Point cloud of multiple inputs.
+            voxel_dict (Dict[str, Tensor]): Dict of voxelization infos.
+            points (List[tensor], optional): Point cloud of multiple inputs.
             img_feats (list[Tensor], tuple[tensor], optional): Features from
                 image backbone.
             batch_input_metas (list[dict], optional): The meta information
@@ -68,10 +45,10 @@ class DynamicMVXFasterRCNN(MVXTwoStageDetector):
         """
         if not self.with_pts_bbox:
             return None
-        voxels, coors = self.voxelize(points)
         voxel_features, feature_coors = self.pts_voxel_encoder(
-            voxels, coors, points, img_feats, batch_input_metas)
-        batch_size = coors[-1, 0] + 1
+            voxel_dict['voxels'], voxel_dict['coors'], points, img_feats,
+            batch_input_metas)
+        batch_size = voxel_dict['coors'][-1, 0] + 1
         x = self.pts_middle_encoder(voxel_features, feature_coors, batch_size)
         x = self.pts_backbone(x)
         if self.with_pts_neck:
...
 # Copyright (c) OpenMMLab. All rights reserved.
 import copy
-from typing import Dict, List, Optional, Sequence, Tuple
+from typing import Dict, List, Optional, Sequence
 import torch
-from mmcv.ops import Voxelization
 from mmengine import InstanceData
 from torch import Tensor
-from torch.nn import functional as F
 from mmdet3d.registry import MODELS
 from mmdet3d.structures import Det3DDataSample
@@ -18,8 +16,6 @@ class MVXTwoStageDetector(Base3DDetector):
     """Base class of Multi-modality VoxelNet.
     Args:
-        pts_voxel_layer (dict, optional): Point cloud voxelization
-            layer. Defaults to None.
         pts_voxel_encoder (dict, optional): Point voxelization
             encoder layer. Defaults to None.
         pts_middle_encoder (dict, optional): Middle encoder layer
@@ -51,7 +47,6 @@ class MVXTwoStageDetector(Base3DDetector):
     """
     def __init__(self,
-                 pts_voxel_layer: Optional[dict] = None,
                  pts_voxel_encoder: Optional[dict] = None,
                  pts_middle_encoder: Optional[dict] = None,
                  pts_fusion_layer: Optional[dict] = None,
@@ -70,8 +65,6 @@ class MVXTwoStageDetector(Base3DDetector):
         super(MVXTwoStageDetector, self).__init__(
             init_cfg=init_cfg, data_preprocessor=data_preprocessor, **kwargs)
-        if pts_voxel_layer:
-            self.pts_voxel_layer = Voxelization(**pts_voxel_layer)
         if pts_voxel_encoder:
             self.pts_voxel_encoder = MODELS.build(pts_voxel_encoder)
         if pts_middle_encoder:
@@ -192,14 +185,16 @@ class MVXTwoStageDetector(Base3DDetector):
     def extract_pts_feat(
             self,
-            points: List[Tensor],
+            voxel_dict: Dict[str, Tensor],
+            points: Optional[List[Tensor]] = None,
             img_feats: Optional[Sequence[Tensor]] = None,
             batch_input_metas: Optional[List[dict]] = None
     ) -> Sequence[Tensor]:
         """Extract features of points.
         Args:
-            points (List[tensor]): Point cloud of multiple inputs.
+            voxel_dict (Dict[str, Tensor]): Dict of voxelization infos.
+            points (List[tensor], optional): Point cloud of multiple inputs.
             img_feats (list[Tensor], tuple[tensor], optional): Features from
                 image backbone.
             batch_input_metas (list[dict], optional): The meta information
@@ -211,11 +206,13 @@ class MVXTwoStageDetector(Base3DDetector):
         """
         if not self.with_pts_bbox:
             return None
-        voxels, num_points, coors = self.voxelize(points)
-        voxel_features = self.pts_voxel_encoder(voxels, num_points, coors,
-                                                img_feats, batch_input_metas)
-        batch_size = coors[-1, 0] + 1
-        x = self.pts_middle_encoder(voxel_features, coors, batch_size)
+        voxel_features = self.pts_voxel_encoder(voxel_dict['voxels'],
+                                                voxel_dict['num_points'],
+                                                voxel_dict['coors'], img_feats,
+                                                batch_input_metas)
+        batch_size = voxel_dict['coors'][-1, 0] + 1
+        x = self.pts_middle_encoder(voxel_features, voxel_dict['coors'],
+                                    batch_size)
         x = self.pts_backbone(x)
         if self.with_pts_neck:
             x = self.pts_neck(x)
@@ -238,39 +235,17 @@ class MVXTwoStageDetector(Base3DDetector):
             tuple: Two elements in tuple arrange as
                 image features and point cloud features.
         """
-        points = batch_inputs_dict['points']
+        voxel_dict = batch_inputs_dict['voxels']
         imgs = batch_inputs_dict['imgs']
+        points = batch_inputs_dict['points']
         img_feats = self.extract_img_feat(imgs, batch_input_metas)
         pts_feats = self.extract_pts_feat(
-            points, img_feats=img_feats, batch_input_metas=batch_input_metas)
+            voxel_dict,
+            points=points,
+            img_feats=img_feats,
+            batch_input_metas=batch_input_metas)
         return (img_feats, pts_feats)
-    @torch.no_grad()
-    def voxelize(self, points: List[Tensor]) -> Tuple:
-        """Apply dynamic voxelization to points.
-
-        Args:
-            points (list[torch.Tensor]): Points of each sample.
-
-        Returns:
-            tuple[torch.Tensor]: Concatenated points, number of points
-                per voxel, and coordinates.
-        """
-        voxels, coors, num_points = [], [], []
-        for res in points:
-            res_voxels, res_coors, res_num_points = self.pts_voxel_layer(res)
-            voxels.append(res_voxels)
-            coors.append(res_coors)
-            num_points.append(res_num_points)
-        voxels = torch.cat(voxels, dim=0)
-        num_points = torch.cat(num_points, dim=0)
-        coors_batch = []
-        for i, coor in enumerate(coors):
-            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
-            coors_batch.append(coor_pad)
-        coors_batch = torch.cat(coors_batch, dim=0)
-        return voxels, num_points, coors_batch
     def loss(self, batch_inputs_dict: Dict[List, torch.Tensor],
              batch_data_samples: List[Det3DDataSample],
              **kwargs) -> List[Det3DDataSample]:
...
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import Dict, List, Optional
+from typing import Dict, Optional
-import torch
-from mmcv.ops import Voxelization
-from torch.nn import functional as F
 from mmdet3d.registry import MODELS
 from .two_stage import TwoStage3DDetector
@@ -17,7 +13,6 @@ class PartA2(TwoStage3DDetector):
     """
     def __init__(self,
-                 voxel_layer: dict,
                  voxel_encoder: dict,
                  middle_encoder: dict,
                  backbone: dict,
@@ -37,7 +32,6 @@ class PartA2(TwoStage3DDetector):
             test_cfg=test_cfg,
             init_cfg=init_cfg,
             data_preprocessor=data_preprocessor)
-        self.voxel_layer = Voxelization(**voxel_layer)
         self.voxel_encoder = MODELS.build(voxel_encoder)
         self.middle_encoder = MODELS.build(middle_encoder)
@@ -57,8 +51,7 @@ class PartA2(TwoStage3DDetector):
             and for inside 3D object detection, usually a dict containing
             features will be obtained.
         """
-        points = batch_inputs_dict['points']
-        voxel_dict = self.voxelize(points)
+        voxel_dict = batch_inputs_dict['voxels']
         voxel_features = self.voxel_encoder(voxel_dict['voxels'],
                                             voxel_dict['num_points'],
                                             voxel_dict['coors'])
@@ -71,34 +64,3 @@ class PartA2(TwoStage3DDetector):
         feats_dict.update({'neck_feats': neck_feats})
         feats_dict['voxels_dict'] = voxel_dict
         return feats_dict
-    @torch.no_grad()
-    def voxelize(self, points: List[torch.Tensor]) -> Dict:
-        """Apply hard voxelization to points."""
-        voxels, coors, num_points, voxel_centers = [], [], [], []
-        for res in points:
-            res_voxels, res_coors, res_num_points = self.voxel_layer(res)
-            res_voxel_centers = (
-                res_coors[:, [2, 1, 0]] + 0.5) * res_voxels.new_tensor(
-                    self.voxel_layer.voxel_size) + res_voxels.new_tensor(
-                        self.voxel_layer.point_cloud_range[0:3])
-            voxels.append(res_voxels)
-            coors.append(res_coors)
-            num_points.append(res_num_points)
-            voxel_centers.append(res_voxel_centers)
-        voxels = torch.cat(voxels, dim=0)
-        num_points = torch.cat(num_points, dim=0)
-        voxel_centers = torch.cat(voxel_centers, dim=0)
-        coors_batch = []
-        for i, coor in enumerate(coors):
-            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
-            coors_batch.append(coor_pad)
-        coors_batch = torch.cat(coors_batch, dim=0)
-        voxel_dict = dict(
-            voxels=voxels,
-            num_points=num_points,
-            coors=coors_batch,
-            voxel_centers=voxel_centers)
-        return voxel_dict
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import List, Tuple
+from typing import Tuple
-import torch
-from mmcv.ops import Voxelization
-from mmcv.runner import force_fp32
 from torch import Tensor
-from torch.nn import functional as F
 from mmdet3d.registry import MODELS
 from mmdet3d.utils import ConfigType, OptConfigType, OptMultiConfig
@@ -17,7 +13,6 @@ class VoxelNet(SingleStage3DDetector):
     r"""`VoxelNet <https://arxiv.org/abs/1711.06396>`_ for 3D detection."""
     def __init__(self,
-                 voxel_layer: ConfigType,
                  voxel_encoder: ConfigType,
                  middle_encoder: ConfigType,
                  backbone: ConfigType,
@@ -35,37 +30,18 @@ class VoxelNet(SingleStage3DDetector):
             test_cfg=test_cfg,
             data_preprocessor=data_preprocessor,
             init_cfg=init_cfg)
-        self.voxel_layer = Voxelization(**voxel_layer)
         self.voxel_encoder = MODELS.build(voxel_encoder)
         self.middle_encoder = MODELS.build(middle_encoder)
-    @torch.no_grad()
-    @force_fp32()
-    def voxelize(self, points: List[torch.Tensor]) -> tuple:
-        """Apply hard voxelization to points."""
-        voxels, coors, num_points = [], [], []
-        for res in points:
-            res_voxels, res_coors, res_num_points = self.voxel_layer(res)
-            voxels.append(res_voxels)
-            coors.append(res_coors)
-            num_points.append(res_num_points)
-        voxels = torch.cat(voxels, dim=0)
-        num_points = torch.cat(num_points, dim=0)
-        coors_batch = []
-        for i, coor in enumerate(coors):
-            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
-            coors_batch.append(coor_pad)
-        coors_batch = torch.cat(coors_batch, dim=0)
-        return voxels, num_points, coors_batch
     def extract_feat(self, batch_inputs_dict: dict) -> Tuple[Tensor]:
         """Extract features from points."""
-        # TODO: Remove voxelization to datapreprocessor
-        points = batch_inputs_dict['points']
-        voxels, num_points, coors = self.voxelize(points)
-        voxel_features = self.voxel_encoder(voxels, num_points, coors)
-        batch_size = coors[-1, 0].item() + 1
-        x = self.middle_encoder(voxel_features, coors, batch_size)
+        voxel_dict = batch_inputs_dict['voxels']
+        voxel_features = self.voxel_encoder(voxel_dict['voxels'],
+                                            voxel_dict['num_points'],
+                                            voxel_dict['coors'])
+        batch_size = voxel_dict['coors'][-1, 0].item() + 1
+        x = self.middle_encoder(voxel_features, voxel_dict['coors'],
+                                batch_size)
         x = self.backbone(x)
         if self.with_neck:
             x = self.neck(x)
...
@@ -33,10 +33,7 @@ def _generate_s3dis_seg_dataset_config():
             with_label_3d=False,
             with_mask_3d=False,
             with_seg_3d=True),
-        dict(
-            type='PointSegClassMapping',
-            valid_cat_ids=tuple(range(len(classes))),
-            max_cat_id=13),
+        dict(type='PointSegClassMapping'),
         dict(
             type='IndoorPatchPointSample',
             num_points=5,
...
@@ -55,11 +55,7 @@ def _generate_scannet_seg_dataset_config():
             with_label_3d=False,
             with_mask_3d=False,
             with_seg_3d=True),
-        dict(
-            type='PointSegClassMapping',
-            valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24,
-                           28, 33, 34, 36, 39),
-            max_cat_id=40),
+        dict(type='PointSegClassMapping'),
         dict(
             type='IndoorPatchPointSample',
             num_points=5,
...
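
These test-config hunks pair with the `Seg3DDataset` change earlier: the class mapping now travels with each sample instead of being re-declared per pipeline. A sketch of the resulting flow (attribute and key names are taken from the diffs; the loader entry is illustrative):

```python
# The dataset computes the mapping once and attaches it to every sample ...
info['seg_label_mapping'] = self.seg_label_mapping

# ... so the transform needs no valid_cat_ids / max_cat_id arguments anymore.
pipeline = [
    dict(type='LoadPointsFromFile', coord_type='DEPTH', load_dim=6, use_dim=3),
    dict(type='PointSegClassMapping'),  # reads `seg_label_mapping` from the input dict
]
```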