Unverified commit 9073a3b5 authored by Tai-Wang, committed by GitHub

[Refactor] Support imvoxelnet at SUN RGB-D on 1.x branch (#2141)

* Support imvoxelnet@sunrgbd on 1.x branch

* Add unit tests

* Update README.md

* Update imvoxelnet_2xb4_sunrgbd-3d-10class.py

* Add typehints

* Fix lint

* Fix BC-breaking caused by updated keys

* Add coord_type in the imvoxelnet kitti config
parent bd1525ec
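As a quick sanity check that the new SUN RGB-D config builds, the following sketch (not part of the commit) constructs the detector from it. It assumes an mmdetection3d 1.x checkout with mmengine installed and the `configs/` directory available.

```python
# Minimal sketch (not part of this commit): build ImVoxelNet from the new
# SUN RGB-D config. Assumes an mmdetection3d 1.x checkout with mmengine.
from mmengine.config import Config

from mmdet3d.registry import MODELS
from mmdet3d.utils import register_all_modules

register_all_modules()  # register mmdet3d modules into the default scope
cfg = Config.fromfile(
    'configs/imvoxelnet/imvoxelnet_2xb4_sunrgbd-3d-10class.py')
model = MODELS.build(cfg.model)
print(type(model).__name__)  # expected: ImVoxelNet
```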
@@ -26,6 +26,12 @@ Results for SUN RGB-D, ScanNet and nuScenes are currently available in ImVoxelNet
| :--------------------------------------------: | :---: | :-----: | :------: | :------------: | :---: | :------: |
| [ResNet-50](./imvoxelnet_8xb4_kitti-3d-car.py) | Car | 3x | | | 17.26 | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/imvoxelnet/imvoxelnet_4x8_kitti-3d-car/imvoxelnet_4x8_kitti-3d-car_20210830_003014-3d0ffdf4.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/imvoxelnet/imvoxelnet_4x8_kitti-3d-car/imvoxelnet_4x8_kitti-3d-car_20210830_003014.log.json) |
### SUN RGB-D
| Backbone | Lr schd | Mem (GB) | Inf time (fps) | mAP@0.25 | mAP@0.5 | Download |
| :-------------------------------------------------: | :-----: | :------: | :------------: | :------: | :-----: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [ResNet-50](./imvoxelnet_2xb4_sunrgbd-3d-10class.py) | 2x | 7.2 | 22.5 | 40.96 | 13.50 | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/imvoxelnet/imvoxelnet_4x2_sunrgbd-3d-10class/imvoxelnet_4x2_sunrgbd-3d-10class_20220809_184416-29ca7d2e.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/imvoxelnet/imvoxelnet_4x2_sunrgbd-3d-10class/imvoxelnet_4x2_sunrgbd-3d-10class_20220809_184416.log.json) |
## Citation

...
_base_ = [
'../_base_/schedules/mmdet-schedule-1x.py', '../_base_/default_runtime.py'
]
prior_generator = dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-3.2, -0.2, -2.28, 3.2, 6.2, 0.28]],
rotations=[.0])
model = dict(
type='ImVoxelNet',
data_preprocessor=dict(
type='Det3DDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=32),
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
style='pytorch'),
neck=dict(
type='mmdet.FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=4),
neck_3d=dict(
type='IndoorImVoxelNeck',
in_channels=256,
out_channels=128,
n_blocks=[1, 1, 1]),
bbox_head=dict(
type='ImVoxelHead',
n_classes=10,
n_levels=3,
n_channels=128,
n_reg_outs=7,
pts_assign_threshold=27,
pts_center_threshold=18,
prior_generator=prior_generator),
prior_generator=prior_generator,
n_voxels=[40, 40, 16],
coord_type='DEPTH',
train_cfg=dict(),
test_cfg=dict(nms_pre=1000, iou_thr=.25, score_thr=.01))
dataset_type = 'SUNRGBDDataset'
data_root = 'data/sunrgbd/'
class_names = [
'bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub'
]
metainfo = dict(CLASSES=class_names)
file_client_args = dict(backend='disk')
# Uncomment the following if using Ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/sunrgbd/':
# 's3://openmmlab/datasets/detection3d/sunrgbd_processed/',
# 'data/sunrgbd/':
# 's3://openmmlab/datasets/detection3d/sunrgbd_processed/'
# }))
train_pipeline = [
dict(type='LoadAnnotations3D'),
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(type='RandomResize', scale=[(512, 384), (768, 576)], keep_ratio=True),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='Pack3DDetInputs', keys=['img', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(type='Resize', scale=(640, 480), keep_ratio=True),
dict(type='Pack3DDetInputs', keys=['img'])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='sunrgbd_infos_train.pkl',
pipeline=train_pipeline,
test_mode=False,
filter_empty_gt=True,
box_type_3d='Depth',
metainfo=metainfo)))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='sunrgbd_infos_val.pkl',
pipeline=test_pipeline,
test_mode=True,
box_type_3d='Depth',
metainfo=metainfo))
test_dataloader = val_dataloader
val_evaluator = dict(
type='IndoorMetric',
ann_file=data_root + 'sunrgbd_infos_val.pkl',
metric='bbox')
test_evaluator = val_evaluator
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(
_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001),
paramwise_cfg=dict(
custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}),
clip_grad=dict(max_norm=35., norm_type=2))
param_scheduler = [
dict(
type='MultiStepLR',
begin=0,
end=12,
by_epoch=True,
milestones=[8, 11],
gamma=0.1)
]
# hooks
default_hooks = dict(checkpoint=dict(type='CheckpointHook', max_keep_ckpts=1))
# runtime
find_unused_parameters = True # only 1 of 4 FPN outputs is used
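Because this file follows the standard mmengine config-inheritance scheme, downstream configs can override individual fields instead of copying it. The sketch below is a hypothetical derived config (file name and values are illustrative, not part of this commit).

```python
# my_imvoxelnet_sunrgbd.py -- hypothetical derived config, not in this commit.
# Inherit everything from the file above and override only a few fields.
_base_ = ['./imvoxelnet_2xb4_sunrgbd-3d-10class.py']

train_dataloader = dict(batch_size=2)  # halve the per-GPU batch size
default_hooks = dict(checkpoint=dict(max_keep_ckpts=3))  # keep more checkpoints
```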
@@ -52,7 +52,8 @@ model = dict(
                type='mmdet.CrossEntropyLoss', use_sigmoid=False,
                loss_weight=0.2)),
    n_voxels=[216, 248, 12],
-   anchor_generator=dict(
+   coord_type='LIDAR',
+   prior_generator=dict(
        type='AlignedAnchor3DRangeGenerator',
        ranges=[[-0.16, -39.68, -3.08, 68.96, 39.68, 0.76]],
        rotations=[.0]),

...
@@ -9,6 +9,7 @@ from .fcaf3d_head import FCAF3DHead
from .fcos_mono3d_head import FCOSMono3DHead
from .free_anchor3d_head import FreeAnchor3DHead
from .groupfree3d_head import GroupFree3DHead
+from .imvoxel_head import ImVoxelHead
from .monoflex_head import MonoFlexHead
from .parta2_rpn_head import PartA2RPNHead
from .pgd_head import PGDHead
@@ -23,5 +24,5 @@ __all__ = [
    'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
    'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
    'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',
-   'MonoFlexHead', 'Base3DDenseHead', 'FCAF3DHead'
+   'MonoFlexHead', 'Base3DDenseHead', 'FCAF3DHead', 'ImVoxelHead'
]
This diff is collapsed.
@@ -7,6 +7,7 @@ from mmengine.structures import InstanceData
from mmdet3d.models.detectors import Base3DDetector
from mmdet3d.models.layers.fusion_layers.point_fusion import point_sample
from mmdet3d.registry import MODELS, TASK_UTILS
+from mmdet3d.structures.bbox_3d import get_proj_mat_by_coord_type
from mmdet3d.structures.det3d_data_sample import SampleList
from mmdet3d.utils import ConfigType, OptConfigType, OptInstanceList
@@ -20,9 +21,11 @@ class ImVoxelNet(Base3DDetector):
        neck (:obj:`ConfigDict` or dict): The neck config.
        neck_3d (:obj:`ConfigDict` or dict): The 3D neck config.
        bbox_head (:obj:`ConfigDict` or dict): The bbox head config.
+       prior_generator (:obj:`ConfigDict` or dict): The prior points
+           generator config.
        n_voxels (list): Number of voxels along x, y, z axis.
-       anchor_generator (:obj:`ConfigDict` or dict): The anchor generator
-           config.
+       coord_type (str): The type of coordinates of points cloud:
+           'DEPTH', 'LIDAR', or 'CAMERA'.
        train_cfg (:obj:`ConfigDict` or dict, optional): Config dict of
            training hyper-parameters. Defaults to None.
        test_cfg (:obj:`ConfigDict` or dict, optional): Config dict of test
@@ -39,8 +42,9 @@ class ImVoxelNet(Base3DDetector):
                 neck: ConfigType,
                 neck_3d: ConfigType,
                 bbox_head: ConfigType,
+                prior_generator: ConfigType,
                 n_voxels: List,
-                anchor_generator: ConfigType,
+                coord_type: str,
                 train_cfg: OptConfigType = None,
                 test_cfg: OptConfigType = None,
                 data_preprocessor: OptConfigType = None,
@@ -53,8 +57,9 @@ class ImVoxelNet(Base3DDetector):
        bbox_head.update(train_cfg=train_cfg)
        bbox_head.update(test_cfg=test_cfg)
        self.bbox_head = MODELS.build(bbox_head)
+       self.prior_generator = TASK_UTILS.build(prior_generator)
        self.n_voxels = n_voxels
-       self.anchor_generator = TASK_UTILS.build(anchor_generator)
+       self.coord_type = coord_type
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
@@ -62,6 +67,8 @@ class ImVoxelNet(Base3DDetector):
                     batch_data_samples: SampleList):
        """Extract 3d features from the backbone -> fpn -> 3d projection.
+       -> 3d neck -> bbox_head.

        Args:
            batch_inputs_dict (dict): The model input dict which include
                the 'imgs' key.
@@ -72,7 +79,9 @@ class ImVoxelNet(Base3DDetector):
                as `gt_instance` or `gt_panoptic_seg` or `gt_sem_seg`.

        Returns:
-           torch.Tensor: of shape (N, C_out, N_x, N_y, N_z)
+           Tuple:
+           - torch.Tensor: Features of shape (N, C_out, N_x, N_y, N_z).
+           - torch.Tensor: Valid mask of shape (N, 1, N_x, N_y, N_z).
        """
        img = batch_inputs_dict['imgs']
        batch_img_metas = [
@@ -80,9 +89,9 @@ class ImVoxelNet(Base3DDetector):
        ]
        x = self.backbone(img)
        x = self.neck(x)[0]
-       points = self.anchor_generator.grid_anchors(
-           [self.n_voxels[::-1]], device=img.device)[0][:, :3]
-       volumes = []
+       points = self.prior_generator.grid_anchors([self.n_voxels[::-1]],
+                                                  device=img.device)[0][:, :3]
+       volumes, valid_preds = [], []
        for feature, img_meta in zip(x, batch_img_metas):
            img_scale_factor = (
                points.new_tensor(img_meta['scale_factor'][:2])
@@ -91,13 +100,14 @@ class ImVoxelNet(Base3DDetector):
            img_crop_offset = (
                points.new_tensor(img_meta['img_crop_offset'])
                if 'img_crop_offset' in img_meta.keys() else 0)
-           lidar2img = points.new_tensor(img_meta['lidar2img'])
+           proj_mat = points.new_tensor(
+               get_proj_mat_by_coord_type(img_meta, self.coord_type))
            volume = point_sample(
                img_meta,
                img_features=feature[None, ...],
                points=points,
-               proj_mat=lidar2img,
-               coord_type='LIDAR',
+               proj_mat=points.new_tensor(proj_mat),
+               coord_type=self.coord_type,
                img_scale_factor=img_scale_factor,
                img_crop_offset=img_crop_offset,
                img_flip=img_flip,
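For reference, `get_proj_mat_by_coord_type` just selects the projection matrix that matches the coordinate system. The sketch below (not part of the commit) illustrates the expected behaviour with dummy values; the exact `img_meta` keys are assumed from the usual mmdet3d conventions.

```python
# Rough illustration with dummy values; the img_meta keys are assumed to
# follow the usual mmdet3d convention for each coordinate system.
from mmdet3d.structures.bbox_3d import get_proj_mat_by_coord_type

img_meta = {'lidar2img': 'L2I', 'depth2img': 'D2I', 'cam2img': 'C2I'}
print(get_proj_mat_by_coord_type(img_meta, 'LIDAR'))   # -> 'L2I'
print(get_proj_mat_by_coord_type(img_meta, 'DEPTH'))   # -> 'D2I'
print(get_proj_mat_by_coord_type(img_meta, 'CAMERA'))  # -> 'C2I'
```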
@@ -106,9 +116,11 @@ class ImVoxelNet(Base3DDetector):
                aligned=False)
            volumes.append(
                volume.reshape(self.n_voxels[::-1] + [-1]).permute(3, 2, 1, 0))
+           valid_preds.append(
+               ~torch.all(volumes[-1] == 0, dim=0, keepdim=True))
        x = torch.stack(volumes)
        x = self.neck_3d(x)
-       return x
+       return x, torch.stack(valid_preds).float()

    def loss(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
             **kwargs) -> Union[dict, list]:
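The `valid_preds` mask added above marks voxels that actually received image features. The toy snippet below (shapes are illustrative, not from the commit) reproduces the same `torch.all` trick on a dummy volume.

```python
# Toy reproduction of the valid-voxel mask logic; shapes are illustrative.
import torch

volume = torch.zeros(64, 40, 40, 16)           # (C, N_x, N_y, N_z) sampled features
volume[:, :20] = torch.randn(64, 20, 40, 16)   # pretend only half the grid projects onto the image
valid = ~torch.all(volume == 0, dim=0, keepdim=True)  # (1, N_x, N_y, N_z) bool mask
print(valid.float().mean().item())             # roughly 0.5
```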
@@ -126,8 +138,12 @@ class ImVoxelNet(Base3DDetector):
        Returns:
            dict: A dictionary of loss components.
        """
-       x = self.extract_feat(batch_inputs_dict, batch_data_samples)
+       x, valid_preds = self.extract_feat(batch_inputs_dict,
+                                          batch_data_samples)
+       # For indoor datasets ImVoxelNet uses ImVoxelHead that handles
+       # mask of visible voxels.
+       if self.coord_type == 'DEPTH':
+           x += (valid_preds, )
        losses = self.bbox_head.loss(x, batch_data_samples, **kwargs)
        return losses
@@ -159,8 +175,14 @@ class ImVoxelNet(Base3DDetector):
                - bboxes_3d (Tensor): Contains a tensor with shape
                    (num_instances, C) where C >=7.
        """
-       x = self.extract_feat(batch_inputs_dict, batch_data_samples)
-       results_list = self.bbox_head.predict(x, batch_data_samples, **kwargs)
+       x, valid_preds = self.extract_feat(batch_inputs_dict,
+                                          batch_data_samples)
+       # For indoor datasets ImVoxelNet uses ImVoxelHead that handles
+       # mask of visible voxels.
+       if self.coord_type == 'DEPTH':
+           x += (valid_preds, )
+       results_list = \
+           self.bbox_head.predict(x, batch_data_samples, **kwargs)
        predictions = self.add_pred_to_datasample(batch_data_samples,
                                                  results_list)
        return predictions
@@ -182,7 +204,12 @@ class ImVoxelNet(Base3DDetector):
        Returns:
            tuple[list]: A tuple of features from ``bbox_head`` forward.
        """
-       x = self.extract_feat(batch_inputs_dict, batch_data_samples)
+       x, valid_preds = self.extract_feat(batch_inputs_dict,
+                                          batch_data_samples)
+       # For indoor datasets ImVoxelNet uses ImVoxelHead that handles
+       # mask of visible voxels.
+       if self.coord_type == 'DEPTH':
+           x += (valid_preds, )
        results = self.bbox_head.forward(x)
        return results

...
@@ -2,10 +2,11 @@
from mmdet.models.necks.fpn import FPN

from .dla_neck import DLANeck
-from .imvoxel_neck import OutdoorImVoxelNeck
+from .imvoxel_neck import IndoorImVoxelNeck, OutdoorImVoxelNeck
from .pointnet2_fp_neck import PointNetFPNeck
from .second_fpn import SECONDFPN

__all__ = [
-   'FPN', 'SECONDFPN', 'OutdoorImVoxelNeck', 'PointNetFPNeck', 'DLANeck'
+   'FPN', 'SECONDFPN', 'OutdoorImVoxelNeck', 'PointNetFPNeck', 'DLANeck',
+   'IndoorImVoxelNeck'
]
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.cnn import ConvModule
+from mmengine.model import BaseModule
from torch import nn

from mmdet3d.registry import MODELS


@MODELS.register_module()
-class OutdoorImVoxelNeck(nn.Module):
+class OutdoorImVoxelNeck(BaseModule):
    """Neck for ImVoxelNet outdoor scenario.

    Args:
-       in_channels (int): Input channels of multi-scale feature map.
-       out_channels (int): Output channels of multi-scale feature map.
+       in_channels (int): Number of channels in an input tensor.
+       out_channels (int): Number of channels in all output tensors.
    """

    def __init__(self, in_channels, out_channels):
-       super().__init__()
+       super(OutdoorImVoxelNeck, self).__init__()
        self.model = nn.Sequential(
-           ResModule(in_channels),
+           ResModule(in_channels, in_channels),
            ConvModule(
                in_channels=in_channels,
                out_channels=in_channels * 2,
@@ -27,7 +28,7 @@ class OutdoorImVoxelNeck(nn.Module):
                conv_cfg=dict(type='Conv3d'),
                norm_cfg=dict(type='BN3d'),
                act_cfg=dict(type='ReLU', inplace=True)),
-           ResModule(in_channels * 2),
+           ResModule(in_channels * 2, in_channels * 2),
            ConvModule(
                in_channels=in_channels * 2,
                out_channels=in_channels * 4,
@@ -37,7 +38,7 @@ class OutdoorImVoxelNeck(nn.Module):
                conv_cfg=dict(type='Conv3d'),
                norm_cfg=dict(type='BN3d'),
                act_cfg=dict(type='ReLU', inplace=True)),
-           ResModule(in_channels * 4),
+           ResModule(in_channels * 4, in_channels * 4),
            ConvModule(
                in_channels=in_channels * 4,
                out_channels=out_channels,
@@ -66,31 +67,148 @@ class OutdoorImVoxelNeck(nn.Module):
        pass
@MODELS.register_module()
class IndoorImVoxelNeck(BaseModule):
"""Neck for ImVoxelNet outdoor scenario.
Args:
in_channels (int): Number of channels in an input tensor.
out_channels (int): Number of channels in all output tensors.
n_blocks (list[int]): Number of blocks for each feature level.
"""
def __init__(self, in_channels, out_channels, n_blocks):
super(IndoorImVoxelNeck, self).__init__()
self.n_scales = len(n_blocks)
n_channels = in_channels
for i in range(len(n_blocks)):
stride = 1 if i == 0 else 2
self.__setattr__(f'down_layer_{i}',
self._make_layer(stride, n_channels, n_blocks[i]))
n_channels = n_channels * stride
if i > 0:
self.__setattr__(
f'up_block_{i}',
self._make_up_block(n_channels, n_channels // 2))
self.__setattr__(f'out_block_{i}',
self._make_block(n_channels, out_channels))
def forward(self, x):
"""Forward function.
Args:
x (torch.Tensor): of shape (N, C_in, N_x, N_y, N_z).
Returns:
list[torch.Tensor]: of shape (N, C_out, N_xi, N_yi, N_zi).
"""
down_outs = []
for i in range(self.n_scales):
x = self.__getattr__(f'down_layer_{i}')(x)
down_outs.append(x)
outs = []
for i in range(self.n_scales - 1, -1, -1):
if i < self.n_scales - 1:
x = self.__getattr__(f'up_block_{i + 1}')(x)
x = down_outs[i] + x
out = self.__getattr__(f'out_block_{i}')(x)
outs.append(out)
return outs[::-1]
@staticmethod
def _make_layer(stride, n_channels, n_blocks):
"""Make a layer from several residual blocks.
Args:
stride (int): Stride of the first residual block.
n_channels (int): Number of channels of the first residual block.
n_blocks (int): Number of residual blocks.
Returns:
torch.nn.Module: With several residual blocks.
"""
blocks = []
for i in range(n_blocks):
if i == 0 and stride != 1:
blocks.append(ResModule(n_channels, n_channels * 2, stride))
n_channels = n_channels * 2
else:
blocks.append(ResModule(n_channels, n_channels))
return nn.Sequential(*blocks)
@staticmethod
def _make_block(in_channels, out_channels):
"""Make a convolutional block.
Args:
in_channels (int): Number of input channels.
out_channels (int): Number of output channels.
Returns:
torch.nn.Module: Convolutional block.
"""
return nn.Sequential(
nn.Conv3d(in_channels, out_channels, 3, 1, 1, bias=False),
nn.BatchNorm3d(out_channels), nn.ReLU(inplace=True))
@staticmethod
def _make_up_block(in_channels, out_channels):
"""Make upsampling convolutional block.
Args:
in_channels (int): Number of input channels.
out_channels (int): Number of output channels.
Returns:
torch.nn.Module: Upsampling convolutional block.
"""
return nn.Sequential(
nn.ConvTranspose3d(in_channels, out_channels, 2, 2, bias=False),
nn.BatchNorm3d(out_channels), nn.ReLU(inplace=True),
nn.Conv3d(out_channels, out_channels, 3, 1, 1, bias=False),
nn.BatchNorm3d(out_channels), nn.ReLU(inplace=True))
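A small smoke test of the new neck follows (not part of the commit; channel and grid sizes are made up, and the grid must be divisible by 4 because of the two stride-2 stages).

```python
# Smoke test for IndoorImVoxelNeck (illustrative sizes, not from the commit).
import torch
from mmdet3d.models.necks import IndoorImVoxelNeck

neck = IndoorImVoxelNeck(in_channels=64, out_channels=32, n_blocks=[1, 1, 1])
x = torch.randn(1, 64, 16, 16, 8)   # (N, C_in, N_x, N_y, N_z)
outs = neck(x)
# Three levels, finest resolution first, all with 32 output channels:
# [(1, 32, 16, 16, 8), (1, 32, 8, 8, 4), (1, 32, 4, 4, 2)]
print([tuple(o.shape) for o in outs])
```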
class ResModule(nn.Module):
    """3d residual block for ImVoxelNeck.

    Args:
-       n_channels (int): Input channels of a feature map.
+       in_channels (int): Number of channels in input tensor.
+       out_channels (int): Number of channels in output tensor.
+       stride (int, optional): Stride of the block. Defaults to 1.
    """

-   def __init__(self, n_channels):
+   def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv0 = ConvModule(
-           in_channels=n_channels,
-           out_channels=n_channels,
+           in_channels=in_channels,
+           out_channels=out_channels,
            kernel_size=3,
+           stride=stride,
            padding=1,
            conv_cfg=dict(type='Conv3d'),
            norm_cfg=dict(type='BN3d'),
            act_cfg=dict(type='ReLU', inplace=True))
        self.conv1 = ConvModule(
-           in_channels=n_channels,
-           out_channels=n_channels,
+           in_channels=out_channels,
+           out_channels=out_channels,
            kernel_size=3,
            padding=1,
            conv_cfg=dict(type='Conv3d'),
            norm_cfg=dict(type='BN3d'),
            act_cfg=None)
+       if stride != 1:
+           self.downsample = ConvModule(
+               in_channels=in_channels,
+               out_channels=out_channels,
+               kernel_size=1,
+               stride=stride,
+               padding=0,
+               conv_cfg=dict(type='Conv3d'),
+               norm_cfg=dict(type='BN3d'),
+               act_cfg=None)
+       self.stride = stride
        self.activation = nn.ReLU(inplace=True)

    def forward(self, x):
@@ -105,6 +223,8 @@ class ResModule(nn.Module):
        identity = x
        x = self.conv0(x)
        x = self.conv1(x)
-       x = identity + x
+       if self.stride != 1:
+           identity = self.downsample(identity)
+       x = x + identity
        x = self.activation(x)
        return x
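To see what the new strided shortcut does, here is a throwaway check (not part of the commit; sizes are illustrative, and `ResModule` is imported from the module directly since it is not re-exported by the package).

```python
# Throwaway check of the strided ResModule (illustrative sizes).
import torch
from mmdet3d.models.necks.imvoxel_neck import ResModule

block = ResModule(in_channels=64, out_channels=128, stride=2)
x = torch.randn(1, 64, 16, 16, 8)
print(tuple(block(x).shape))  # (1, 128, 8, 8, 4): channels doubled, grid halved
```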
@@ -9,10 +9,10 @@ from mmdet3d.models.dense_heads import FCAF3DHead
from mmdet3d.testing import create_detector_inputs


-class TestAnchor3DHead(TestCase):
+class TestFCAF3DHead(TestCase):

    def test_fcaf3d_head_loss(self):
-       """Test anchor head loss when truth is empty and non-empty."""
+       """Test fcaf3d head loss when truth is empty and non-empty."""
        if not torch.cuda.is_available():
            pytest.skip('test requires GPU and torch+cuda')

...
# Copyright (c) OpenMMLab. All rights reserved.
from unittest import TestCase
import pytest
import torch
from mmdet3d import * # noqa
from mmdet3d.models.dense_heads import ImVoxelHead
from mmdet3d.testing import create_detector_inputs
class TestImVoxelHead(TestCase):
def test_imvoxel_head_loss(self):
"""Test imvoxel head loss when truth is empty and non-empty."""
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
# build head
prior_generator = dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-3.2, -0.2, -2.28, 3.2, 6.2, 0.28]],
rotations=[.0])
imvoxel_head = ImVoxelHead(
n_classes=1,
n_levels=1,
n_channels=32,
n_reg_outs=7,
pts_assign_threshold=27,
pts_center_threshold=18,
prior_generator=prior_generator,
center_loss=dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True),
bbox_loss=dict(type='RotatedIoU3DLoss'),
cls_loss=dict(type='mmdet.FocalLoss'),
)
imvoxel_head = imvoxel_head.cuda()
# fake input of head
# (x, valid_preds)
x = [
torch.randn(1, 32, 10, 10, 4).cuda(),
torch.ones(1, 1, 10, 10, 4).cuda()
]
# fake annotation
num_gt_instance = 1
packed_inputs = create_detector_inputs(
with_points=False,
with_img=True,
img_size=(128, 128),
num_gt_instance=num_gt_instance,
with_pts_semantic_mask=False,
with_pts_instance_mask=False)
data_samples = [
sample.cuda() for sample in packed_inputs['data_samples']
]
losses = imvoxel_head.loss(x, data_samples)
print(losses)
self.assertGreaterEqual(losses['center_loss'], 0)
self.assertGreaterEqual(losses['bbox_loss'], 0)
self.assertGreaterEqual(losses['cls_loss'], 0)
@@ -10,11 +10,12 @@ from mmdet3d.testing import (create_detector_inputs, get_detector_cfg,

class TestImVoxelNet(unittest.TestCase):

-   def test_imvoxelnet(self):
+   def test_imvoxelnet_kitti(self):
        import mmdet3d.models

        assert hasattr(mmdet3d.models, 'ImVoxelNet')
-       DefaultScope.get_instance('test_ImVoxelNet', scope_name='mmdet3d')
+       DefaultScope.get_instance(
+           'test_imvoxelnet_kitti', scope_name='mmdet3d')
        setup_seed(0)
        imvoxel_net_cfg = get_detector_cfg(
            'imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py')
@@ -47,3 +48,42 @@ class TestImVoxelNet(unittest.TestCase):
        self.assertGreaterEqual(losses['loss_cls'][0], 0)
        self.assertGreaterEqual(losses['loss_bbox'][0], 0)
        self.assertGreaterEqual(losses['loss_dir'][0], 0)
def test_imvoxelnet_sunrgbd(self):
import mmdet3d.models
assert hasattr(mmdet3d.models, 'ImVoxelNet')
DefaultScope.get_instance(
'test_imvoxelnet_sunrgbd', scope_name='mmdet3d')
setup_seed(0)
imvoxel_net_cfg = get_detector_cfg(
'imvoxelnet/imvoxelnet_2xb4_sunrgbd-3d-10class.py')
model = MODELS.build(imvoxel_net_cfg)
num_gt_instance = 1
packed_inputs = create_detector_inputs(
with_points=False,
with_img=True,
img_size=(128, 128),
num_gt_instance=num_gt_instance,
with_pts_semantic_mask=False,
with_pts_instance_mask=False)
if torch.cuda.is_available():
model = model.cuda()
# test simple_test
with torch.no_grad():
data = model.data_preprocessor(packed_inputs, True)
torch.cuda.empty_cache()
results = model.forward(**data, mode='predict')
self.assertEqual(len(results), 1)
self.assertIn('bboxes_3d', results[0].pred_instances_3d)
self.assertIn('scores_3d', results[0].pred_instances_3d)
self.assertIn('labels_3d', results[0].pred_instances_3d)
# save the memory
with torch.no_grad():
losses = model.forward(**data, mode='loss')
self.assertGreaterEqual(losses['center_loss'], 0)
self.assertGreaterEqual(losses['bbox_loss'], 0)
self.assertGreaterEqual(losses['cls_loss'], 0)
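The new and updated tests can be run locally; the sketch below is not part of the commit, and the file paths are assumed from the usual mmdetection3d test layout, so they may differ.

```python
# Run the ImVoxelNet-related tests; paths are assumed from the usual repo
# layout. The head loss test skips without CUDA.
import pytest

pytest.main([
    'tests/test_models/test_dense_heads/test_imvoxel_head.py',
    'tests/test_models/test_detectors/test_imvoxelnet.py',
    '-q',
])
```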