Unverified commit 9073a3b5, authored by Tai-Wang, committed by GitHub

[Refactor] Support imvoxelnet at SUN RGB-D on 1.x branch (#2141)

* Support imvoxelnet@sunrgbd on 1.x branch

* Add unit tests

* Update README.md

* Update imvoxelnet_2xb4_sunrgbd-3d-10class.py

* Add typehints

* Fix lint

* Fix BC-breaking caused by updated keys

* Add coord_type in the imvoxelnet kitti config
parent bd1525ec
@@ -26,6 +26,12 @@ Results for SUN RGB-D, ScanNet and nuScenes are currently available in ImVoxelNe
| :--------------------------------------------: | :---: | :-----: | :------: | :------------: | :---: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [ResNet-50](./imvoxelnet_8xb4_kitti-3d-car.py) | Car | 3x | | | 17.26 | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/imvoxelnet/imvoxelnet_4x8_kitti-3d-car/imvoxelnet_4x8_kitti-3d-car_20210830_003014-3d0ffdf4.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/imvoxelnet/imvoxelnet_4x8_kitti-3d-car/imvoxelnet_4x8_kitti-3d-car_20210830_003014.log.json) |
### SUN RGB-D
| Backbone | Lr schd | Mem (GB) | Inf time (fps) | mAP@0.25 | mAP@0.5 | Download |
| :-------------------------------------------------: | :-----: | :------: | :------------: | :------: | :-----: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: |
| [ResNet-50](./imvoxelnet_2xb4_sunrgbd-3d-10class.py) | 2x | 7.2 | 22.5 | 40.96 | 13.50 | [model](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/imvoxelnet/imvoxelnet_4x2_sunrgbd-3d-10class/imvoxelnet_4x2_sunrgbd-3d-10class_20220809_184416-29ca7d2e.pth) \| [log](https://download.openmmlab.com/mmdetection3d/v1.0.0_models/imvoxelnet/imvoxelnet_4x2_sunrgbd-3d-10class/imvoxelnet_4x2_sunrgbd-3d-10class_20220809_184416.log.json) |
## Citation
```latex
......
_base_ = [
'../_base_/schedules/mmdet-schedule-1x.py', '../_base_/default_runtime.py'
]
prior_generator = dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-3.2, -0.2, -2.28, 3.2, 6.2, 0.28]],
rotations=[.0])
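For intuition, here is a minimal sketch (not the mmdet3d implementation, whose point ordering may differ) of the aligned grid this generator is asked for: `n_voxels` cell-center points per axis inside `ranges`, which the detector later fetches via `grid_anchors([n_voxels[::-1]], ...)` and truncates to xyz:

```python
import torch

def aligned_grid_points(pc_range, n_voxels):
    """Cell-center grid over (x_min, y_min, z_min, x_max, y_max, z_max)."""
    mins = torch.tensor(pc_range[:3])
    maxs = torch.tensor(pc_range[3:])
    sizes = (maxs - mins) / torch.tensor(n_voxels, dtype=torch.float)
    axes = [
        mins[i] + sizes[i] * (torch.arange(n_voxels[i]) + 0.5)
        for i in range(3)
    ]
    return torch.cartesian_prod(axes[0], axes[1], axes[2])  # (N, 3) xyz

points = aligned_grid_points([-3.2, -0.2, -2.28, 3.2, 6.2, 0.28], [40, 40, 16])
assert points.shape == (40 * 40 * 16, 3)
```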
model = dict(
type='ImVoxelNet',
data_preprocessor=dict(
type='Det3DDataPreprocessor',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
bgr_to_rgb=True,
pad_size_divisor=32),
backbone=dict(
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'),
style='pytorch'),
neck=dict(
type='mmdet.FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=4),
neck_3d=dict(
type='IndoorImVoxelNeck',
in_channels=256,
out_channels=128,
n_blocks=[1, 1, 1]),
bbox_head=dict(
type='ImVoxelHead',
n_classes=10,
n_levels=3,
n_channels=128,
n_reg_outs=7,
pts_assign_threshold=27,
pts_center_threshold=18,
prior_generator=prior_generator),
prior_generator=prior_generator,
n_voxels=[40, 40, 16],
coord_type='DEPTH',
train_cfg=dict(),
test_cfg=dict(nms_pre=1000, iou_thr=.25, score_thr=.01))
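As a quick sanity check, the whole detector can be built from this file. A hedged sketch, assuming an mmdet3d 1.x environment where `register_all_modules` is available and this config sits at the path below:

```python
from mmengine.config import Config

from mmdet3d.registry import MODELS
from mmdet3d.utils import register_all_modules

register_all_modules()  # populate the mmdet3d registries
cfg = Config.fromfile(
    'configs/imvoxelnet/imvoxelnet_2xb4_sunrgbd-3d-10class.py')
model = MODELS.build(cfg.model)
print(type(model).__name__)  # ImVoxelNet
```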
dataset_type = 'SUNRGBDDataset'
data_root = 'data/sunrgbd/'
class_names = [
'bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub'
]
metainfo = dict(CLASSES=class_names)
file_client_args = dict(backend='disk')
# Uncomment the following if you use Ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
# backend='petrel',
# path_mapping=dict({
# './data/sunrgbd/':
# 's3://openmmlab/datasets/detection3d/sunrgbd_processed/',
# 'data/sunrgbd/':
# 's3://openmmlab/datasets/detection3d/sunrgbd_processed/'
# }))
train_pipeline = [
dict(type='LoadAnnotations3D'),
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(type='RandomResize', scale=[(512, 384), (768, 576)], keep_ratio=True),
dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
dict(type='Pack3DDetInputs', keys=['img', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='LoadImageFromFile', file_client_args=file_client_args),
dict(type='Resize', scale=(640, 480), keep_ratio=True),
dict(type='Pack3DDetInputs', keys=['img'])
]
train_dataloader = dict(
batch_size=4,
num_workers=4,
persistent_workers=True,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=2,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='sunrgbd_infos_train.pkl',
pipeline=train_pipeline,
test_mode=False,
filter_empty_gt=True,
box_type_3d='Depth',
metainfo=metainfo)))
val_dataloader = dict(
batch_size=1,
num_workers=1,
persistent_workers=True,
drop_last=False,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file='sunrgbd_infos_val.pkl',
pipeline=test_pipeline,
test_mode=True,
box_type_3d='Depth',
metainfo=metainfo))
test_dataloader = val_dataloader
val_evaluator = dict(
type='IndoorMetric',
ann_file=data_root + 'sunrgbd_infos_val.pkl',
metric='bbox')
test_evaluator = val_evaluator
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(
_delete_=True, type='AdamW', lr=0.0001, weight_decay=0.0001),
paramwise_cfg=dict(
custom_keys={'backbone': dict(lr_mult=0.1, decay_mult=1.0)}),
clip_grad=dict(max_norm=35., norm_type=2))
param_scheduler = [
dict(
type='MultiStepLR',
begin=0,
end=12,
by_epoch=True,
milestones=[8, 11],
gamma=0.1)
]
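The step schedule above is easy to verify by hand; a small sketch of the per-epoch learning rate implied by `milestones=[8, 11]` and `gamma=0.1`:

```python
base_lr, gamma, milestones = 1e-4, 0.1, (8, 11)
for epoch in range(12):
    lr = base_lr * gamma ** sum(epoch >= m for m in milestones)
    print(epoch, lr)  # epochs 0-7 -> 1e-4, epochs 8-10 -> 1e-5, epoch 11 -> 1e-6
```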
# hooks
default_hooks = dict(checkpoint=dict(type='CheckpointHook', max_keep_ckpts=1))
# runtime
find_unused_parameters = True # only 1 of 4 FPN outputs is used
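A hedged sketch of launching training with this config through mmengine's `Runner` (the usual entry point is `tools/train.py`; the `work_dir` below is arbitrary):

```python
from mmengine.config import Config
from mmengine.runner import Runner

from mmdet3d.utils import register_all_modules

register_all_modules()
cfg = Config.fromfile(
    'configs/imvoxelnet/imvoxelnet_2xb4_sunrgbd-3d-10class.py')
cfg.work_dir = './work_dirs/imvoxelnet_sunrgbd'
Runner.from_cfg(cfg).train()
```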
@@ -52,7 +52,8 @@ model = dict(
type='mmdet.CrossEntropyLoss', use_sigmoid=False,
loss_weight=0.2)),
n_voxels=[216, 248, 12],
anchor_generator=dict(
coord_type='LIDAR',
prior_generator=dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-0.16, -39.68, -3.08, 68.96, 39.68, 0.76]],
rotations=[.0]),
......
@@ -9,6 +9,7 @@ from .fcaf3d_head import FCAF3DHead
from .fcos_mono3d_head import FCOSMono3DHead
from .free_anchor3d_head import FreeAnchor3DHead
from .groupfree3d_head import GroupFree3DHead
from .imvoxel_head import ImVoxelHead
from .monoflex_head import MonoFlexHead
from .parta2_rpn_head import PartA2RPNHead
from .pgd_head import PGDHead
@@ -23,5 +24,5 @@ __all__ = [
'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',
'MonoFlexHead', 'Base3DDenseHead', 'FCAF3DHead'
'MonoFlexHead', 'Base3DDenseHead', 'FCAF3DHead', 'ImVoxelHead'
]
This diff is collapsed.
@@ -7,6 +7,7 @@ from mmengine.structures import InstanceData
from mmdet3d.models.detectors import Base3DDetector
from mmdet3d.models.layers.fusion_layers.point_fusion import point_sample
from mmdet3d.registry import MODELS, TASK_UTILS
from mmdet3d.structures.bbox_3d import get_proj_mat_by_coord_type
from mmdet3d.structures.det3d_data_sample import SampleList
from mmdet3d.utils import ConfigType, OptConfigType, OptInstanceList
@@ -20,9 +21,11 @@ class ImVoxelNet(Base3DDetector):
neck (:obj:`ConfigDict` or dict): The neck config.
neck_3d (:obj:`ConfigDict` or dict): The 3D neck config.
bbox_head (:obj:`ConfigDict` or dict): The bbox head config.
prior_generator (:obj:`ConfigDict` or dict): The prior points
generator config.
n_voxels (list): Number of voxels along x, y, z axis.
anchor_generator (:obj:`ConfigDict` or dict): The anchor generator
config.
        coord_type (str): The type of coordinates of the point cloud:
            'DEPTH', 'LIDAR', or 'CAMERA'.
train_cfg (:obj:`ConfigDict` or dict, optional): Config dict of
training hyper-parameters. Defaults to None.
test_cfg (:obj:`ConfigDict` or dict, optional): Config dict of test
@@ -39,8 +42,9 @@ class ImVoxelNet(Base3DDetector):
neck: ConfigType,
neck_3d: ConfigType,
bbox_head: ConfigType,
prior_generator: ConfigType,
n_voxels: List,
anchor_generator: ConfigType,
coord_type: str,
train_cfg: OptConfigType = None,
test_cfg: OptConfigType = None,
data_preprocessor: OptConfigType = None,
@@ -53,8 +57,9 @@ class ImVoxelNet(Base3DDetector):
bbox_head.update(train_cfg=train_cfg)
bbox_head.update(test_cfg=test_cfg)
self.bbox_head = MODELS.build(bbox_head)
self.prior_generator = TASK_UTILS.build(prior_generator)
self.n_voxels = n_voxels
self.anchor_generator = TASK_UTILS.build(anchor_generator)
self.coord_type = coord_type
self.train_cfg = train_cfg
self.test_cfg = test_cfg
@@ -62,6 +67,8 @@
batch_data_samples: SampleList):
"""Extract 3d features from the backbone -> fpn -> 3d projection.
-> 3d neck -> bbox_head.
Args:
batch_inputs_dict (dict): The model input dict which include
the 'imgs' key.
@@ -72,7 +79,9 @@
as `gt_instance` or `gt_panoptic_seg` or `gt_sem_seg`.
Returns:
torch.Tensor: of shape (N, C_out, N_x, N_y, N_z)
Tuple:
- torch.Tensor: Features of shape (N, C_out, N_x, N_y, N_z).
- torch.Tensor: Valid mask of shape (N, 1, N_x, N_y, N_z).
"""
img = batch_inputs_dict['imgs']
batch_img_metas = [
@@ -80,9 +89,9 @@
]
x = self.backbone(img)
x = self.neck(x)[0]
points = self.anchor_generator.grid_anchors(
[self.n_voxels[::-1]], device=img.device)[0][:, :3]
volumes = []
points = self.prior_generator.grid_anchors([self.n_voxels[::-1]],
device=img.device)[0][:, :3]
volumes, valid_preds = [], []
for feature, img_meta in zip(x, batch_img_metas):
img_scale_factor = (
points.new_tensor(img_meta['scale_factor'][:2])
@@ -91,13 +100,14 @@
img_crop_offset = (
points.new_tensor(img_meta['img_crop_offset'])
if 'img_crop_offset' in img_meta.keys() else 0)
lidar2img = points.new_tensor(img_meta['lidar2img'])
proj_mat = points.new_tensor(
get_proj_mat_by_coord_type(img_meta, self.coord_type))
volume = point_sample(
img_meta,
img_features=feature[None, ...],
points=points,
proj_mat=lidar2img,
coord_type='LIDAR',
proj_mat=points.new_tensor(proj_mat),
coord_type=self.coord_type,
img_scale_factor=img_scale_factor,
img_crop_offset=img_crop_offset,
img_flip=img_flip,
@@ -106,9 +116,11 @@
aligned=False)
volumes.append(
volume.reshape(self.n_voxels[::-1] + [-1]).permute(3, 2, 1, 0))
valid_preds.append(
~torch.all(volumes[-1] == 0, dim=0, keepdim=True))
x = torch.stack(volumes)
x = self.neck_3d(x)
return x
return x, torch.stack(valid_preds).float()
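The valid-voxel mask built in the loop above has simple semantics: a grid point that projects outside the image gets an all-zero feature vector back from `point_sample`, so the code treats a voxel as valid when any channel is non-zero. A toy check in the `(C, N_x, N_y, N_z)` layout produced by the permute:

```python
import torch

# Toy volume: only a 10x10 x-y patch of voxels projects into the image here.
volume = torch.zeros(128, 40, 40, 16)
volume[:, :10, :10, :] = torch.randn(128, 10, 10, 16)
valid = ~torch.all(volume == 0, dim=0, keepdim=True)  # (1, N_x, N_y, N_z)
assert valid.sum().item() == 10 * 10 * 16
```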
def loss(self, batch_inputs_dict: dict, batch_data_samples: SampleList,
**kwargs) -> Union[dict, list]:
@@ -126,8 +138,12 @@
Returns:
dict: A dictionary of loss components.
"""
x = self.extract_feat(batch_inputs_dict, batch_data_samples)
x, valid_preds = self.extract_feat(batch_inputs_dict,
batch_data_samples)
        # For indoor datasets, ImVoxelNet uses ImVoxelHead, which handles
        # the mask of visible voxels.
if self.coord_type == 'DEPTH':
x += (valid_preds, )
losses = self.bbox_head.loss(x, batch_data_samples, **kwargs)
return losses
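The `x += (valid_preds, )` line leans on Python list semantics: `neck_3d` returns a list of per-level features, and `list += tuple` appends the tuple's elements, so the mask rides along as one extra entry for `ImVoxelHead`. A toy illustration:

```python
feats = ['level_0', 'level_1', 'level_2']  # stand-ins for feature tensors
feats += ('valid_mask',)
assert feats == ['level_0', 'level_1', 'level_2', 'valid_mask']
```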
@@ -159,8 +175,14 @@
- bboxes_3d (Tensor): Contains a tensor with shape
(num_instances, C) where C >=7.
"""
x = self.extract_feat(batch_inputs_dict, batch_data_samples)
results_list = self.bbox_head.predict(x, batch_data_samples, **kwargs)
x, valid_preds = self.extract_feat(batch_inputs_dict,
batch_data_samples)
        # For indoor datasets, ImVoxelNet uses ImVoxelHead, which handles
        # the mask of visible voxels.
if self.coord_type == 'DEPTH':
x += (valid_preds, )
results_list = \
self.bbox_head.predict(x, batch_data_samples, **kwargs)
predictions = self.add_pred_to_datasample(batch_data_samples,
results_list)
return predictions
@@ -182,7 +204,12 @@
Returns:
tuple[list]: A tuple of features from ``bbox_head`` forward.
"""
x = self.extract_feat(batch_inputs_dict, batch_data_samples)
x, valid_preds = self.extract_feat(batch_inputs_dict,
batch_data_samples)
        # For indoor datasets, ImVoxelNet uses ImVoxelHead, which handles
        # the mask of visible voxels.
if self.coord_type == 'DEPTH':
x += (valid_preds, )
results = self.bbox_head.forward(x)
return results
......
@@ -2,10 +2,11 @@
from mmdet.models.necks.fpn import FPN
from .dla_neck import DLANeck
from .imvoxel_neck import OutdoorImVoxelNeck
from .imvoxel_neck import IndoorImVoxelNeck, OutdoorImVoxelNeck
from .pointnet2_fp_neck import PointNetFPNeck
from .second_fpn import SECONDFPN
__all__ = [
'FPN', 'SECONDFPN', 'OutdoorImVoxelNeck', 'PointNetFPNeck', 'DLANeck'
'FPN', 'SECONDFPN', 'OutdoorImVoxelNeck', 'PointNetFPNeck', 'DLANeck',
'IndoorImVoxelNeck'
]
# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.cnn import ConvModule
from mmengine.model import BaseModule
from torch import nn
from mmdet3d.registry import MODELS
@MODELS.register_module()
class OutdoorImVoxelNeck(nn.Module):
class OutdoorImVoxelNeck(BaseModule):
"""Neck for ImVoxelNet outdoor scenario.
Args:
in_channels (int): Input channels of multi-scale feature map.
out_channels (int): Output channels of multi-scale feature map.
in_channels (int): Number of channels in an input tensor.
out_channels (int): Number of channels in all output tensors.
"""
def __init__(self, in_channels, out_channels):
super().__init__()
super(OutdoorImVoxelNeck, self).__init__()
self.model = nn.Sequential(
ResModule(in_channels),
ResModule(in_channels, in_channels),
ConvModule(
in_channels=in_channels,
out_channels=in_channels * 2,
@@ -27,7 +28,7 @@ class OutdoorImVoxelNeck(nn.Module):
conv_cfg=dict(type='Conv3d'),
norm_cfg=dict(type='BN3d'),
act_cfg=dict(type='ReLU', inplace=True)),
ResModule(in_channels * 2),
ResModule(in_channels * 2, in_channels * 2),
ConvModule(
in_channels=in_channels * 2,
out_channels=in_channels * 4,
@@ -37,7 +38,7 @@
conv_cfg=dict(type='Conv3d'),
norm_cfg=dict(type='BN3d'),
act_cfg=dict(type='ReLU', inplace=True)),
ResModule(in_channels * 4),
ResModule(in_channels * 4, in_channels * 4),
ConvModule(
in_channels=in_channels * 4,
out_channels=out_channels,
@@ -66,31 +67,148 @@ class OutdoorImVoxelNeck(nn.Module):
pass
@MODELS.register_module()
class IndoorImVoxelNeck(BaseModule):
"""Neck for ImVoxelNet outdoor scenario.
Args:
in_channels (int): Number of channels in an input tensor.
out_channels (int): Number of channels in all output tensors.
n_blocks (list[int]): Number of blocks for each feature level.
"""
def __init__(self, in_channels, out_channels, n_blocks):
super(IndoorImVoxelNeck, self).__init__()
self.n_scales = len(n_blocks)
n_channels = in_channels
for i in range(len(n_blocks)):
stride = 1 if i == 0 else 2
self.__setattr__(f'down_layer_{i}',
self._make_layer(stride, n_channels, n_blocks[i]))
n_channels = n_channels * stride
if i > 0:
self.__setattr__(
f'up_block_{i}',
self._make_up_block(n_channels, n_channels // 2))
self.__setattr__(f'out_block_{i}',
self._make_block(n_channels, out_channels))
def forward(self, x):
"""Forward function.
Args:
x (torch.Tensor): of shape (N, C_in, N_x, N_y, N_z).
Returns:
list[torch.Tensor]: of shape (N, C_out, N_xi, N_yi, N_zi).
"""
down_outs = []
for i in range(self.n_scales):
x = self.__getattr__(f'down_layer_{i}')(x)
down_outs.append(x)
outs = []
for i in range(self.n_scales - 1, -1, -1):
if i < self.n_scales - 1:
x = self.__getattr__(f'up_block_{i + 1}')(x)
x = down_outs[i] + x
out = self.__getattr__(f'out_block_{i}')(x)
outs.append(out)
return outs[::-1]
@staticmethod
def _make_layer(stride, n_channels, n_blocks):
"""Make a layer from several residual blocks.
Args:
stride (int): Stride of the first residual block.
n_channels (int): Number of channels of the first residual block.
n_blocks (int): Number of residual blocks.
Returns:
            torch.nn.Module: A module composed of several residual blocks.
"""
blocks = []
for i in range(n_blocks):
if i == 0 and stride != 1:
blocks.append(ResModule(n_channels, n_channels * 2, stride))
n_channels = n_channels * 2
else:
blocks.append(ResModule(n_channels, n_channels))
return nn.Sequential(*blocks)
@staticmethod
def _make_block(in_channels, out_channels):
"""Make a convolutional block.
Args:
in_channels (int): Number of input channels.
out_channels (int): Number of output channels.
Returns:
torch.nn.Module: Convolutional block.
"""
return nn.Sequential(
nn.Conv3d(in_channels, out_channels, 3, 1, 1, bias=False),
nn.BatchNorm3d(out_channels), nn.ReLU(inplace=True))
@staticmethod
def _make_up_block(in_channels, out_channels):
"""Make upsampling convolutional block.
Args:
in_channels (int): Number of input channels.
out_channels (int): Number of output channels.
Returns:
torch.nn.Module: Upsampling convolutional block.
"""
return nn.Sequential(
nn.ConvTranspose3d(in_channels, out_channels, 2, 2, bias=False),
nn.BatchNorm3d(out_channels), nn.ReLU(inplace=True),
nn.Conv3d(out_channels, out_channels, 3, 1, 1, bias=False),
nn.BatchNorm3d(out_channels), nn.ReLU(inplace=True))
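A hedged shape walk for the SUN RGB-D settings above (`in_channels=256`, `out_channels=128`, `n_blocks=[1, 1, 1]`, a 40x40x16 grid), assuming the class is importable as below: the encoder halves the grid and doubles the channels twice (256 -> 512 -> 1024), and each `out_block_i` maps its level to `out_channels`:

```python
import torch

from mmdet3d.models.necks import IndoorImVoxelNeck

neck = IndoorImVoxelNeck(in_channels=256, out_channels=128, n_blocks=[1, 1, 1])
outs = neck(torch.randn(1, 256, 40, 40, 16))
# Three levels, fine to coarse, all mapped to 128 channels.
assert [tuple(o.shape) for o in outs] == [
    (1, 128, 40, 40, 16), (1, 128, 20, 20, 8), (1, 128, 10, 10, 4)
]
```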
class ResModule(nn.Module):
"""3d residual block for ImVoxelNeck.
Args:
n_channels (int): Input channels of a feature map.
in_channels (int): Number of channels in input tensor.
out_channels (int): Number of channels in output tensor.
stride (int, optional): Stride of the block. Defaults to 1.
"""
def __init__(self, n_channels):
def __init__(self, in_channels, out_channels, stride=1):
super().__init__()
self.conv0 = ConvModule(
in_channels=n_channels,
out_channels=n_channels,
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
padding=1,
conv_cfg=dict(type='Conv3d'),
norm_cfg=dict(type='BN3d'),
act_cfg=dict(type='ReLU', inplace=True))
self.conv1 = ConvModule(
in_channels=n_channels,
out_channels=n_channels,
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
padding=1,
conv_cfg=dict(type='Conv3d'),
norm_cfg=dict(type='BN3d'),
act_cfg=None)
if stride != 1:
self.downsample = ConvModule(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=stride,
padding=0,
conv_cfg=dict(type='Conv3d'),
norm_cfg=dict(type='BN3d'),
act_cfg=None)
self.stride = stride
self.activation = nn.ReLU(inplace=True)
def forward(self, x):
@@ -105,6 +223,8 @@ class ResModule(nn.Module):
identity = x
x = self.conv0(x)
x = self.conv1(x)
x = identity + x
if self.stride != 1:
identity = self.downsample(identity)
x = x + identity
x = self.activation(x)
return x
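And a matching sketch for the updated `ResModule`: with `stride=2` it halves each spatial dimension and can change the channel count, projecting the identity through the 1x1 `downsample` branch so the residual addition still matches (import path assumed):

```python
import torch

from mmdet3d.models.necks.imvoxel_neck import ResModule

block = ResModule(in_channels=64, out_channels=128, stride=2)
out = block(torch.randn(1, 64, 8, 8, 4))
assert tuple(out.shape) == (1, 128, 4, 4, 2)  # identity downsampled via 1x1 conv
```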
@@ -9,10 +9,10 @@ from mmdet3d.models.dense_heads import FCAF3DHead
from mmdet3d.testing import create_detector_inputs
class TestAnchor3DHead(TestCase):
class TestFCAF3DHead(TestCase):
def test_fcaf3d_head_loss(self):
"""Test anchor head loss when truth is empty and non-empty."""
"""Test fcaf3d head loss when truth is empty and non-empty."""
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
......
# Copyright (c) OpenMMLab. All rights reserved.
from unittest import TestCase
import pytest
import torch
from mmdet3d import * # noqa
from mmdet3d.models.dense_heads import ImVoxelHead
from mmdet3d.testing import create_detector_inputs
class TestImVoxelHead(TestCase):
def test_imvoxel_head_loss(self):
"""Test imvoxel head loss when truth is empty and non-empty."""
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
# build head
prior_generator = dict(
type='AlignedAnchor3DRangeGenerator',
ranges=[[-3.2, -0.2, -2.28, 3.2, 6.2, 0.28]],
rotations=[.0])
imvoxel_head = ImVoxelHead(
n_classes=1,
n_levels=1,
n_channels=32,
n_reg_outs=7,
pts_assign_threshold=27,
pts_center_threshold=18,
prior_generator=prior_generator,
center_loss=dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True),
bbox_loss=dict(type='RotatedIoU3DLoss'),
cls_loss=dict(type='mmdet.FocalLoss'),
)
imvoxel_head = imvoxel_head.cuda()
# fake input of head
# (x, valid_preds)
x = [
torch.randn(1, 32, 10, 10, 4).cuda(),
torch.ones(1, 1, 10, 10, 4).cuda()
]
# fake annotation
num_gt_instance = 1
packed_inputs = create_detector_inputs(
with_points=False,
with_img=True,
img_size=(128, 128),
num_gt_instance=num_gt_instance,
with_pts_semantic_mask=False,
with_pts_instance_mask=False)
data_samples = [
sample.cuda() for sample in packed_inputs['data_samples']
]
losses = imvoxel_head.loss(x, data_samples)
self.assertGreaterEqual(losses['center_loss'], 0)
self.assertGreaterEqual(losses['bbox_loss'], 0)
self.assertGreaterEqual(losses['cls_loss'], 0)
@@ -10,11 +10,12 @@ from mmdet3d.testing import (create_detector_inputs, get_detector_cfg,
class TestImVoxelNet(unittest.TestCase):
def test_imvoxelnet(self):
def test_imvoxelnet_kitti(self):
import mmdet3d.models
assert hasattr(mmdet3d.models, 'ImVoxelNet')
DefaultScope.get_instance('test_ImVoxelNet', scope_name='mmdet3d')
DefaultScope.get_instance(
'test_imvoxelnet_kitti', scope_name='mmdet3d')
setup_seed(0)
imvoxel_net_cfg = get_detector_cfg(
'imvoxelnet/imvoxelnet_8xb4_kitti-3d-car.py')
@@ -47,3 +48,42 @@ class TestImVoxelNet(unittest.TestCase):
self.assertGreaterEqual(losses['loss_cls'][0], 0)
self.assertGreaterEqual(losses['loss_bbox'][0], 0)
self.assertGreaterEqual(losses['loss_dir'][0], 0)
def test_imvoxelnet_sunrgbd(self):
import mmdet3d.models
assert hasattr(mmdet3d.models, 'ImVoxelNet')
DefaultScope.get_instance(
'test_imvoxelnet_sunrgbd', scope_name='mmdet3d')
setup_seed(0)
imvoxel_net_cfg = get_detector_cfg(
'imvoxelnet/imvoxelnet_2xb4_sunrgbd-3d-10class.py')
model = MODELS.build(imvoxel_net_cfg)
num_gt_instance = 1
packed_inputs = create_detector_inputs(
with_points=False,
with_img=True,
img_size=(128, 128),
num_gt_instance=num_gt_instance,
with_pts_semantic_mask=False,
with_pts_instance_mask=False)
if torch.cuda.is_available():
model = model.cuda()
# test simple_test
with torch.no_grad():
data = model.data_preprocessor(packed_inputs, True)
torch.cuda.empty_cache()
results = model.forward(**data, mode='predict')
self.assertEqual(len(results), 1)
self.assertIn('bboxes_3d', results[0].pred_instances_3d)
self.assertIn('scores_3d', results[0].pred_instances_3d)
self.assertIn('labels_3d', results[0].pred_instances_3d)
            # compute losses under no_grad to save memory
with torch.no_grad():
losses = model.forward(**data, mode='loss')
self.assertGreaterEqual(losses['center_loss'], 0)
self.assertGreaterEqual(losses['bbox_loss'], 0)
self.assertGreaterEqual(losses['cls_loss'], 0)