Commit f63a62b8 authored by zhangshilong's avatar zhangshilong Committed by ChaimZhu

[Refactor] ImVoteNet

parent edb6b369
......@@ -2,6 +2,9 @@ dataset_type = 'SUNRGBDDataset'
data_root = 'data/sunrgbd/'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub')
metainfo = dict(CLASSES=class_names)
train_pipeline = [
dict(
type='LoadPointsFromFile',
......@@ -21,8 +24,9 @@ train_pipeline = [
scale_ratio_range=[0.85, 1.15],
shift_height=True),
dict(type='PointSample', num_points=20000),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
dict(
type='Pack3DDetInputs',
keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
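The two formatting transforms of the 0.x pipeline (`DefaultFormatBundle3D` + `Collect3D`) collapse into the single `Pack3DDetInputs` step shown above; schematically:

# Before (mmdet3d 0.x style), two steps:
#   dict(type='DefaultFormatBundle3D', class_names=class_names),
#   dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
# After this refactor, one step packs inputs and annotations together:
dict(
    type='Pack3DDetInputs',
    keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])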
test_pipeline = [
dict(
......@@ -47,61 +51,52 @@ test_pipeline = [
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
),
dict(type='PointSample', num_points=20000),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points'])
dict(type='PointSample', num_points=20000)
]),
dict(type='Pack3DDetInputs', keys=['points'])
]
data = dict(
samples_per_gpu=16,
workers_per_gpu=4,
train=dict(
train_dataloader = dict(
batch_size=16,
num_workers=4,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset',
times=5,
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_train.pkl',
ann_file='sunrgbd_infos_train.pkl',
pipeline=train_pipeline,
classes=class_names,
filter_empty_gt=False,
metainfo=metainfo,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d='Depth')),
val=dict(
box_type_3d='Depth')))
val_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_val.pkl',
ann_file='sunrgbd_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'),
test=dict(
box_type_3d='Depth'))
test_dataloader = dict(
batch_size=1,
num_workers=1,
sampler=dict(type='DefaultSampler', shuffle=False),
dataset=dict(
type=dataset_type,
data_root=data_root,
ann_file=data_root + 'sunrgbd_infos_val.pkl',
ann_file='sunrgbd_infos_val.pkl',
pipeline=test_pipeline,
classes=class_names,
metainfo=metainfo,
test_mode=True,
box_type_3d='Depth'))
evaluation = dict(pipeline=eval_pipeline)
val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator
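As a quick sanity check, the refactored `train_dataloader` entry can be built directly with mmengine; a minimal sketch, assuming mmdet3d's datasets and transforms are registered under the default scope (the config path below is a placeholder):

from mmengine.config import Config
from mmengine.runner import Runner

cfg = Config.fromfile('configs/_base_/datasets/sunrgbd-3d-10class.py')  # placeholder path
train_loader = Runner.build_dataloader(cfg.train_dataloader)
batch = next(iter(train_loader))  # dict with 'inputs' and 'data_samples' lists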
model = dict(
type='ImVoteNet',
data_preprocessor=dict(
type='Det3DDataPreprocessor',
# use caffe img_norm
mean=[103.530, 116.280, 123.675],
std=[1.0, 1.0, 1.0],
bgr_to_rgb=False,
pad_size_divisor=32),
img_backbone=dict(
type='ResNet',
type='mmdet.ResNet',
depth=50,
num_stages=4,
out_indices=(0, 1, 2, 3),
......@@ -10,11 +17,12 @@ model = dict(
norm_eval=True,
style='caffe'),
img_neck=dict(
type='FPN',
type='mmdet.FPN',
in_channels=[256, 512, 1024, 2048],
out_channels=256,
num_outs=5),
img_rpn_head=dict(
_scope_='mmdet',
type='RPNHead',
in_channels=256,
feat_channels=256,
......@@ -31,6 +39,7 @@ model = dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
img_roi_head=dict(
_scope_='mmdet',
type='StandardRoIHead',
bbox_roi_extractor=dict(
type='SingleRoIExtractor',
......@@ -56,6 +65,7 @@ model = dict(
train_cfg=dict(
img_rpn=dict(
assigner=dict(
_scope_='mmdet',
type='MaxIoUAssigner',
pos_iou_thr=0.7,
neg_iou_thr=0.3,
......@@ -63,7 +73,7 @@ model = dict(
match_low_quality=True,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
type='mmdet.RandomSampler',
num=256,
pos_fraction=0.5,
neg_pos_ub=-1,
......@@ -80,6 +90,7 @@ model = dict(
min_bbox_size=0),
img_rcnn=dict(
assigner=dict(
_scope_='mmdet',
type='MaxIoUAssigner',
pos_iou_thr=0.5,
neg_iou_thr=0.5,
......@@ -87,7 +98,7 @@ model = dict(
match_low_quality=False,
ignore_iof_thr=-1),
sampler=dict(
type='RandomSampler',
type='mmdet.RandomSampler',
num=512,
pos_fraction=0.25,
neg_pos_ub=-1,
......
......@@ -3,56 +3,71 @@ _base_ = [
'../_base_/models/imvotenet_image.py'
]
# use caffe img_norm
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(
type='Resize',
img_scale=[(1333, 480), (1333, 504), (1333, 528), (1333, 552),
type='LoadAnnotations3D',
with_bbox=True,
with_label=True,
with_bbox_3d=False,
with_label_3d=False),
dict(
type='RandomChoiceResize',
scales=[(1333, 480), (1333, 504), (1333, 528), (1333, 552),
(1333, 576), (1333, 600)],
multiscale_mode='value',
keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.5),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
dict(type='RandomFlip', prob=0.5),
dict(
type='Pack3DDetInputs', keys=['img', 'gt_bboxes', 'gt_bboxes_labels']),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
# online evaluation
dict(
type='LoadAnnotations3D',
with_bbox=True,
with_label=True,
with_bbox_3d=False,
with_label_3d=False),
dict(type='Resize', scale=(1333, 600), keep_ratio=True),
dict(
type='MultiScaleFlipAug',
img_scale=(1333, 600),
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip'),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(type='ImageToTensor', keys=['img']),
dict(type='Collect', keys=['img']),
])
type='Pack3DDetInputs',
keys=(['img']),
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
'scale_factor'))
]
train_dataloader = dict(
batch_size=2,
num_workers=2,
sampler=dict(type='DefaultSampler', shuffle=True),
dataset=dict(
type='RepeatDataset', times=1, dataset=dict(pipeline=train_pipeline)))
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(times=1, dataset=dict(pipeline=train_pipeline)),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=8, val_interval=1)
val_cfg = dict(type='ValLoop')
test_cfg = dict(type='TestLoop')
# learning rate
param_scheduler = [
dict(
type='LinearLR', start_factor=0.001, by_epoch=False, begin=0, end=500),
dict(
type='MultiStepLR',
begin=0,
end=8,
by_epoch=True,
milestones=[6],
gamma=0.1)
]
val_evaluator = dict(type='Indoor2DMetric')
optimizer = dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.001,
step=[6])
runner = dict(type='EpochBasedRunner', max_epochs=8)
# optimizer
optim_wrapper = dict(
type='OptimWrapper',
optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
load_from = 'http://download.openmmlab.com/mmdetection/v2.0/mask_rcnn/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco/mask_rcnn_r50_caffe_fpn_mstrain-poly_3x_coco_bbox_mAP-0.408__segm_mAP-0.37_20200504_163245-42aa3d00.pth' # noqa
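The `param_scheduler` pair above replaces the old `lr_config`; an illustrative pure-Python sketch of the resulting schedule (not mmengine's exact implementation):

def lr_at(epoch, it, iters_per_epoch, base_lr=0.01):
    # LinearLR: warm up from 0.001 * base_lr over the first 500 iterations.
    g = epoch * iters_per_epoch + it
    warmup = 0.001 + (1 - 0.001) * min(g, 500) / 500
    # MultiStepLR: decay by 0.1 from epoch 6 (milestones=[6], gamma=0.1).
    decay = 0.1 if epoch >= 6 else 1.0
    return base_lr * warmup * decay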
......@@ -7,10 +7,6 @@ _base_ = [
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub')
# use caffe img_norm
img_norm_cfg = dict(
mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
model = dict(
pts_backbone=dict(
type='PointNet2SASSG',
......@@ -48,10 +44,8 @@ model = dict(
[0.76584, 1.398258, 0.472728]]),
pred_layer_cfg=dict(
in_channels=128, shared_conv_channels=(128, 128), bias=True),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d'),
objectness_loss=dict(
type='CrossEntropyLoss',
type='mmdet.CrossEntropyLoss',
class_weight=[0.2, 0.8],
reduction='sum',
loss_weight=5.0),
......@@ -62,15 +56,23 @@ model = dict(
loss_src_weight=10.0,
loss_dst_weight=10.0),
dir_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss',
reduction='sum',
loss_weight=1.0),
dir_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
size_class_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
type='mmdet.CrossEntropyLoss',
reduction='sum',
loss_weight=1.0),
size_res_loss=dict(
type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
type='mmdet.SmoothL1Loss',
reduction='sum',
loss_weight=10.0 / 3.0),
semantic_loss=dict(
type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
type='mmdet.CrossEntropyLoss',
reduction='sum',
loss_weight=1.0)),
joint=dict(
vote_module_cfg=dict(
in_channels=512,
......@@ -154,11 +156,11 @@ model = dict(
# model training and testing settings
train_cfg=dict(
pts=dict(
pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')),
pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mode='vote')),
test_cfg=dict(
img_rcnn=dict(score_thr=0.1),
pts=dict(
sample_mod='seed',
sample_mode='seed',
nms_thr=0.25,
score_thr=0.05,
per_class_proposal=True)))
......@@ -171,12 +173,13 @@ train_pipeline = [
load_dim=6,
use_dim=[0, 1, 2]),
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations3D'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='Resize', img_scale=(1333, 600), keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.0),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='LoadAnnotations3D',
with_bbox=True,
with_label=True,
with_bbox_3d=True,
with_label_3d=True),
dict(type='Resize', scale=(1333, 600), keep_ratio=True),
dict(
type='RandomFlip3D',
sync_2d=False,
......@@ -188,15 +191,13 @@ train_pipeline = [
scale_ratio_range=[0.85, 1.15],
shift_height=True),
dict(type='PointSample', num_points=20000),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(
type='Collect3D',
keys=[
'img', 'gt_bboxes', 'gt_labels', 'points', 'gt_bboxes_3d',
type='Pack3DDetInputs',
keys=([
'img', 'gt_bboxes', 'gt_bboxes_labels', 'points', 'gt_bboxes_3d',
'gt_labels_3d'
])
]))
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
......@@ -205,56 +206,15 @@ test_pipeline = [
shift_height=True,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='MultiScaleFlipAug3D',
img_scale=(1333, 600),
pts_scale_ratio=1,
flip=False,
transforms=[
dict(type='Resize', keep_ratio=True),
dict(type='RandomFlip', flip_ratio=0.0),
dict(type='Normalize', **img_norm_cfg),
dict(type='Pad', size_divisor=32),
dict(
type='GlobalRotScaleTrans',
rot_range=[0, 0],
scale_ratio_range=[1., 1.],
translation_std=[0, 0, 0]),
dict(
type='RandomFlip3D',
sync_2d=False,
flip_ratio_bev_horizontal=0.5,
),
dict(type='Resize', scale=(1333, 600), keep_ratio=True),
dict(type='PointSample', num_points=20000),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['img', 'points'])
]),
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='LoadPointsFromFile',
coord_type='DEPTH',
shift_height=False,
load_dim=6,
use_dim=[0, 1, 2]),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['img', 'points'])
dict(type='Pack3DDetInputs', keys=['img', 'points'])
]
data = dict(
train=dict(dataset=dict(pipeline=train_pipeline)),
val=dict(pipeline=test_pipeline),
test=dict(pipeline=test_pipeline))
evaluation = dict(pipeline=eval_pipeline)
train_dataloader = dict(dataset=dict(dataset=dict(pipeline=train_pipeline)))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
# may also use your own pre-trained image branch
load_from = 'https://download.openmmlab.com/mmdetection3d/v0.1.0_models/imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class_20210323_173222-cad62aeb.pth' # noqa
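Note the doubly nested `dataset` key in `train_dataloader` above: the base config wraps SUNRGBDDataset in a `RepeatDataset`, so the pipeline override must reach through both levels. The merged result looks schematically like this (keys abbreviated):

train_pipeline = []  # stands in for the full pipeline defined above
train_dataloader = dict(
    batch_size=16,
    dataset=dict(          # RepeatDataset wrapper from the base config
        type='RepeatDataset',
        times=5,
        dataset=dict(      # the inner SUNRGBDDataset
            type='SUNRGBDDataset',
            pipeline=train_pipeline)))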
......@@ -178,10 +178,11 @@ class Det3DDataset(BaseDataset):
dict | None: Processed `ann_info`
"""
# add s or gt prefix for most keys after concat
# we only process 3d annotations here; the corresponding
# 2d annotations are handled by `LoadAnnotations3D`
# in the pipelines
name_mapping = {
'bbox_label': 'gt_labels',
'bbox_label_3d': 'gt_labels_3d',
'bbox': 'gt_bboxes',
'bbox_3d': 'gt_bboxes_3d',
'depth': 'depths',
'center_2d': 'centers_2d',
......@@ -196,6 +197,7 @@ class Det3DDataset(BaseDataset):
keys = list(instances[0].keys())
ann_info = dict()
for ann_name in keys:
if ann_name in name_mapping:
temp_anns = [item[ann_name] for item in instances]
# map the original dataset label to training label
if 'label' in ann_name:
......@@ -203,9 +205,10 @@ class Det3DDataset(BaseDataset):
self.label_mapping[item] for item in temp_anns
]
temp_anns = np.array(temp_anns)
if ann_name in name_mapping:
ann_name = name_mapping[ann_name]
ann_info[ann_name] = temp_anns
ann_info['instances'] = info['instances']
return ann_info
def parse_data_info(self, info: dict) -> dict:
......
......@@ -87,7 +87,8 @@ class KittiDataset(Det3DDataset):
if 'plane' in info:
# convert ground plane to velodyne coordinates
plane = np.array(info['plane'])
lidar2cam = np.array(info['images']['CAM2']['lidar2cam'])
lidar2cam = np.array(
info['images']['CAM2']['lidar2cam'], dtype=np.float32)
reverse = np.linalg.inv(lidar2cam)
(plane_norm_cam, plane_off_cam) = (plane[:3],
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Union
from typing import List, Sequence, Union
import mmcv
import numpy as np
import torch
from mmcv import BaseTransform
from mmcv.transforms import to_tensor
from mmengine import InstanceData
from numpy import dtype
from mmdet3d.core import Det3DDataSample, PointData
from mmdet3d.core.bbox import BaseInstance3DBoxes
......@@ -12,6 +14,38 @@ from mmdet3d.core.points import BasePoints
from mmdet3d.registry import TRANSFORMS
def to_tensor(
data: Union[torch.Tensor, np.ndarray, Sequence, int,
float]) -> torch.Tensor:
"""Convert objects of various python types to :obj:`torch.Tensor`.
Supported types are: :class:`numpy.ndarray`, :class:`torch.Tensor`,
:class:`Sequence`, :class:`int` and :class:`float`.
Args:
data (torch.Tensor | numpy.ndarray | Sequence | int | float): Data to
be converted.
Returns:
torch.Tensor: the converted data.
"""
if isinstance(data, torch.Tensor):
return data
elif isinstance(data, np.ndarray):
if data.dtype is dtype('float64'):
data = data.astype(np.float32)
return torch.from_numpy(data)
elif isinstance(data, Sequence) and not mmcv.is_str(data):
return torch.tensor(data)
elif isinstance(data, int):
return torch.LongTensor([data])
elif isinstance(data, float):
return torch.FloatTensor([data])
else:
raise TypeError(f'type {type(data)} cannot be converted to tensor.')
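A quick usage sketch of the `to_tensor` helper above; note the implicit float64 → float32 downcast for NumPy arrays:

import numpy as np
import torch

arr = np.zeros((2, 3))                         # NumPy defaults to float64
assert to_tensor(arr).dtype == torch.float32   # downcast on conversion
assert to_tensor(5).dtype == torch.int64       # int -> LongTensor
assert to_tensor(0.5).dtype == torch.float32   # float -> FloatTensor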
@TRANSFORMS.register_module()
class Pack3DDetInputs(BaseTransform):
INPUTS_KEYS = ['points', 'img']
......@@ -20,7 +54,7 @@ class Pack3DDetInputs(BaseTransform):
]
INSTANCEDATA_2D_KEYS = [
'gt_bboxes',
'gt_labels',
'gt_bboxes_labels',
]
SEG_KEYS = [
......@@ -121,8 +155,8 @@ class Pack3DDetInputs(BaseTransform):
for key in [
'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',
'gt_labels_3d', 'attr_labels', 'pts_instance_mask',
'pts_semantic_mask', 'centers_2d', 'depths'
'gt_bboxes_labels', 'attr_labels', 'pts_instance_mask',
'pts_semantic_mask', 'centers_2d', 'depths', 'gt_labels_3d'
]:
if key not in results:
continue
......@@ -159,6 +193,9 @@ class Pack3DDetInputs(BaseTransform):
elif key in self.INSTANCEDATA_3D_KEYS:
gt_instances_3d[self._remove_prefix(key)] = results[key]
elif key in self.INSTANCEDATA_2D_KEYS:
if key == 'gt_bboxes_labels':
gt_instances['labels'] = results[key]
else:
gt_instances[self._remove_prefix(key)] = results[key]
elif key in self.SEG_KEYS:
gt_pts_seg[self._remove_prefix(key)] = results[key]
......
......@@ -632,34 +632,6 @@ class LoadAnnotations3D(LoadAnnotations):
self.with_seg_3d = with_seg_3d
self.seg_3d_dtype = seg_3d_dtype
def _load_bboxes(self, results: dict) -> None:
"""Private function to load bounding box annotations.
Rewrite `_load_bboxes` since mmdet3d uses `parse_anno_info` in
datasets.
Args:
results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.
Returns:
dict: The dict contains loaded bounding box annotations.
"""
results['gt_bboxes'] = results['ann_info']['gt_bboxes']
def _load_labels(self, results: dict) -> None:
"""Private function to load label annotations.
Rewrite `_load_labels` since mmdet3d uses `parse_anno_info` in
datasets.
Args:
results (dict): Result dict from :obj:`mmdet3d.CustomDataset`.
Returns:
dict: The dict contains loaded label annotations.
"""
results['gt_labels'] = results['ann_info']['gt_labels']
def _load_bboxes_3d(self, results: dict) -> dict:
"""Private function to move the 3D bounding box annotation from
`ann_info` field to the root of `results`.
......@@ -769,6 +741,56 @@ class LoadAnnotations3D(LoadAnnotations):
results['eval_ann_info']['pts_semantic_mask'] = pts_semantic_mask
return results
def _load_bboxes(self, results: dict) -> None:
"""Private function to load bounding box annotations.
The only difference is it remove the proceess for
`ignore_flag`
Args:
results (dict): Result dict from :obj:``mmcv.BaseDataset``.
Returns:
dict: The dict contains loaded bounding box annotations.
"""
gt_bboxes = []
for instance in results['instances']:
gt_bboxes.append(instance['bbox'])
if len(gt_bboxes) == 0:
results['gt_bboxes'] = np.zeros((0, 4), dtype=np.float32)
else:
results['gt_bboxes'] = np.array(
gt_bboxes, dtype=np.float32).reshape((-1, 4))
if self.denorm_bbox:
bbox_num = results['gt_bboxes'].shape[0]
if bbox_num != 0:
h, w = results['img_shape']
results['gt_bboxes'][:, 0::2] *= w
results['gt_bboxes'][:, 1::2] *= h
if 'eval_ann_info' in results:
results['eval_ann_info']['gt_bboxes'] = results['gt_bboxes']
def _load_labels(self, results: dict) -> None:
"""Private function to load label annotations.
Args:
results (dict): Result dict from :obj:`mmcv.BaseDataset`.
Returns:
dict: The dict contains loaded label annotations.
"""
gt_bboxes_labels = []
for instance in results['instances']:
gt_bboxes_labels.append(instance['bbox_label'])
if len(gt_bboxes_labels) == 0:
results['gt_bboxes_labels'] = np.zeros((0, ), dtype=np.int64)
else:
results['gt_bboxes_labels'] = np.array(
gt_bboxes_labels, dtype=np.int64)
if 'eval_ann_info' in results:
results['eval_ann_info']['gt_bboxes_labels'] = results[
'gt_bboxes_labels']
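To make the contract of the two loaders above concrete, an illustrative `results` dict they consume and the keys they produce (values are placeholders):

results = {
    'instances': [
        {'bbox': [10.0, 20.0, 50.0, 80.0], 'bbox_label': 3},
        {'bbox': [0.0, 0.0, 30.0, 40.0], 'bbox_label': 7},
    ],
}
# After _load_bboxes / _load_labels:
#   results['gt_bboxes']        -> float32 ndarray of shape (2, 4)
#   results['gt_bboxes_labels'] -> int64 ndarray of shape (2,)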
def transform(self, results: dict) -> dict:
"""Function to load multiple types annotations.
......
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
from typing import Callable, List, Optional, Union
import numpy as np
......@@ -22,13 +24,15 @@ class SUNRGBDDataset(Det3DDataset):
ann_file (str): Path of annotation file.
metainfo (dict, optional): Meta information for dataset, such as class
information. Defaults to None.
data_prefix (dict, optional): Prefix for data. Defaults to
data_prefix (dict): Prefix for data. Defaults to
`dict(pts='points', img='sunrgbd_trainval/image')`.
pipeline (list[dict], optional): Pipeline used for data processing.
Defaults to None.
modality (dict, optional): Modality to specify the sensor data used
as input. Defaults to `dict(use_camera=True, use_lidar=True)`.
box_type_3d (str, optional): Type of 3D box of this dataset.
default_cam_key (str): The default camera name adopted.
Defaults to "CAM0".
box_type_3d (str): Type of 3D box of this dataset.
Based on the `box_type_3d`, the dataset will encapsulate the box
in its original format and then convert it to `box_type_3d`.
Defaults to 'Depth' in this dataset. Available options include
......@@ -36,9 +40,9 @@ class SUNRGBDDataset(Det3DDataset):
- 'LiDAR': Box in LiDAR coordinates.
- 'Depth': Box in depth coordinates, usually for indoor dataset.
- 'Camera': Box in camera coordinates.
filter_empty_gt (bool, optional): Whether to filter empty GT.
filter_empty_gt (bool): Whether to filter empty GT.
Defaults to True.
test_mode (bool, optional): Whether the dataset is in test mode.
test_mode (bool): Whether the dataset is in test mode.
Defaults to False.
"""
METAINFO = {
......@@ -51,8 +55,9 @@ class SUNRGBDDataset(Det3DDataset):
ann_file: str,
metainfo: Optional[dict] = None,
data_prefix: dict = dict(
pts='points', img='sunrgbd_trainval'),
pts='points', img='sunrgbd_trainval/image'),
pipeline: List[Union[dict, Callable]] = [],
default_cam_key: str = 'CAM0',
modality=dict(use_camera=True, use_lidar=True),
box_type_3d: str = 'Depth',
filter_empty_gt: bool = True,
......@@ -64,6 +69,7 @@ class SUNRGBDDataset(Det3DDataset):
metainfo=metainfo,
data_prefix=data_prefix,
pipeline=pipeline,
default_cam_key=default_cam_key,
modality=modality,
box_type_3d=box_type_3d,
filter_empty_gt=filter_empty_gt,
......@@ -73,6 +79,47 @@ class SUNRGBDDataset(Det3DDataset):
'use_lidar' in self.modality
assert self.modality['use_camera'] or self.modality['use_lidar']
def parse_data_info(self, info: dict) -> dict:
"""Process the raw data info.
Convert all relative path of needed modality data file to
the absolute path. And process
the `instances` field to `ann_info` in training stage.
Args:
info (dict): Raw info dict.
Returns:
dict: Has `ann_info` in training stage. And
all path has been converted to absolute path.
"""
if self.modality['use_lidar']:
info['lidar_points']['lidar_path'] = \
osp.join(
self.data_prefix.get('pts', ''),
info['lidar_points']['lidar_path'])
if self.modality['use_camera']:
for cam_id, img_info in info['images'].items():
if 'img_path' in img_info:
img_info['img_path'] = osp.join(
self.data_prefix.get('img', ''), img_info['img_path'])
if self.default_cam_key is not None:
info['img_path'] = info['images'][
self.default_cam_key]['img_path']
info['depth2img'] = np.array(
info['images'][self.default_cam_key]['depth2img'],
dtype=np.float32)
if not self.test_mode:
# used in training
info['ann_info'] = self.parse_ann_info(info)
if self.test_mode and self.load_eval_anns:
info['eval_ann_info'] = self.parse_ann_info(info)
return info
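Illustrative shape of an `info` entry before and after `parse_data_info` (paths and matrix values are placeholders):

import numpy as np

info = dict(
    lidar_points=dict(lidar_path='000001.bin'),
    images=dict(CAM0=dict(
        img_path='000001.jpg',
        depth2img=np.eye(3).tolist())),
    instances=[])
# After parse_data_info in train mode (with the defaults above):
#   info['lidar_points']['lidar_path'] gains the 'points' prefix
#   info['img_path'] -> 'sunrgbd_trainval/image/000001.jpg'
#   info['depth2img'] becomes a float32 numpy array
#   info['ann_info'] holds the annotations parsed by parse_ann_info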
def parse_ann_info(self, info: dict) -> dict:
"""Process the `instances` in data info to `ann_info`
......@@ -83,12 +130,11 @@ class SUNRGBDDataset(Det3DDataset):
dict: Processed `ann_info`
"""
ann_info = super().parse_ann_info(info)
# empty gt
# process data without any annotations
if ann_info is None:
ann_info = dict()
ann_info['gt_bboxes_3d'] = np.zeros((0, 6), dtype=np.float32)
ann_info['gt_labels_3d'] = np.zeros((0, ), dtype=np.int64)
# to target box structure
ann_info['gt_bboxes_3d'] = DepthInstance3DBoxes(
ann_info['gt_bboxes_3d'],
......
# Copyright (c) OpenMMLab. All rights reserved.
from collections import OrderedDict
from typing import Dict, List, Optional, Sequence
import numpy as np
from mmengine.evaluator import BaseMetric
from mmengine.logging import MMLogger
from mmdet3d.core import get_box_type, indoor_eval
from mmdet3d.registry import METRICS
from mmdet.core import eval_map
@METRICS.register_module()
class IndoorMetric(BaseMetric):
"""Kitti evaluation metric.
"""Indoor scene evaluation metric.
Args:
iou_thr (list[float]): List of IoU thresholds used when calculating the
......@@ -90,3 +93,91 @@ class IndoorMetric(BaseMetric):
box_mode_3d=box_mode_3d)
return ret_dict
@METRICS.register_module()
class Indoor2DMetric(BaseMetric):
"""indoor 2d predictions evaluation metric.
Args:
iou_thr (list[float]): List of iou threshold when calculate the
metric. Defaults to [0.5].
collect_device (str, optional): Device name used for collecting
results from different ranks during distributed training.
Must be 'cpu' or 'gpu'. Defaults to 'cpu'.
prefix (str): The prefix that will be added in the metric
names to disambiguate homonymous metrics of different evaluators.
If prefix is not provided in the argument, self.default_prefix
will be used instead. Default: None
"""
def __init__(self,
iou_thr: List[float] = [0.5],
collect_device: str = 'cpu',
prefix: Optional[str] = None,
**kwargs):
super(Indoor2DMetric, self).__init__(
prefix=prefix, collect_device=collect_device)
self.iou_thr = iou_thr
def process(self, data_batch: Sequence[dict],
predictions: Sequence[dict]) -> None:
"""Process one batch of data samples and predictions.
The processed results should be stored in ``self.results``,
which will be used to compute the metrics when all batches
have been processed.
Args:
data_batch (Sequence[dict]): A batch of data
from the dataloader.
predictions (Sequence[dict]): A batch of outputs from
the model.
"""
batch_eval_anns = [
item['data_sample']['eval_ann_info'] for item in data_batch
]
for eval_ann, pred_dict in zip(batch_eval_anns, predictions):
pred = pred_dict['pred_instances']
ann = dict(
labels=eval_ann['gt_bboxes_labels'],
bboxes=eval_ann['gt_bboxes'])
pred_bboxes = pred['bboxes'].cpu().numpy()
pred_scores = pred['scores'].cpu().numpy()
pred_labels = pred['labels'].cpu().numpy()
dets = []
for label in range(len(self.dataset_meta['CLASSES'])):
index = np.where(pred_labels == label)[0]
pred_bbox_scores = np.hstack(
[pred_bboxes[index], pred_scores[index].reshape((-1, 1))])
dets.append(pred_bbox_scores)
self.results.append((ann, dets))
def compute_metrics(self, results: list) -> Dict[str, float]:
"""Compute the metrics from processed results.
Args:
results (list): The processed results of each batch.
Returns:
Dict[str, float]: The computed metrics. The keys are the names of
the metrics, and the values are corresponding results.
"""
logger: MMLogger = MMLogger.get_current_instance()
annotations, preds = zip(*results)
eval_results = OrderedDict()
iou_thr_2d = (self.iou_thr, ) if isinstance(self.iou_thr,
float) else self.iou_thr
for iou_thr_2d_single in iou_thr_2d:
mean_ap, _ = eval_map(
preds,
annotations,
scale_ranges=None,
iou_thr=iou_thr_2d_single,
dataset=self.dataset_meta['CLASSES'],
logger=logger)
eval_results['mAP_' + str(iou_thr_2d_single)] = mean_ap
return eval_results
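For orientation, each entry of `results` consumed by `compute_metrics` is an `(ann, dets)` pair shaped like this (illustrative values, two classes shown):

import numpy as np

ann = dict(
    labels=np.array([0, 1]),                  # gt class indices
    bboxes=np.array([[0., 0., 10., 10.],
                     [5., 5., 20., 20.]]))
dets = [                                      # one (N, 5) array per class
    np.array([[0., 0., 9., 9., 0.9]]),        # class 0: boxes + score column
    np.zeros((0, 5)),                         # class 1: no detections
]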
......@@ -754,15 +754,16 @@ class VoteHead(BaseModule):
batch_size = bbox3d.shape[0]
results_list = list()
if use_nms:
for b in range(batch_size):
for batch_index in range(batch_size):
temp_results = InstanceData()
bbox_selected, score_selected, labels = \
self.multiclass_nms_single(obj_scores[b],
sem_scores[b],
bbox3d[b],
stack_points[b, ..., :3],
batch_input_metas[b])
bbox = batch_input_metas[b]['box_type_3d'](
self.multiclass_nms_single(
obj_scores[batch_index],
sem_scores[batch_index],
bbox3d[batch_index],
stack_points[batch_index, ..., :3],
batch_input_metas[batch_index])
bbox = batch_input_metas[batch_index]['box_type_3d'](
bbox_selected,
box_dim=bbox_selected.shape[-1],
with_yaw=self.bbox_coder.with_rot)
......
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Union
from typing import List, Optional, Union
from mmengine import InstanceData
......@@ -91,8 +91,8 @@ class Base3DDetector(BaseDetector):
def convert_to_datasample(
self,
results_list_3d: InstanceList,
results_list_2d: InstanceList = None,
results_list_3d: Optional[InstanceList] = None,
results_list_2d: Optional[InstanceList] = None,
) -> SampleList:
"""Convert results list to `Det3DDataSample`.
......@@ -128,10 +128,18 @@ class Base3DDetector(BaseDetector):
"""
data_sample_list = []
assert (results_list_2d is not None) or \
(results_list_3d is not None),\
'please pass at least one type of results_list'
if results_list_2d is None:
results_list_2d = [
InstanceData() for _ in range(len(results_list_3d))
]
if results_list_3d is None:
results_list_3d = [
InstanceData() for _ in range(len(results_list_2d))
]
for i in range(len(results_list_3d)):
result = Det3DDataSample()
result.pred_instances_3d = results_list_3d[i]
......
......@@ -105,8 +105,8 @@ def extract_2d_info(img_meta, tensor):
"""
img_shape = img_meta['img_shape']
ori_shape = img_meta['ori_shape']
img_h, img_w, _ = img_shape
ori_h, ori_w, _ = ori_shape
img_h, img_w = img_shape
ori_h, ori_w = ori_shape
img_scale_factor = (
tensor.new_tensor(img_meta['scale_factor'][:2])
......
......@@ -45,8 +45,6 @@ class VoteFusion(nn.Module):
seed_num = seed_3d_depth.shape[0]
img_shape = img_meta['img_shape']
img_h, img_w, _ = img_shape
# first reverse the data transformations
xyz_depth = apply_3d_transformation(
seed_3d_depth, 'DEPTH', img_meta, reverse=True)
......
import unittest
import torch
from mmengine import DefaultScope
from mmdet3d.registry import MODELS
from tests.utils.model_utils import (_create_detector_inputs,
_get_detector_cfg, _setup_seed)
class TestH3D(unittest.TestCase):
def test_h3dnet(self):
import mmdet3d.models
assert hasattr(mmdet3d.models, 'H3DNet')
DefaultScope.get_instance('test_H3DNet', scope_name='mmdet3d')
_setup_seed(0)
voxel_net_cfg = _get_detector_cfg(
'h3dnet/h3dnet_3x8_scannet-3d-18class.py')
model = MODELS.build(voxel_net_cfg)
num_gt_instance = 5
data = [
_create_detector_inputs(
num_gt_instance=num_gt_instance,
points_feat_dim=4,
bboxes_3d_type='depth',
with_pts_semantic_mask=True,
with_pts_instance_mask=True)
]
if torch.cuda.is_available():
model = model.cuda()
# test simple_test
with torch.no_grad():
batch_inputs, data_samples = model.data_preprocessor(
data, True)
results = model.forward(
batch_inputs, data_samples, mode='predict')
self.assertEqual(len(results), len(data))
self.assertIn('bboxes_3d', results[0].pred_instances_3d)
self.assertIn('scores_3d', results[0].pred_instances_3d)
self.assertIn('labels_3d', results[0].pred_instances_3d)
# save the memory
with torch.no_grad():
losses = model.forward(batch_inputs, data_samples, mode='loss')
self.assertGreater(losses['vote_loss'], 0)
self.assertGreater(losses['objectness_loss'], 0)
self.assertGreater(losses['center_loss'], 0)
import unittest
import torch
from mmengine import DefaultScope
from mmdet3d.registry import MODELS
from tests.utils.model_utils import (_create_detector_inputs,
_get_detector_cfg, _setup_seed)
class TestImvoteNet(unittest.TestCase):
def test_imvotenet_only_img(self):
import mmdet3d.models
assert hasattr(mmdet3d.models, 'ImVoteNet')
DefaultScope.get_instance('test_imvotenet_img', scope_name='mmdet3d')
_setup_seed(0)
votenet_net_cfg = _get_detector_cfg(
'imvotenet/imvotenet_faster_rcnn_r50_fpn_2x4_sunrgbd-3d-10class.py'
)
model = MODELS.build(votenet_net_cfg)
data = [
_create_detector_inputs(
with_points=False, with_img=True, img_size=128)
]
if torch.cuda.is_available():
model = model.cuda()
# test simple_test
with torch.no_grad():
batch_inputs, data_samples = model.data_preprocessor(
data, True)
results = model.forward(
batch_inputs, data_samples, mode='predict')
self.assertEqual(len(results), len(data))
self.assertIn('bboxes', results[0].pred_instances)
self.assertIn('scores', results[0].pred_instances)
self.assertIn('labels', results[0].pred_instances)
# save the memory
with torch.no_grad():
losses = model.forward(batch_inputs, data_samples, mode='loss')
self.assertGreater(sum(losses['loss_rpn_cls']), 0)
self.assertGreater(losses['loss_cls'], 0)
self.assertGreater(losses['loss_bbox'], 0)
def test_imvotenet(self):
import mmdet3d.models
assert hasattr(mmdet3d.models, 'ImVoteNet')
DefaultScope.get_instance('test_imvotenet', scope_name='mmdet3d')
_setup_seed(0)
votenet_net_cfg = _get_detector_cfg(
'imvotenet/imvotenet_stage2_16x8_sunrgbd-3d-10class.py')
model = MODELS.build(votenet_net_cfg)
data = [
_create_detector_inputs(
with_points=True,
with_img=True,
img_size=128,
bboxes_3d_type='depth')
]
if torch.cuda.is_available():
model = model.cuda()
# test simple_test
with torch.no_grad():
batch_inputs, data_samples = model.data_preprocessor(
data, True)
results = model.forward(
batch_inputs, data_samples, mode='predict')
self.assertEqual(len(results), len(data))
self.assertIn('bboxes_3d', results[0].pred_instances_3d)
self.assertIn('scores_3d', results[0].pred_instances_3d)
self.assertIn('labels_3d', results[0].pred_instances_3d)
# save the memory
with torch.no_grad():
losses = model.forward(batch_inputs, data_samples, mode='loss')
self.assertGreater(losses['vote_loss'], 0)
self.assertGreater(losses['objectness_loss'], 0)
self.assertGreater(losses['semantic_loss'], 0)
......@@ -75,6 +75,7 @@ def _get_detector_cfg(fname):
def _create_detector_inputs(seed=0,
with_points=True,
with_img=False,
img_size=10,
num_gt_instance=20,
num_points=10,
points_feat_dim=4,
......@@ -90,23 +91,46 @@ def _create_detector_inputs(seed=0,
'depth': DepthInstance3DBoxes,
'cam': CameraInstance3DBoxes
}
meta_info = dict()
meta_info['depth2img'] = np.array(
[[5.23289349e+02, 3.68831943e+02, 6.10469439e+01],
[1.09560138e+02, 1.97404735e+02, -5.47377738e+02],
[1.25930002e-02, 9.92229998e-01, -1.23769999e-01]])
meta_info['lidar2img'] = np.array(
[[5.23289349e+02, 3.68831943e+02, 6.10469439e+01],
[1.09560138e+02, 1.97404735e+02, -5.47377738e+02],
[1.25930002e-02, 9.92229998e-01, -1.23769999e-01]])
if with_points:
points = torch.rand([num_points, points_feat_dim])
else:
points = None
if with_img:
img = torch.rand(3, 10, 10)
img = torch.rand(3, img_size, img_size)
meta_info['img_shape'] = (img_size, img_size)
meta_info['ori_shape'] = (img_size, img_size)
meta_info['scale_factor'] = np.array([1., 1.])
else:
img = None
inputs_dict = dict(img=img, points=points)
gt_instance_3d = InstanceData()
gt_instance_3d.bboxes_3d = bbox_3d_class[bboxes_3d_type](
torch.rand([num_gt_instance, gt_bboxes_dim]), box_dim=gt_bboxes_dim)
gt_instance_3d.labels_3d = torch.randint(0, num_classes, [num_gt_instance])
data_sample = Det3DDataSample(
metainfo=dict(box_type_3d=bbox_3d_class[bboxes_3d_type]))
data_sample.set_metainfo(meta_info)
data_sample.gt_instances_3d = gt_instance_3d
gt_instance = InstanceData()
gt_instance.labels = torch.randint(0, num_classes, [num_gt_instance])
gt_instance.bboxes = torch.rand(num_gt_instance, 4)
gt_instance.bboxes[:,
2:] = gt_instance.bboxes[:, :2] + gt_instance.bboxes[:,
2:]
data_sample.gt_instances = gt_instance
data_sample.gt_pts_seg = PointData()
if with_pts_instance_mask:
pts_instance_mask = torch.randint(0, num_gt_instance, [num_points])
......
......@@ -652,6 +652,9 @@ def update_sunrgbd_infos(pkl_path, out_dir):
temp_data_info['images']['CAM0']['width'] = w
anns = ori_info_dict['annos']
if anns['gt_num'] == 0:
instance_list = []
else:
num_instances = len(anns['name'])
ignore_class_name = set()
instance_list = []
......@@ -659,12 +662,16 @@ def update_sunrgbd_infos(pkl_path, out_dir):
empty_instance = get_empty_instance()
empty_instance['bbox_3d'] = anns['gt_boxes_upright_depth'][
instance_id].tolist()
empty_instance['bbox'] = anns['bbox'][instance_id].tolist()
if anns['name'][instance_id] in METAINFO['CLASSES']:
empty_instance['bbox_label_3d'] = METAINFO['CLASSES'].index(
anns['name'][instance_id])
empty_instance['bbox_label_3d'] = METAINFO[
'CLASSES'].index(anns['name'][instance_id])
empty_instance['bbox_label'] = empty_instance[
'bbox_label_3d']
else:
ignore_class_name.add(anns['name'][instance_id])
empty_instance['bbox_label_3d'] = -1
empty_instance['bbox_label'] = -1
empty_instance = clear_instance_unused_keys(empty_instance)
instance_list.append(empty_instance)
temp_data_info['instances'] = instance_list
......