Unverified commit e21e61e0 authored by Ziyi Wu, committed by GitHub

[Feature] Support multi-modality visualization (demos and dataset show function) (#405)



* incomplete

* support lidar2img loading

* add box_type_3d args in MVX-Net config file

* support multi-modality demo for LiDAR point clouds

* support multi-modality demo for indoor (depth) point clouds

* move demo data into folder and modify docs

* add input check & more general filename matching

* update docs for demo and add README file for demo

* add score_threshold option to demos

* add data for ScanNet & KITTI dataset multi-modality test

* add multi-modality visualization in ScanNet and KITTI dataset

* add unittest for modified visualization function

* delete saved temp file and dirs in unittests using TemporaryDirectory

* fix typos in docs & move README of demos to docs/

* add demo docs to documentation

* fix link error
Co-authored-by: default avatarwHao-Wu <wenhaowu.chn@gmail.com>
parent 825f47a4
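The demos and dataset show functions added here all funnel through `show_result_meshlab`, which now accepts a score threshold as its fourth argument. The sketch below condenses the outdoor (KITTI) test fixture from further down in this diff into one standalone call; the box, image size, and file paths come from that fixture, while the identity `lidar2img` placeholder and the output directory are assumptions, not the shipped demo script:

```python
import numpy as np
import torch

from mmdet3d.apis import show_result_meshlab
from mmdet3d.core import Box3DMode
from mmdet3d.core.bbox import LiDARInstance3DBoxes

# One predicted box in LiDAR coordinates, taken from the KITTI test fixture.
result = [dict(pts_bbox=dict(
    boxes_3d=LiDARInstance3DBoxes(torch.tensor(
        [[6.4495, -3.9097, -1.7409, 1.5063, 3.1819, 1.4716, 1.8782]])),
    labels_3d=torch.tensor([0]),
    scores_3d=torch.tensor([0.5])))]

pcd = 'tests/data/kitti/training/velodyne_reduced/000000.bin'
points = torch.from_numpy(np.fromfile(pcd, dtype=np.float32).reshape(-1, 4))
img_meta = dict(
    filename='tests/data/kitti/training/image_2/000000.png',
    box_mode_3d=Box3DMode.LIDAR,
    box_type_3d=LiDARInstance3DBoxes,
    pcd_horizontal_flip=False,
    pcd_vertical_flip=False,
    pcd_trans=np.array([0., 0., 0.]),
    pcd_scale_factor=1.0,
    pts_filename=pcd,
    lidar2img=np.eye(4))  # placeholder; the real matrix is P2 @ R0_rect @ Tr_velo_to_cam
data = dict(points=[[points]], img_metas=[[img_meta]],
            img=[np.random.randn(1, 3, 384, 1280)])

# Writes <out_dir>/<file_name>/{*_points.obj, *_pred.obj, *_img.png,
# *_pred.png}, keeping only boxes whose score exceeds 0.3.
out_dir, file_name = show_result_meshlab(data, result, 'demo_out', 0.3)
```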
@@ -123,7 +123,7 @@ class ScanNetDataset(Custom3DDataset):
             points = np.fromfile(
                 osp.join(self.data_root, pts_path),
                 dtype=np.float32).reshape(-1, 6)
-            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor
+            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
             pred_bboxes = result['boxes_3d'].tensor.numpy()
             show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name,
                         show)
...
+import mmcv
 import numpy as np
 from collections import OrderedDict
 from os import path as osp
-from mmdet3d.core import show_result
+from mmdet3d.core import show_multi_modality_result, show_result
 from mmdet3d.core.bbox import DepthInstance3DBoxes
 from mmdet.core import eval_map
 from mmdet.datasets import DATASETS
@@ -164,14 +165,38 @@ class SUNRGBDDataset(Custom3DDataset):
             data_info = self.data_infos[i]
             pts_path = data_info['pts_path']
             file_name = osp.split(pts_path)[-1].split('.')[0]
+            if hasattr(self, 'pipeline'):
+                example = self.prepare_test_data(i)
+            else:
+                example = None
             points = np.fromfile(
                 osp.join(self.data_root, pts_path),
                 dtype=np.float32).reshape(-1, 6)
             points[:, 3:] *= 255
-            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor
+            gt_bboxes = self.get_ann_info(i)['gt_bboxes_3d'].tensor.numpy()
             pred_bboxes = result['boxes_3d'].tensor.numpy()
-            show_result(points, gt_bboxes, pred_bboxes, out_dir, file_name,
-                        show)
+            show_result(points, gt_bboxes.copy(), pred_bboxes.copy(), out_dir,
+                        file_name, show)
+            # multi-modality visualization
+            if self.modality['use_camera'] and example is not None and \
+                    'calib' in data_info.keys():
+                img = mmcv.imread(example['img_metas']._data['filename'])
+                pred_bboxes = DepthInstance3DBoxes(
+                    pred_bboxes, origin=(0.5, 0.5, 0))
+                gt_bboxes = DepthInstance3DBoxes(
+                    gt_bboxes, origin=(0.5, 0.5, 0))
+                show_multi_modality_result(
+                    img,
+                    gt_bboxes,
+                    pred_bboxes,
+                    example['calib'],
+                    out_dir,
+                    file_name,
+                    depth_bbox=True,
+                    img_metas=example['img_metas']._data,
+                    show=show)

     def evaluate(self,
                  results,
...
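With `use_camera=True`, a pipeline that loads the image, and a `calib` entry in the info dict, `show()` now renders projected boxes alongside the usual point-cloud exports. A minimal sketch of the call; `multi_modality_pipelines` stands in for the camera-aware test pipeline built in the SUN RGB-D tests later in this diff, and the output paths mirror what those tests assert:

```python
import torch

from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet3d.datasets import SUNRGBDDataset

dataset = SUNRGBDDataset(
    './tests/data/sunrgbd', './tests/data/sunrgbd/sunrgbd_infos.pkl',
    multi_modality_pipelines,  # camera-aware pipeline (see the tests below)
    modality=dict(use_lidar=True, use_camera=True))
results = [dict(
    boxes_3d=DepthInstance3DBoxes(torch.tensor(
        [[1.1500, 4.2614, -1.0669, 1.3219, 2.1593, 1.0267, 1.6473]])),
    labels_3d=torch.tensor([0]),
    scores_3d=torch.tensor([0.98]))]
dataset.show(results, './show_out', show=False)
# ./show_out/000001/ now holds 000001_points.obj, 000001_gt.obj and
# 000001_pred.obj plus 000001_img.png, 000001_gt.png and 000001_pred.png.
```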
@@ -37,7 +37,7 @@ def _generate_kitti_dataset_config():
             point_cloud_range=[0, -40, -3, 70.4, 40, 1]),
         dict(
             type='DefaultFormatBundle3D',
-            class_names=['Pedestrian', 'Cyclist', 'Car'],
+            class_names=classes,
             with_label=False),
         dict(type='Collect3D', keys=['points'])
     ])
@@ -47,10 +47,55 @@ def _generate_kitti_dataset_config():
     return data_root, ann_file, classes, pts_prefix, pipeline, modality, split

+
+def _generate_kitti_multi_modality_dataset_config():
+    data_root = 'tests/data/kitti'
+    ann_file = 'tests/data/kitti/kitti_infos_train.pkl'
+    classes = ['Pedestrian', 'Cyclist', 'Car']
+    pts_prefix = 'velodyne_reduced'
+    img_norm_cfg = dict(
+        mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+    pipeline = [
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='LIDAR',
+            load_dim=4,
+            use_dim=4,
+            file_client_args=dict(backend='disk')),
+        dict(type='LoadImageFromFile'),
+        dict(
+            type='MultiScaleFlipAug3D',
+            img_scale=(1333, 800),
+            pts_scale_ratio=1,
+            flip=False,
+            transforms=[
+                dict(type='Resize', multiscale_mode='value', keep_ratio=True),
+                dict(
+                    type='GlobalRotScaleTrans',
+                    rot_range=[0, 0],
+                    scale_ratio_range=[1., 1.],
+                    translation_std=[0, 0, 0]),
+                dict(type='RandomFlip3D'),
+                dict(type='Normalize', **img_norm_cfg),
+                dict(type='Pad', size_divisor=32),
+                dict(
+                    type='PointsRangeFilter',
+                    point_cloud_range=[0, -40, -3, 70.4, 40, 1]),
+                dict(
+                    type='DefaultFormatBundle3D',
+                    class_names=classes,
+                    with_label=False),
+                dict(type='Collect3D', keys=['points', 'img'])
+            ])
+    ]
+    modality = dict(use_lidar=True, use_camera=True)
+    split = 'training'
+    return data_root, ann_file, classes, pts_prefix, pipeline, modality, split

 def test_getitem():
     np.random.seed(0)
-    data_root, ann_file, classes, pts_prefix,\
-        pipeline, modality, split = _generate_kitti_dataset_config()
+    data_root, ann_file, classes, pts_prefix, \
+        _, modality, split = _generate_kitti_dataset_config()
     pipeline = [
         dict(
             type='LoadPointsFromFile',
@@ -98,9 +143,9 @@ def test_getitem():
         dict(
             type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
     ]
-    self = KittiDataset(data_root, ann_file, split, pts_prefix, pipeline,
-                        classes, modality)
-    data = self[0]
+    kitti_dataset = KittiDataset(data_root, ann_file, split, pts_prefix,
+                                 pipeline, classes, modality)
+    data = kitti_dataset[0]
     points = data['points']._data
     gt_bboxes_3d = data['gt_bboxes_3d']._data
     gt_labels_3d = data['gt_labels_3d']._data
@@ -112,14 +157,64 @@ def test_getitem():
         gt_bboxes_3d.tensor, expected_gt_bboxes_3d, atol=1e-4)
     assert torch.all(gt_labels_3d == expected_gt_labels_3d)
+
+    # test multi-modality KITTI dataset
+    np.random.seed(0)
+    point_cloud_range = [0, -40, -3, 70.4, 40, 1]
+    img_norm_cfg = dict(
+        mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+    multi_modality_pipeline = [
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='LIDAR',
+            load_dim=4,
+            use_dim=4),
+        dict(type='LoadImageFromFile'),
+        dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
+        dict(
+            type='Resize',
+            img_scale=[(640, 192), (2560, 768)],
+            multiscale_mode='range',
+            keep_ratio=True),
+        dict(
+            type='GlobalRotScaleTrans',
+            rot_range=[-0.78539816, 0.78539816],
+            scale_ratio_range=[0.95, 1.05],
+            translation_std=[0.2, 0.2, 0.2]),
+        dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
+        dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+        dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+        dict(type='PointShuffle'),
+        dict(type='Normalize', **img_norm_cfg),
+        dict(type='Pad', size_divisor=32),
+        dict(type='DefaultFormatBundle3D', class_names=classes),
+        dict(
+            type='Collect3D',
+            keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d']),
+    ]
+    modality = dict(use_lidar=True, use_camera=True)
+    kitti_dataset = KittiDataset(data_root, ann_file, split, pts_prefix,
+                                 multi_modality_pipeline, classes, modality)
+    data = kitti_dataset[0]
+    img = data['img']._data
+    lidar2img = data['img_metas']._data['lidar2img']
+    expected_lidar2img = np.array(
+        [[6.02943726e+02, -7.07913330e+02, -1.22748432e+01, -1.70942719e+02],
+         [1.76777252e+02, 8.80879879e+00, -7.07936157e+02, -1.02568634e+02],
+         [9.99984801e-01, -1.52826728e-03, -5.29071223e-03, -3.27567995e-01],
+         [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
+    assert img.shape[:] == (3, 416, 1344)
+    assert np.allclose(lidar2img, expected_lidar2img)


 def test_evaluate():
     if not torch.cuda.is_available():
         pytest.skip('test requires GPU and torch+cuda')
-    data_root, ann_file, classes, pts_prefix,\
+    data_root, ann_file, classes, pts_prefix, \
         pipeline, modality, split = _generate_kitti_dataset_config()
-    self = KittiDataset(data_root, ann_file, split, pts_prefix, pipeline,
-                        classes, modality)
+    kitti_dataset = KittiDataset(data_root, ann_file, split, pts_prefix,
+                                 pipeline, classes, modality)
     boxes_3d = LiDARInstance3DBoxes(
         torch.tensor(
             [[8.7314, -1.8559, -1.5997, 0.4800, 1.2000, 1.8900, 0.0100]]))
@@ -129,7 +224,7 @@ def test_evaluate():
     scores_3d = torch.tensor([0.5])
     metric = ['mAP']
     result = dict(boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)
-    ap_dict = self.evaluate([result], metric)
+    ap_dict = kitti_dataset.evaluate([result], metric)
     assert np.isclose(ap_dict['KITTI/Overall_3D_easy'], 3.0303030303030307)
     assert np.isclose(ap_dict['KITTI/Overall_3D_moderate'], 3.0303030303030307)
     assert np.isclose(ap_dict['KITTI/Overall_3D_hard'], 3.0303030303030307)
@@ -137,12 +232,12 @@ def test_evaluate():

 def test_show():
     import mmcv
-    import tempfile
     from os import path as osp
     from mmdet3d.core.bbox import LiDARInstance3DBoxes
-    temp_dir = tempfile.mkdtemp()
-    data_root, ann_file, classes, pts_prefix,\
+    tmp_dir = tempfile.TemporaryDirectory()
+    temp_dir = tmp_dir.name
+    data_root, ann_file, classes, pts_prefix, \
         pipeline, modality, split = _generate_kitti_dataset_config()
     kitti_dataset = KittiDataset(
         data_root, ann_file, split=split, modality=modality, pipeline=pipeline)
@@ -164,14 +259,37 @@ def test_show():
     mmcv.check_file_exist(pts_file_path)
     mmcv.check_file_exist(gt_file_path)
     mmcv.check_file_exist(pred_file_path)
+    tmp_dir.cleanup()
+
+    # test multi-modality show
+    tmp_dir = tempfile.TemporaryDirectory()
+    temp_dir = tmp_dir.name
+    _, _, _, _, multi_modality_pipeline, modality, _ = \
+        _generate_kitti_multi_modality_dataset_config()
+    kitti_dataset = KittiDataset(data_root, ann_file, split, pts_prefix,
+                                 multi_modality_pipeline, classes, modality)
+    kitti_dataset.show(results, temp_dir, show=False)
+    pts_file_path = osp.join(temp_dir, '000000', '000000_points.obj')
+    gt_file_path = osp.join(temp_dir, '000000', '000000_gt.obj')
+    pred_file_path = osp.join(temp_dir, '000000', '000000_pred.obj')
+    img_file_path = osp.join(temp_dir, '000000', '000000_img.png')
+    img_pred_path = osp.join(temp_dir, '000000', '000000_pred.png')
+    img_gt_file = osp.join(temp_dir, '000000', '000000_gt.png')
+    mmcv.check_file_exist(pts_file_path)
+    mmcv.check_file_exist(gt_file_path)
+    mmcv.check_file_exist(pred_file_path)
+    mmcv.check_file_exist(img_file_path)
+    mmcv.check_file_exist(img_pred_path)
+    mmcv.check_file_exist(img_gt_file)
+    tmp_dir.cleanup()

 def test_format_results():
     from mmdet3d.core.bbox import LiDARInstance3DBoxes
-    data_root, ann_file, classes, pts_prefix,\
+    data_root, ann_file, classes, pts_prefix, \
         pipeline, modality, split = _generate_kitti_dataset_config()
-    self = KittiDataset(data_root, ann_file, split, pts_prefix, pipeline,
-                        classes, modality)
+    kitti_dataset = KittiDataset(data_root, ann_file, split, pts_prefix,
+                                 pipeline, classes, modality)
     boxes_3d = LiDARInstance3DBoxes(
         torch.tensor(
             [[8.7314, -1.8559, -1.5997, 0.4800, 1.2000, 1.8900, 0.0100]]))
@@ -181,7 +299,7 @@ def test_format_results():
     scores_3d = torch.tensor([0.5])
     result = dict(boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)
     results = [result]
-    result_files, _ = self.format_results(results)
+    result_files, tmp_dir = kitti_dataset.format_results(results)
     expected_name = np.array(['Pedestrian'])
     expected_truncated = np.array([0.])
     expected_occluded = np.array([0])
@@ -202,13 +320,14 @@ def test_format_results():
     assert np.allclose(result_files[0]['rotation_y'], expected_rotation_y)
     assert np.allclose(result_files[0]['score'], expected_score)
     assert np.allclose(result_files[0]['sample_idx'], expected_sample_idx)
+    tmp_dir.cleanup()

 def test_bbox2result_kitti():
-    data_root, ann_file, classes, pts_prefix,\
+    data_root, ann_file, classes, pts_prefix, \
         pipeline, modality, split = _generate_kitti_dataset_config()
-    self = KittiDataset(data_root, ann_file, split, pts_prefix, pipeline,
-                        classes, modality)
+    kitti_dataset = KittiDataset(data_root, ann_file, split, pts_prefix,
+                                 pipeline, classes, modality)
     boxes_3d = LiDARInstance3DBoxes(
         torch.tensor(
             [[8.7314, -1.8559, -1.5997, 0.4800, 1.2000, 1.8900, 0.0100]]))
@@ -218,8 +337,9 @@ def test_bbox2result_kitti():
     scores_3d = torch.tensor([0.5])
     result = dict(boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)
     results = [result]
-    temp_kitti_result_dir = tempfile.mkdtemp()
-    det_annos = self.bbox2result_kitti(
+    tmp_dir = tempfile.TemporaryDirectory()
+    temp_kitti_result_dir = tmp_dir.name
+    det_annos = kitti_dataset.bbox2result_kitti(
         results, classes, submission_prefix=temp_kitti_result_dir)
     expected_file_path = os.path.join(temp_kitti_result_dir, '000000.txt')
     expected_name = np.array(['Pedestrian'])
@@ -231,34 +351,33 @@ def test_bbox2result_kitti():
     assert np.allclose(det_annos[0]['score'], expected_score)
     assert np.allclose(det_annos[0]['dimensions'], expected_dimensions)
     assert os.path.exists(expected_file_path)
-    os.remove(expected_file_path)
-    os.removedirs(temp_kitti_result_dir)
+    tmp_dir.cleanup()

-    temp_kitti_result_dir = tempfile.mkdtemp()
+    tmp_dir = tempfile.TemporaryDirectory()
+    temp_kitti_result_dir = tmp_dir.name
     boxes_3d = LiDARInstance3DBoxes(torch.tensor([]))
     labels_3d = torch.tensor([])
     scores_3d = torch.tensor([])
     empty_result = dict(
         boxes_3d=boxes_3d, labels_3d=labels_3d, scores_3d=scores_3d)
     results = [empty_result]
-    det_annos = self.bbox2result_kitti(
+    det_annos = kitti_dataset.bbox2result_kitti(
         results, classes, submission_prefix=temp_kitti_result_dir)
     expected_file_path = os.path.join(temp_kitti_result_dir, '000000.txt')
     assert os.path.exists(expected_file_path)
-    os.remove(expected_file_path)
-    os.removedirs(temp_kitti_result_dir)
+    tmp_dir.cleanup()


 def test_bbox2result_kitti2d():
-    data_root, ann_file, classes, pts_prefix,\
+    data_root, ann_file, classes, pts_prefix, \
         pipeline, modality, split = _generate_kitti_dataset_config()
-    self = KittiDataset(data_root, ann_file, split, pts_prefix, pipeline,
-                        classes, modality)
+    kitti_dataset = KittiDataset(data_root, ann_file, split, pts_prefix,
+                                 pipeline, classes, modality)
     bboxes = np.array([[[46.1218, -4.6496, -0.9275, 0.5316, 0.5],
                         [33.3189, 0.1981, 0.3136, 0.5656, 0.5]],
                        [[46.1366, -4.6404, -0.9510, 0.5162, 0.5],
                         [33.2646, 0.2297, 0.3446, 0.5746, 0.5]]])
-    det_annos = self.bbox2result_kitti2d([bboxes], classes)
+    det_annos = kitti_dataset.bbox2result_kitti2d([bboxes], classes)
     expected_name = np.array(
         ['Pedestrian', 'Pedestrian', 'Cyclist', 'Cyclist'])
     expected_bbox = np.array([[46.1218, -4.6496, -0.9275, 0.5316],
...
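The `lidar2img` matrix asserted in `test_getitem` above is the full LiDAR-to-pixel projection (for KITTI, essentially `P2 @ R0_rect @ Tr_velo_to_cam`, composed with any rescaling the pipeline applies to the image). A hedged sketch of how a single LiDAR point is projected with it:

```python
import numpy as np

# The 4x4 matrix checked in the test assertion above.
lidar2img = np.array(
    [[6.02943726e+02, -7.07913330e+02, -1.22748432e+01, -1.70942719e+02],
     [1.76777252e+02, 8.80879879e+00, -7.07936157e+02, -1.02568634e+02],
     [9.99984801e-01, -1.52826728e-03, -5.29071223e-03, -3.27567995e-01],
     [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])

pt = np.array([10.0, 1.0, -1.0, 1.0])   # homogeneous (x, y, z, 1) in the LiDAR frame
proj = lidar2img @ pt
depth = proj[2]                          # third row gives camera-frame depth
u, v = proj[0] / depth, proj[1] / depth  # pixel coordinates after perspective divide
print(u, v)                              # roughly (534, 246) for this point
```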
@@ -172,7 +172,8 @@ def test_show():
     from os import path as osp
     from mmdet3d.core.bbox import DepthInstance3DBoxes
-    temp_dir = tempfile.mkdtemp()
+    tmp_dir = tempfile.TemporaryDirectory()
+    temp_dir = tmp_dir.name
     root_path = './tests/data/scannet'
     ann_file = './tests/data/scannet/scannet_infos.pkl'
     scannet_dataset = ScanNetDataset(root_path, ann_file)
@@ -211,6 +212,7 @@ def test_show():
     mmcv.check_file_exist(pts_file_path)
     mmcv.check_file_exist(gt_file_path)
     mmcv.check_file_exist(pred_file_path)
+    tmp_dir.cleanup()


 def test_seg_getitem():
...
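The same cleanup change repeats across these test files: `tempfile.mkdtemp()` plus manual `os.remove`/`os.removedirs` bookkeeping gives way to `tempfile.TemporaryDirectory()`, whose `cleanup()` removes the whole tree at once. The pattern in isolation (standard library only):

```python
import os
import tempfile

tmp_dir = tempfile.TemporaryDirectory()
temp_dir = tmp_dir.name                  # a path, usable just like mkdtemp()'s
with open(os.path.join(temp_dir, 'dummy.obj'), 'w') as f:
    f.write('test output')
tmp_dir.cleanup()                        # recursive delete; no per-file os.remove
assert not os.path.exists(temp_dir)
```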
@@ -5,8 +5,7 @@ import torch
 from mmdet3d.datasets import SUNRGBDDataset


-def test_getitem():
-    np.random.seed(0)
+def _generate_sunrgbd_dataset_config():
     root_path = './tests/data/sunrgbd'
     ann_file = './tests/data/sunrgbd/sunrgbd_infos.pkl'
     class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
@@ -39,8 +38,61 @@ def test_getitem():
                 'pcd_scale_factor', 'pcd_rotation'
             ]),
     ]
+    modality = dict(use_lidar=True, use_camera=False)
+    return root_path, ann_file, class_names, pipelines, modality

-    sunrgbd_dataset = SUNRGBDDataset(root_path, ann_file, pipelines)
+
+def _generate_sunrgbd_multi_modality_dataset_config():
+    root_path = './tests/data/sunrgbd'
+    ann_file = './tests/data/sunrgbd/sunrgbd_infos.pkl'
+    class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
+                   'dresser', 'night_stand', 'bookshelf', 'bathtub')
+    img_norm_cfg = dict(
+        mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+    pipelines = [
+        dict(
+            type='LoadPointsFromFile',
+            coord_type='DEPTH',
+            shift_height=True,
+            load_dim=6,
+            use_dim=[0, 1, 2]),
+        dict(type='LoadImageFromFile'),
+        dict(type='LoadAnnotations3D'),
+        dict(type='LoadAnnotations', with_bbox=True),
+        dict(type='Resize', img_scale=(1333, 600), keep_ratio=True),
+        dict(type='RandomFlip', flip_ratio=0.0),
+        dict(type='Normalize', **img_norm_cfg),
+        dict(type='Pad', size_divisor=32),
+        dict(
+            type='RandomFlip3D',
+            sync_2d=False,
+            flip_ratio_bev_horizontal=0.5,
+        ),
+        dict(
+            type='GlobalRotScaleTrans',
+            rot_range=[-0.523599, 0.523599],
+            scale_ratio_range=[0.85, 1.15],
+            shift_height=True),
+        dict(type='IndoorPointSample', num_points=5),
+        dict(type='DefaultFormatBundle3D', class_names=class_names),
+        dict(
+            type='Collect3D',
+            keys=[
+                'img', 'gt_bboxes', 'gt_labels', 'points', 'gt_bboxes_3d',
+                'gt_labels_3d', 'calib'
+            ])
+    ]
+    modality = dict(use_lidar=True, use_camera=True)
+    return root_path, ann_file, class_names, pipelines, modality
+
+
+def test_getitem():
+    np.random.seed(0)
+    root_path, ann_file, class_names, pipelines, modality = \
+        _generate_sunrgbd_dataset_config()
+    sunrgbd_dataset = SUNRGBDDataset(
+        root_path, ann_file, pipelines, modality=modality)
     data = sunrgbd_dataset[0]
     points = data['points']._data
     gt_bboxes_3d = data['gt_bboxes_3d']._data
@@ -95,14 +147,41 @@ def test_getitem():
     assert SUNRGBD_dataset.CLASSES != original_classes
     assert SUNRGBD_dataset.CLASSES == ['bed', 'table']
+
+    # test multi-modality SUN RGB-D dataset
+    np.random.seed(0)
+    root_path, ann_file, class_names, multi_modality_pipelines, modality = \
+        _generate_sunrgbd_multi_modality_dataset_config()
+    sunrgbd_dataset = SUNRGBDDataset(
+        root_path, ann_file, multi_modality_pipelines, modality=modality)
+    data = sunrgbd_dataset[0]
+    points = data['points']._data
+    gt_bboxes_3d = data['gt_bboxes_3d']._data
+    gt_labels_3d = data['gt_labels_3d']._data
+    calib = data['calib']
+    img = data['img']._data
+    expected_Rt = np.array([[0.97959, 0.012593, -0.20061],
+                            [0.012593, 0.99223, 0.12377],
+                            [0.20061, -0.12377, 0.97182]])
+    expected_K = np.array([[529.5, 0., 0.], [0., 529.5, 0.], [365., 265., 1.]])
+    assert torch.allclose(points, expected_points, 1e-2)
+    assert torch.allclose(gt_bboxes_3d.tensor, expected_gt_bboxes_3d, 1e-3)
+    assert np.all(gt_labels_3d.numpy() == expected_gt_labels)
+    assert img.shape[:] == (3, 608, 832)
+    assert np.allclose(calib['Rt'], expected_Rt)
+    assert np.allclose(calib['K'], expected_K)


 def test_evaluate():
     if not torch.cuda.is_available():
         pytest.skip()
     from mmdet3d.core.bbox.structures import DepthInstance3DBoxes
-    root_path = './tests/data/sunrgbd'
-    ann_file = './tests/data/sunrgbd/sunrgbd_infos.pkl'
-    sunrgbd_dataset = SUNRGBDDataset(root_path, ann_file)
+    root_path, ann_file, _, pipelines, modality = \
+        _generate_sunrgbd_dataset_config()
+    sunrgbd_dataset = SUNRGBDDataset(
+        root_path, ann_file, pipelines, modality=modality)
     results = []
     pred_boxes = dict()
     pred_boxes['boxes_3d'] = DepthInstance3DBoxes(
@@ -129,10 +208,12 @@ def test_show():
     from os import path as osp
     from mmdet3d.core.bbox import DepthInstance3DBoxes
-    temp_dir = tempfile.mkdtemp()
-    root_path = './tests/data/sunrgbd'
-    ann_file = './tests/data/sunrgbd/sunrgbd_infos.pkl'
-    sunrgbd_dataset = SUNRGBDDataset(root_path, ann_file)
+    tmp_dir = tempfile.TemporaryDirectory()
+    temp_dir = tmp_dir.name
+    root_path, ann_file, _, pipelines, modality = \
+        _generate_sunrgbd_dataset_config()
+    sunrgbd_dataset = SUNRGBDDataset(
+        root_path, ann_file, pipelines, modality=modality)
     boxes_3d = DepthInstance3DBoxes(
         torch.tensor(
             [[1.1500, 4.2614, -1.0669, 1.3219, 2.1593, 1.0267, 1.6473],
@@ -152,3 +233,26 @@ def test_show():
     mmcv.check_file_exist(pts_file_path)
     mmcv.check_file_exist(gt_file_path)
     mmcv.check_file_exist(pred_file_path)
+    tmp_dir.cleanup()
+
+    # test multi-modality show
+    tmp_dir = tempfile.TemporaryDirectory()
+    temp_dir = tmp_dir.name
+    root_path, ann_file, _, multi_modality_pipelines, modality = \
+        _generate_sunrgbd_multi_modality_dataset_config()
+    sunrgbd_dataset = SUNRGBDDataset(
+        root_path, ann_file, multi_modality_pipelines, modality=modality)
+    sunrgbd_dataset.show(results, temp_dir, show=False)
+    pts_file_path = osp.join(temp_dir, '000001', '000001_points.obj')
+    gt_file_path = osp.join(temp_dir, '000001', '000001_gt.obj')
+    pred_file_path = osp.join(temp_dir, '000001', '000001_pred.obj')
+    img_file_path = osp.join(temp_dir, '000001', '000001_img.png')
+    img_pred_path = osp.join(temp_dir, '000001', '000001_pred.png')
+    img_gt_file = osp.join(temp_dir, '000001', '000001_gt.png')
+    mmcv.check_file_exist(pts_file_path)
+    mmcv.check_file_exist(gt_file_path)
+    mmcv.check_file_exist(pred_file_path)
+    mmcv.check_file_exist(img_file_path)
+    mmcv.check_file_exist(img_pred_path)
+    mmcv.check_file_exist(img_gt_file)
+    tmp_dir.cleanup()
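The `(3, 608, 832)` image shape asserted in the multi-modality `test_getitem` above follows from the pipeline: keep-ratio `Resize` to `img_scale=(1333, 600)` caps the short side at 600, and `Pad(size_divisor=32)` rounds both sides up. Assuming the sample image is 730x530 (an assumption about the test asset), the arithmetic checks out:

```python
# Hedged arithmetic for (3, 608, 832); the 730x530 raw size is an assumption
# about the test image, everything else follows the pipeline config above.
w, h = 730, 530
scale = min(1333 / max(w, h), 600 / min(w, h))            # mmcv keep-ratio rule
new_w, new_h = int(w * scale + 0.5), int(h * scale + 0.5)  # -> 826, 600
def pad32(x): return (x + 31) // 32 * 32                   # Pad(size_divisor=32)
assert (pad32(new_h), pad32(new_w)) == (608, 832)
```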
@@ -9,7 +9,7 @@ from os.path import dirname, exists, join
 from mmdet3d.apis import (convert_SyncBN, inference_detector, init_detector,
                           show_result_meshlab, single_gpu_test)
 from mmdet3d.core import Box3DMode
-from mmdet3d.core.bbox import LiDARInstance3DBoxes
+from mmdet3d.core.bbox import DepthInstance3DBoxes, LiDARInstance3DBoxes
 from mmdet3d.datasets import build_dataloader, build_dataset
 from mmdet3d.models import build_detector
@@ -65,19 +65,122 @@ def test_show_result_meshlab():
         pts_bbox=dict(
             boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
     ]
-    temp_out_dir = tempfile.mkdtemp()
+    tmp_dir = tempfile.TemporaryDirectory()
+    temp_out_dir = tmp_dir.name
     out_dir, file_name = show_result_meshlab(data, result, temp_out_dir)
-    expected_outfile_ply = file_name + '_pred.obj'
-    expected_outfile_obj = file_name + '_points.obj'
-    expected_outfile_ply_path = os.path.join(out_dir, file_name,
-                                             expected_outfile_ply)
-    expected_outfile_obj_path = os.path.join(out_dir, file_name,
-                                             expected_outfile_obj)
-    assert os.path.exists(expected_outfile_ply_path)
-    assert os.path.exists(expected_outfile_obj_path)
-    os.remove(expected_outfile_obj_path)
-    os.remove(expected_outfile_ply_path)
-    os.removedirs(os.path.join(temp_out_dir, file_name))
+    expected_outfile_pred = file_name + '_pred.obj'
+    expected_outfile_pts = file_name + '_points.obj'
+    expected_outfile_pred_path = os.path.join(out_dir, file_name,
+                                              expected_outfile_pred)
+    expected_outfile_pts_path = os.path.join(out_dir, file_name,
+                                             expected_outfile_pts)
+    assert os.path.exists(expected_outfile_pred_path)
+    assert os.path.exists(expected_outfile_pts_path)
+    tmp_dir.cleanup()
+
+    # test multi-modality show
+    # Indoor scene
+    pcd = 'tests/data/sunrgbd/points/000001.bin'
+    filename = 'tests/data/sunrgbd/sunrgbd_trainval/image/000001.jpg'
+    box_3d = DepthInstance3DBoxes(
+        torch.tensor(
+            [[-1.1580, 3.3041, -0.9961, 0.3829, 0.4647, 0.5574, 1.1213]]))
+    img = np.random.randn(1, 3, 608, 832)
+    K = np.array([[[529.5000, 0.0000, 365.0000], [0.0000, 529.5000, 265.0000],
+                   [0.0000, 0.0000, 1.0000]]])
+    Rt = torch.tensor([[[0.9980, 0.0058, -0.0634], [0.0058, 0.9835, 0.1808],
+                        [0.0634, -0.1808, 0.9815]]])
+    img_meta = dict(
+        filename=filename,
+        pcd_horizontal_flip=False,
+        pcd_vertical_flip=False,
+        box_mode_3d=Box3DMode.DEPTH,
+        box_type_3d=DepthInstance3DBoxes,
+        pcd_trans=np.array([0., 0., 0.]),
+        pcd_scale_factor=1.0,
+        pts_filename=pcd,
+        transformation_3d_flow=['R', 'S', 'T'])
+    calib = dict(K=K, Rt=Rt)
+    data = dict(
+        points=[[torch.tensor(points)]],
+        img_metas=[[img_meta]],
+        img=[img],
+        calib=[calib])
+    result = [
+        dict(
+            pts_bbox=dict(
+                boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
+    ]
+    tmp_dir = tempfile.TemporaryDirectory()
+    temp_out_dir = tmp_dir.name
+    out_dir, file_name = show_result_meshlab(data, result, temp_out_dir, 0.3)
+    expected_outfile_pred = file_name + '_pred.obj'
+    expected_outfile_pts = file_name + '_points.obj'
+    expected_outfile_png = file_name + '_img.png'
+    expected_outfile_proj = file_name + '_pred.png'
+    expected_outfile_pred_path = os.path.join(out_dir, file_name,
+                                              expected_outfile_pred)
+    expected_outfile_pts_path = os.path.join(out_dir, file_name,
+                                             expected_outfile_pts)
+    expected_outfile_png_path = os.path.join(out_dir, file_name,
+                                             expected_outfile_png)
+    expected_outfile_proj_path = os.path.join(out_dir, file_name,
+                                              expected_outfile_proj)
+    assert os.path.exists(expected_outfile_pred_path)
+    assert os.path.exists(expected_outfile_pts_path)
+    assert os.path.exists(expected_outfile_png_path)
+    assert os.path.exists(expected_outfile_proj_path)
+    tmp_dir.cleanup()
+
+    # outdoor scene
+    pcd = 'tests/data/kitti/training/velodyne_reduced/000000.bin'
+    filename = 'tests/data/kitti/training/image_2/000000.png'
+    box_3d = LiDARInstance3DBoxes(
+        torch.tensor(
+            [[6.4495, -3.9097, -1.7409, 1.5063, 3.1819, 1.4716, 1.8782]]))
+    img = np.random.randn(1, 3, 384, 1280)
+    lidar2img = np.array(
+        [[6.09695435e+02, -7.21421631e+02, -1.25125790e+00, -1.23041824e+02],
+         [1.80384201e+02, 7.64479828e+00, -7.19651550e+02, -1.01016693e+02],
+         [9.99945343e-01, 1.24365499e-04, 1.04513029e-02, -2.69386917e-01],
+         [0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
+    img_meta = dict(
+        filename=filename,
+        pcd_horizontal_flip=False,
+        pcd_vertical_flip=False,
+        box_mode_3d=Box3DMode.LIDAR,
+        box_type_3d=LiDARInstance3DBoxes,
+        pcd_trans=np.array([0., 0., 0.]),
+        pcd_scale_factor=1.0,
+        pts_filename=pcd,
+        lidar2img=lidar2img)
+    data = dict(
+        points=[[torch.tensor(points)]], img_metas=[[img_meta]], img=[img])
+    result = [
+        dict(
+            pts_bbox=dict(
+                boxes_3d=box_3d, labels_3d=labels_3d, scores_3d=scores_3d))
+    ]
+    tmp_dir = tempfile.TemporaryDirectory()
+    temp_out_dir = tmp_dir.name
+    out_dir, file_name = show_result_meshlab(data, result, temp_out_dir, 0.3)
+    expected_outfile_pred = file_name + '_pred.obj'
+    expected_outfile_pts = file_name + '_points.obj'
+    expected_outfile_png = file_name + '_img.png'
+    expected_outfile_proj = file_name + '_pred.png'
+    expected_outfile_pred_path = os.path.join(out_dir, file_name,
+                                              expected_outfile_pred)
+    expected_outfile_pts_path = os.path.join(out_dir, file_name,
+                                             expected_outfile_pts)
+    expected_outfile_png_path = os.path.join(out_dir, file_name,
+                                             expected_outfile_png)
+    expected_outfile_proj_path = os.path.join(out_dir, file_name,
+                                              expected_outfile_proj)
+    assert os.path.exists(expected_outfile_pred_path)
+    assert os.path.exists(expected_outfile_pts_path)
+    assert os.path.exists(expected_outfile_png_path)
+    assert os.path.exists(expected_outfile_proj_path)
+    tmp_dir.cleanup()


 def test_inference_detector():
...