import mmcv
import numpy as np
import re
import torch
from copy import deepcopy
from mmcv.parallel import collate, scatter
from mmcv.runner import load_checkpoint
from os import path as osp

from mmdet3d.core import (Box3DMode, DepthInstance3DBoxes,
                          LiDARInstance3DBoxes, show_multi_modality_result,
                          show_result)
from mmdet3d.core.bbox import get_box_type
from mmdet3d.datasets.pipelines import Compose
from mmdet3d.models import build_detector


def convert_SyncBN(config):
    """Convert config's naiveSyncBN to BN.

    Args:
        config (dict or :obj:`mmcv.Config`): Config to be converted in place.
    """
    if isinstance(config, dict):
        for item in config:
            if item == 'norm_cfg':
                config[item]['type'] = config[item]['type'].replace(
                    'naiveSyncBN', 'BN')
            else:
                convert_SyncBN(config[item])


def init_detector(config, checkpoint=None, device='cuda:0'):
    """Initialize a detector from a config file.

    Args:
        config (str or :obj:`mmcv.Config`): Config file path or the config
            object.
        checkpoint (str, optional): Checkpoint path. If left as None, the
            model will not load any weights.
        device (str): Device to use. Defaults to 'cuda:0'.

    Returns:
        nn.Module: The constructed detector.
    """
    if isinstance(config, str):
        config = mmcv.Config.fromfile(config)
    elif not isinstance(config, mmcv.Config):
        raise TypeError('config must be a filename or Config object, '
                        f'but got {type(config)}')
    config.model.pretrained = None
    convert_SyncBN(config.model)
    config.model.train_cfg = None
    model = build_detector(config.model, test_cfg=config.get('test_cfg'))
    if checkpoint is not None:
        checkpoint = load_checkpoint(model, checkpoint)
        if 'CLASSES' in checkpoint['meta']:
            model.CLASSES = checkpoint['meta']['CLASSES']
        else:
            model.CLASSES = config.class_names
    model.cfg = config  # save the config in the model for convenience
    model.to(device)
    model.eval()
    return model


def inference_detector(model, pcd):
    """Inference point cloud with the detector.

    Args:
        model (nn.Module): The loaded detector.
        pcd (str): Point cloud file path.

    Returns:
        tuple: Predicted results and data from pipeline.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # build the data pipeline
    test_pipeline = deepcopy(cfg.data.test.pipeline)
    test_pipeline = Compose(test_pipeline)
    box_type_3d, box_mode_3d = get_box_type(cfg.data.test.box_type_3d)
    data = dict(
        pts_filename=pcd,
        box_type_3d=box_type_3d,
        box_mode_3d=box_mode_3d,
        sweeps=[],
        # set timestamp = 0
        timestamp=[0],
        img_fields=[],
        bbox3d_fields=[],
        pts_mask_fields=[],
        pts_seg_fields=[],
        bbox_fields=[],
        mask_fields=[],
        seg_fields=[])
    data = test_pipeline(data)
    data = collate([data], samples_per_gpu=1)
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device.index])[0]
    else:
        # this is a workaround to avoid a bug in MMDataParallel
        data['img_metas'] = data['img_metas'][0].data
        data['points'] = data['points'][0].data
    # forward the model
    with torch.no_grad():
        result = model(return_loss=False, rescale=True, **data)
    return result, data


def inference_multi_modality_detector(model, pcd, image, ann_file):
    """Inference point cloud and image with the multi-modality detector.

    Args:
        model (nn.Module): The loaded detector.
        pcd (str): Point cloud file path.
        image (str): Image file path.
        ann_file (str): Annotation file path containing the calibration info.

    Returns:
        tuple: Predicted results and data from pipeline.
    """
    cfg = model.cfg
    device = next(model.parameters()).device  # model device
    # build the data pipeline
    test_pipeline = deepcopy(cfg.data.test.pipeline)
    test_pipeline = Compose(test_pipeline)
    box_type_3d, box_mode_3d = get_box_type(cfg.data.test.box_type_3d)
    # get data info containing calib
    data_infos = mmcv.load(ann_file)
    image_idx = int(re.findall(r'\d+', image)[-1])  # xxx/sunrgbd_000017.jpg
    for x in data_infos:
        if int(x['image']['image_idx']) != image_idx:
            continue
        info = x
        break
    data = dict(
        pts_filename=pcd,
        img_prefix=osp.dirname(image),
        img_info=dict(filename=osp.basename(image)),
        box_type_3d=box_type_3d,
        box_mode_3d=box_mode_3d,
        img_fields=[],
        bbox3d_fields=[],
        pts_mask_fields=[],
        pts_seg_fields=[],
        bbox_fields=[],
        mask_fields=[],
        seg_fields=[])

    # depth map points to image conversion
    if box_mode_3d == Box3DMode.DEPTH:
        data.update(dict(calib=info['calib']))

    data = test_pipeline(data)

    # LiDAR to image conversion
    if box_mode_3d == Box3DMode.LIDAR:
        rect = info['calib']['R0_rect'].astype(np.float32)
        Trv2c = info['calib']['Tr_velo_to_cam'].astype(np.float32)
        P2 = info['calib']['P2'].astype(np.float32)
        lidar2img = P2 @ rect @ Trv2c
        data['img_metas'][0].data['lidar2img'] = lidar2img
    elif box_mode_3d == Box3DMode.DEPTH:
        data['calib'][0]['Rt'] = data['calib'][0]['Rt'].astype(np.float32)
        data['calib'][0]['K'] = data['calib'][0]['K'].astype(np.float32)

    data = collate([data], samples_per_gpu=1)
    if next(model.parameters()).is_cuda:
        # scatter to specified GPU
        data = scatter(data, [device.index])[0]
    else:
        # this is a workaround to avoid a bug in MMDataParallel
        data['img_metas'] = data['img_metas'][0].data
        data['points'] = data['points'][0].data
        data['img'] = data['img'][0].data
        if box_mode_3d == Box3DMode.DEPTH:
            data['calib'] = data['calib'][0].data
    # forward the model
    with torch.no_grad():
        result = model(return_loss=False, rescale=True, **data)
    return result, data
def show_result_meshlab(data,
                        result,
                        out_dir,
                        score_thr=0.0,
                        show=False,
                        snapshot=False):
    """Show result with MeshLab.

    Args:
        data (dict): Contain data from pipeline.
        result (list[dict]): Predicted result from model.
        out_dir (str): Directory to save visualized result.
        score_thr (float): Minimum score of bboxes to be shown.
            Defaults to 0.0.
        show (bool): Visualize the results online. Defaults to False.
        snapshot (bool): Whether to save the online results.
            Defaults to False.

    Returns:
        tuple: The output directory and the file name of the saved result.
    """
    points = data['points'][0][0].cpu().numpy()
    pts_filename = data['img_metas'][0][0]['pts_filename']
    file_name = osp.split(pts_filename)[-1].split('.')[0]

    assert out_dir is not None, 'Expect out_dir, got none.'

    if 'pts_bbox' in result[0].keys():
        pred_bboxes = result[0]['pts_bbox']['boxes_3d'].tensor.numpy()
        pred_scores = result[0]['pts_bbox']['scores_3d'].numpy()
    else:
        pred_bboxes = result[0]['boxes_3d'].tensor.numpy()
        pred_scores = result[0]['scores_3d'].numpy()

    # filter out low score bboxes for visualization
    if score_thr > 0:
        inds = pred_scores > score_thr
        pred_bboxes = pred_bboxes[inds]

    # for now we convert points into depth mode
    box_mode = data['img_metas'][0][0]['box_mode_3d']
    if box_mode != Box3DMode.DEPTH:
        points = points[..., [1, 0, 2]]
        points[..., 0] *= -1
        show_bboxes = Box3DMode.convert(pred_bboxes, box_mode, Box3DMode.DEPTH)
    else:
        show_bboxes = deepcopy(pred_bboxes)
    show_result(
        points,
        None,
        show_bboxes,
        out_dir,
        file_name,
        show=show,
        snapshot=snapshot)

    if 'img' not in data.keys():
        return out_dir, file_name

    # multi-modality visualization
    # project 3D bbox to 2D image plane
    if box_mode == Box3DMode.LIDAR:
        if 'lidar2img' not in data['img_metas'][0][0]:
            raise NotImplementedError(
                'LiDAR to image transformation matrix is not provided')

        show_bboxes = LiDARInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))
        img = mmcv.imread(data['img_metas'][0][0]['filename'])

        show_multi_modality_result(
            img,
            None,
            show_bboxes,
            data['img_metas'][0][0]['lidar2img'],
            out_dir,
            file_name,
            show=show)
    elif box_mode == Box3DMode.DEPTH:
        if 'calib' not in data.keys():
            raise NotImplementedError(
                'camera calibration information is not provided')

        show_bboxes = DepthInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))
        img = mmcv.imread(data['img_metas'][0][0]['filename'])

        show_multi_modality_result(
            img,
            None,
            show_bboxes,
            data['calib'][0],
            out_dir,
            file_name,
            depth_bbox=True,
            img_metas=data['img_metas'][0][0],
            show=show)
    else:
        raise NotImplementedError(
            f'visualization of {box_mode} bbox is not supported')

    return out_dir, file_name
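# Example usage of the inference APIs defined above (a minimal sketch; every
# path below is a hypothetical placeholder, not a file shipped with the repo):
#
#     # single-modality (point-cloud-only) inference
#     model = init_detector('configs/some_model/some_config.py',
#                           checkpoint='checkpoints/some_model.pth',
#                           device='cuda:0')
#     result, data = inference_detector(model, 'demo/data/sample_000008.bin')
#     # result[0]['boxes_3d'], result[0]['scores_3d'] and result[0]['labels_3d']
#     # hold the predicted boxes, confidences and class indices
#
#     # multi-modality inference; the image name must end with the numeric
#     # sample index (e.g. sunrgbd_000017.jpg), since inference_multi_modality_detector
#     # parses that index from the filename to look up calibration in ann_file
#     result, data = inference_multi_modality_detector(
#         model, 'data/sunrgbd_000017.bin', 'data/sunrgbd_000017.jpg',
#         'data/sunrgbd_infos.pkl')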
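# A runnable command-line sketch tying the APIs above together, in the style
# of a typical demo script. The argument names and defaults here are
# illustrative assumptions, not part of the library API; the caller must
# supply a config/checkpoint pair that actually match.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='3D detection demo')
    parser.add_argument('pcd', help='point cloud file')
    parser.add_argument('config', help='model config file')
    parser.add_argument('checkpoint', help='checkpoint file')
    parser.add_argument(
        '--device', default='cuda:0', help='device used for inference')
    parser.add_argument(
        '--out-dir', default='demo_results', help='directory to save results')
    parser.add_argument(
        '--score-thr', type=float, default=0.0, help='bbox score threshold')
    parser.add_argument(
        '--show', action='store_true', help='visualize results online')
    args = parser.parse_args()

    # build the model, run single-sample inference and dump the visualization
    model = init_detector(args.config, args.checkpoint, device=args.device)
    result, data = inference_detector(model, args.pcd)
    out_dir, file_name = show_result_meshlab(
        data, result, args.out_dir, score_thr=args.score_thr, show=args.show)
    print(f'Result of {file_name} saved to {out_dir}')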