import numpy as np
from mmcv.parallel import DataContainer as DC

from mmdet3d.core.bbox import BaseInstance3DBoxes
from mmdet.datasets.builder import PIPELINES
from mmdet.datasets.pipelines import to_tensor

# Drop any entry already registered under 'DefaultFormatBundle' so that the
# class defined below can be registered under the same name. A default is
# supplied so that a registry without such an entry does not raise KeyError
# at import time.
PIPELINES._module_dict.pop('DefaultFormatBundle', None)


@PIPELINES.register_module()
class DefaultFormatBundle(object):
    """Default formatting bundle.

    It simplifies the pipeline of formatting common fields, including "img",
    "proposals", "gt_bboxes", "gt_labels", "gt_masks" and "gt_semantic_seg".
    These fields are formatted as follows.

    - img: (1)transpose, (2)to tensor, (3)to DataContainer (stack=True)
    - proposals: (1)to tensor, (2)to DataContainer
    - gt_bboxes: (1)to tensor, (2)to DataContainer
    - gt_bboxes_ignore: (1)to tensor, (2)to DataContainer
    - gt_labels: (1)to tensor, (2)to DataContainer
    - gt_labels_3d: (1)to tensor, (2)to DataContainer
    - pts_instance_mask: (1)to tensor, (2)to DataContainer
    - pts_semantic_mask: (1)to tensor, (2)to DataContainer
    - gt_bboxes_3d: to DataContainer (cpu_only=True) when it is a
      ``BaseInstance3DBoxes``; otherwise (1)to tensor, (2)to DataContainer
    - gt_masks: to DataContainer (cpu_only=True)
    - gt_semantic_seg: (1)unsqueeze dim-0 (2)to tensor,
      (3)to DataContainer (stack=True)

    Fields that are absent from the input are skipped.
    """

    def __init__(self, ):
        return

    def __call__(self, results):
        """Wrap the known fields of ``results`` in DataContainers.

        Args:
            results (dict): Result dict of one sample. It is modified in
                place.

        Returns:
            dict: The same ``results`` object with the fields listed in the
                class docstring replaced by their DataContainer form.
        """
        if 'img' in results:
            if isinstance(results['img'], list):
                # process multiple imgs in single frame
                # Each image gets its last axis moved to the front
                # (presumably HWC -> CHW), then all are stacked on a new
                # leading axis.
                imgs = [img.transpose(2, 0, 1) for img in results['img']]
                imgs = np.ascontiguousarray(np.stack(imgs, axis=0))
                results['img'] = DC(to_tensor(imgs), stack=True)
            else:
                img = np.ascontiguousarray(results['img'].transpose(2, 0, 1))
                results['img'] = DC(to_tensor(img), stack=True)

        for key in [
                'proposals', 'gt_bboxes', 'gt_bboxes_ignore', 'gt_labels',
                'gt_labels_3d', 'pts_instance_mask', 'pts_semantic_mask'
        ]:
            if key not in results:
                continue
            if isinstance(results[key], list):
                # A list field is converted element by element and kept as a
                # list inside a single DataContainer.
                results[key] = DC([to_tensor(res) for res in results[key]])
            else:
                results[key] = DC(to_tensor(results[key]))

        if 'gt_bboxes_3d' in results:
            if isinstance(results['gt_bboxes_3d'], BaseInstance3DBoxes):
                # Box objects are passed through unconverted and flagged
                # cpu_only.
                results['gt_bboxes_3d'] = DC(
                    results['gt_bboxes_3d'], cpu_only=True)
            else:
                results['gt_bboxes_3d'] = DC(
                    to_tensor(results['gt_bboxes_3d']))

        if 'gt_masks' in results:
            results['gt_masks'] = DC(results['gt_masks'], cpu_only=True)
        if 'gt_semantic_seg' in results:
            results['gt_semantic_seg'] = DC(
                to_tensor(results['gt_semantic_seg'][None, ...]), stack=True)

        return results

    def __repr__(self):
        return self.__class__.__name__


@PIPELINES.register_module()
class Collect3D(object):
    """Collect selected fields and meta information from a result dict.

    Args:
        keys (Sequence[str]): Keys copied from ``results`` into the output.
            Every one of them must be present in ``results``.
        meta_keys (Sequence[str]): Keys gathered into the ``'img_meta'``
            entry of the output. Keys missing from ``results`` are skipped.
    """

    def __init__(self,
                 keys,
                 meta_keys=('filename', 'ori_shape', 'img_shape', 'lidar2img',
                            'pad_shape', 'scale_factor', 'flip', 'pcd_flip',
                            'box_mode_3d', 'box_type_3d', 'img_norm_cfg',
                            'rect', 'Trv2c', 'P2', 'pcd_trans', 'sample_idx',
                            'pcd_scale_factor', 'pcd_rotation')):
        self.keys = keys
        self.meta_keys = meta_keys

    def __call__(self, results):
        """Build the output dict from ``results``.

        Args:
            results (dict): Result dict of one sample.

        Returns:
            dict: A new dict holding ``'img_meta'`` (a cpu-only
                DataContainer around the collected meta dict) plus one entry
                per key in ``self.keys``.

        Raises:
            KeyError: If a key in ``self.keys`` is missing from ``results``
                (assuming ``results`` is a plain dict).
        """
        data = {}
        img_meta = {}
        for key in self.meta_keys:
            if key in results:
                img_meta[key] = results[key]
        data['img_meta'] = DC(img_meta, cpu_only=True)
        for key in self.keys:
            data[key] = results[key]
        return data

    def __repr__(self):
        return self.__class__.__name__ + '(keys={}, meta_keys={})'.format(
            self.keys, self.meta_keys)


@PIPELINES.register_module()
class DefaultFormatBundle3D(DefaultFormatBundle):
    """Default formatting bundle for 3D data.

    On top of everything :class:`DefaultFormatBundle` does, it

    - wraps "voxels", "coors", "voxel_centers", "num_points" and "points" as
      (1)to tensor, (2)to DataContainer (stack=False);
    - when ``with_gt`` is set, filters ground truth fields with
      "gt_bboxes_3d_mask" / "gt_bboxes_mask" if those are present;
    - when ``with_gt`` and ``with_label`` are both set, derives "gt_labels"
      from "gt_names" and "gt_labels_3d" from "gt_names_3d" by looking the
      names up in ``class_names``.

    Args:
        class_names (list[str]): Class names; the position of a name in this
            list is used as its label.
        with_gt (bool): Whether to process ground truth fields. Labels are
            only generated when this is also True.
        with_label (bool): Whether to generate label fields from names.
    """

    def __init__(self, class_names, with_gt=True, with_label=True):
        super(DefaultFormatBundle3D, self).__init__()
        self.class_names = class_names
        self.with_gt = with_gt
        self.with_label = with_label

    def _names_to_labels(self, names):
        """Map an iterable of class names to an int64 array of indices."""
        return np.array([self.class_names.index(n) for n in names],
                        dtype=np.int64)

    def __call__(self, results):
        """Format 3D fields, then apply the base formatting.

        Args:
            results (dict): Result dict of one sample. It is modified in
                place.

        Returns:
            dict: The formatted ``results``.

        Raises:
            ValueError: If a name to be converted into a label is not in
                ``self.class_names`` (assuming it is a list).
        """
        # Format 3D data
        for key in [
                'voxels', 'coors', 'voxel_centers', 'num_points', 'points'
        ]:
            if key not in results:
                continue
            results[key] = DC(to_tensor(results[key]), stack=False)

        if self.with_gt:
            # Clean GT bboxes in the final
            if 'gt_bboxes_3d_mask' in results:
                gt_bboxes_3d_mask = results['gt_bboxes_3d_mask']
                results['gt_bboxes_3d'] = results['gt_bboxes_3d'][
                    gt_bboxes_3d_mask]
                if 'gt_names_3d' in results:
                    results['gt_names_3d'] = results['gt_names_3d'][
                        gt_bboxes_3d_mask]
            if 'gt_bboxes_mask' in results:
                gt_bboxes_mask = results['gt_bboxes_mask']
                if 'gt_bboxes' in results:
                    results['gt_bboxes'] = results['gt_bboxes'][gt_bboxes_mask]
                # 'gt_names' is treated as optional everywhere else in this
                # method, so guard it here too instead of raising KeyError.
                if 'gt_names' in results:
                    results['gt_names'] = results['gt_names'][gt_bboxes_mask]

            if self.with_label:
                if 'gt_names' in results and len(results['gt_names']) == 0:
                    # Checked first: the next branch indexes element 0.
                    results['gt_labels'] = np.array([], dtype=np.int64)
                elif 'gt_names' in results and isinstance(
                        results['gt_names'][0], list):
                    # gt_labels might be a list of list in multi-view setting
                    results['gt_labels'] = [
                        self._names_to_labels(res)
                        for res in results['gt_names']
                    ]
                elif 'gt_names' in results:
                    results['gt_labels'] = self._names_to_labels(
                        results['gt_names'])
                # we still assume one pipeline for one frame LiDAR
                # thus, the 3D name is list[string]
                if 'gt_names_3d' in results:
                    results['gt_labels_3d'] = self._names_to_labels(
                        results['gt_names_3d'])

        results = super(DefaultFormatBundle3D, self).__call__(results)
        return results

    def __repr__(self):
        repr_str = self.__class__.__name__
        repr_str += '(class_names={}, '.format(self.class_names)
        repr_str += 'with_gt={}, with_label={})'.format(
            self.with_gt, self.with_label)
        return repr_str