Commit 97e4aeb7 authored by liyinhao's avatar liyinhao
Browse files

finish scannet_converter and sunrgbd_converter

parent 6d71b439
...@@ -3,6 +3,6 @@ line_length = 79 ...@@ -3,6 +3,6 @@ line_length = 79
multi_line_output = 0 multi_line_output = 0
known_standard_library = setuptools known_standard_library = setuptools
known_first_party = mmdet,mmdet3d known_first_party = mmdet,mmdet3d
known_third_party = cv2,mmcv,numba,numpy,nuscenes,pycocotools,pyquaternion,shapely,six,skimage,torch,torchvision known_third_party = cv2,mmcv,numba,numpy,nuscenes,pycocotools,pyquaternion,scipy,shapely,six,skimage,torch,torchvision
no_lines_before = STDLIB,LOCALFOLDER no_lines_before = STDLIB,LOCALFOLDER
default_section = THIRDPARTY default_section = THIRDPARTY
import pickle
from pathlib import Path
from tools.data_converter.scannet_data_utils import ScannetObject
def create_scannet_info_file(data_path,
                             pkl_prefix='scannet',
                             save_path=None,
                             relative_path=True):
    """Create train/val info pickle files for the ScanNet dataset.

    Args:
        data_path (str): Root directory of the preprocessed ScanNet data.
        pkl_prefix (str): Prefix for the generated pickle filenames.
            BUGFIX: default changed from 'scannet_' to 'scannet' — the
            filename template below appends '_infos_*.pkl', so the old
            default produced 'scannet__infos_train.pkl' (double underscore).
        save_path (str, optional): Directory for the output pickles.
            Defaults to ``data_path`` when None.
        relative_path (bool): Unused; kept for interface compatibility.
    """
    save_path = Path(data_path) if save_path is None else Path(save_path)
    train_filename = save_path / f'{pkl_prefix}_infos_train.pkl'
    val_filename = save_path / f'{pkl_prefix}_infos_val.pkl'
    dataset = ScannetObject(root_path=data_path, split='train')
    train_split, val_split = 'train', 'val'
    # Gather and dump the train split, then reuse the loader for val.
    dataset.set_split(train_split)
    scannet_infos_train = dataset.get_scannet_infos(has_label=True)
    with open(train_filename, 'wb') as f:
        pickle.dump(scannet_infos_train, f)
    print('Scannet info train file is saved to %s' % train_filename)
    dataset.set_split(val_split)
    scannet_infos_val = dataset.get_scannet_infos(has_label=True)
    with open(val_filename, 'wb') as f:
        pickle.dump(scannet_infos_val, f)
    print('Scannet info val file is saved to %s' % val_filename)
# Script entry point: generate the ScanNet info pickles in ./data/scannet.
if __name__ == '__main__':
    create_scannet_info_file(
        data_path='./data/scannet', save_path='./data/scannet')
import os
import numpy as np
class ScannetObject(object):
    """Loader for preprocessed ScanNet scenes and their box annotations."""

    def __init__(self, root_path, split='train'):
        self.root_dir = root_path
        self.split = split
        self.split_dir = os.path.join(root_path)
        # The 18-class ScanNet benchmark label set. 'showercurtrain' is the
        # canonical (misspelled) benchmark label — do not correct it.
        class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door',
                       'window', 'bookshelf', 'picture', 'counter', 'desk',
                       'curtain', 'refrigerator', 'showercurtrain', 'toilet',
                       'sink', 'bathtub', 'garbagebin')
        self.type2class = {name: idx for idx, name in enumerate(class_names)}
        self.class2type = {idx: name for idx, name in enumerate(class_names)}
        # NYU40 ids of the benchmark classes, in the same order as above.
        self.nyu40ids = np.array(
            [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39])
        self.nyu40id2class = {
            nyu40id: i
            for i, nyu40id in enumerate(list(self.nyu40ids))
        }
        assert split in ['train', 'val', 'test']
        split_file = os.path.join(self.root_dir, 'meta_data',
                                  'scannetv2_%s.txt' % split)
        if os.path.exists(split_file):
            self.sample_id_list = [
                line.strip() for line in open(split_file).readlines()
            ]
        else:
            self.sample_id_list = None

    def __len__(self):
        return len(self.sample_id_list)

    def set_split(self, split):
        # Re-initialize in place so the sample id list follows the split.
        self.__init__(self.root_dir, split)

    def get_box_label(self, idx):
        """Load the (k, 7) per-scene box array: 6 box params + class."""
        box_file = os.path.join(self.root_dir, 'scannet_train_instance_data',
                                '%s_bbox.npy' % idx)
        assert os.path.exists(box_file)
        return np.load(box_file)

    def get_scannet_infos(self,
                          num_workers=4,
                          has_label=True,
                          sample_id_list=None):
        """Build one info dict per scene, optionally with annotations."""
        import concurrent.futures as futures

        def gather_scene_info(sample_idx):
            print('%s sample_idx: %s' % (self.split, sample_idx))
            info = dict()
            info['point_cloud'] = {'num_features': 6, 'lidar_idx': sample_idx}
            if has_label:
                boxes_with_classes = self.get_box_label(
                    sample_idx)  # k, 6 + class
                num_gt = boxes_with_classes.shape[0]
                annotations = {'gt_num': num_gt}
                if num_gt != 0:
                    minmax_boxes3d = boxes_with_classes[:, :-1]  # k, 6
                    classes = boxes_with_classes[:, -1]  # k, 1
                    # Map NYU40 ids to benchmark class names / indices.
                    annotations['name'] = np.array([
                        self.class2type[self.nyu40id2class[classes[i]]]
                        for i in range(num_gt)
                    ])
                    annotations['location'] = minmax_boxes3d[:, :3]
                    annotations['dimensions'] = minmax_boxes3d[:, 3:6]
                    annotations['gt_boxes_upright_depth'] = minmax_boxes3d
                    annotations['index'] = np.arange(num_gt, dtype=np.int32)
                    annotations['class'] = np.array([
                        self.nyu40id2class[classes[i]] for i in range(num_gt)
                    ])
                info['annos'] = annotations
            return info

        if sample_id_list is None:
            sample_id_list = self.sample_id_list
        with futures.ThreadPoolExecutor(num_workers) as executor:
            infos = executor.map(gather_scene_info, sample_id_list)
        return list(infos)
import pickle
from pathlib import Path
from tools.data_converter.sunrgbd_data_utils import SUNRGBDObject
def create_sunrgbd_info_file(data_path,
                             pkl_prefix='sunrgbd',
                             save_path=None,
                             relative_path=True):
    """Create train/val info pickle files for the SUN RGB-D dataset.

    Args:
        data_path (str): Root directory of the SUN RGB-D trainval data.
        pkl_prefix (str): Prefix for the generated pickle filenames.
            BUGFIX: default changed from 'sunrgbd_' to 'sunrgbd' — the
            filename template below appends '_infos_*.pkl', so the old
            default produced 'sunrgbd__infos_train.pkl' (double underscore).
        save_path (str, optional): Directory for the output pickles.
            Defaults to ``data_path`` when None.
        relative_path (bool): Unused; kept for interface compatibility.
    """
    save_path = Path(data_path) if save_path is None else Path(save_path)
    train_filename = save_path / f'{pkl_prefix}_infos_train.pkl'
    val_filename = save_path / f'{pkl_prefix}_infos_val.pkl'
    dataset = SUNRGBDObject(root_path=data_path, split='train')
    train_split, val_split = 'train', 'val'
    # Gather and dump the train split, then reuse the loader for val.
    dataset.set_split(train_split)
    sunrgbd_infos_train = dataset.get_sunrgbd_infos(has_label=True)
    with open(train_filename, 'wb') as f:
        pickle.dump(sunrgbd_infos_train, f)
    print('Sunrgbd info train file is saved to %s' % train_filename)
    dataset.set_split(val_split)
    sunrgbd_infos_val = dataset.get_sunrgbd_infos(has_label=True)
    with open(val_filename, 'wb') as f:
        pickle.dump(sunrgbd_infos_val, f)
    print('Sunrgbd info val file is saved to %s' % val_filename)
# Script entry point: generate the SUN RGB-D info pickles in ./data/sunrgbd.
if __name__ == '__main__':
    create_sunrgbd_info_file(
        data_path='./data/sunrgbd/sunrgbd_trainval',
        save_path='./data/sunrgbd')
import os
import cv2
import numpy as np
import scipy.io as sio
def random_sampling(pc, num_sample, replace=None, return_choices=False):
    """Randomly sample ``num_sample`` rows from an (N, C) point cloud.

    When ``replace`` is None it is inferred: sampling is done with
    replacement only if the cloud has fewer points than requested.
    Returns the sampled (num_sample, C) array, plus the chosen indices
    when ``return_choices`` is True.
    """
    if replace is None:
        replace = pc.shape[0] < num_sample
    choices = np.random.choice(pc.shape[0], num_sample, replace=replace)
    sampled = pc[choices]
    return (sampled, choices) if return_choices else sampled
class SUNObject3d(object):
    """One SUN RGB-D label line parsed into 2D box and 3D box fields.

    Line format: classname followed by 12 floats —
    2D box (x, y, w, h), 3D centroid (x, y, z), half-sizes (w, l, h)
    and the first two components of the orientation vector.
    """

    def __init__(self, line):
        tokens = line.split(' ')
        self.classname = tokens[0]
        vals = [float(t) for t in tokens[1:]]
        self.xmin = vals[0]
        self.ymin = vals[1]
        self.xmax = vals[0] + vals[2]
        self.ymax = vals[1] + vals[3]
        self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])
        self.centroid = np.array(vals[4:7])
        self.w = vals[7]
        self.l = vals[8]  # noqa: E741
        self.h = vals[9]
        self.orientation = np.zeros((3, ))
        self.orientation[0] = vals[10]
        self.orientation[1] = vals[11]
        # Heading is the negated in-plane angle of the orientation vector.
        self.heading_angle = -1 * np.arctan2(self.orientation[1],
                                             self.orientation[0])
        # Full sizes are twice the stored half-sizes, in (l, w, h) order.
        self.box3d = np.concatenate([
            self.centroid,
            np.array([self.l * 2, self.w * 2, self.h * 2, self.heading_angle])
        ])
class SUNRGBDObject(object):
    """Loader for SUN RGB-D images, depth maps, calibration and labels."""

    def __init__(self, root_path, split='train', use_v1=False):
        self.root_dir = root_path
        self.split = split
        self.split_dir = os.path.join(root_path)
        # The 10-class SUN RGB-D benchmark set, index order fixed.
        class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk',
                       'dresser', 'night_stand', 'bookshelf', 'bathtub')
        self.type2class = {name: idx for idx, name in enumerate(class_names)}
        self.class2type = {idx: name for idx, name in enumerate(class_names)}
        assert split in ['train', 'val', 'test']
        split_file = os.path.join(self.root_dir, '%s_data_idx.txt' % split)
        if os.path.exists(split_file):
            self.sample_id_list = [
                int(line.strip()) for line in open(split_file).readlines()
            ]
        else:
            self.sample_id_list = None
        self.image_dir = os.path.join(self.split_dir, 'image')
        self.calib_dir = os.path.join(self.split_dir, 'calib')
        self.depth_dir = os.path.join(self.split_dir, 'depth')
        label_folder = 'label_v1' if use_v1 else 'label'
        self.label_dir = os.path.join(self.split_dir, label_folder)

    def __len__(self):
        return len(self.sample_id_list)

    def set_split(self, split):
        # Re-initialize in place; note use_v1 resets to its default.
        self.__init__(self.root_dir, split)

    def get_image(self, idx):
        img_filename = os.path.join(self.image_dir, '%06d.jpg' % (idx))
        return cv2.imread(img_filename)

    def get_image_shape(self, idx):
        # (height, width) of the RGB image, as int32.
        image = self.get_image(idx)
        return np.array(image.shape[:2], dtype=np.int32)

    def get_depth(self, idx):
        # Depth point cloud stored under the 'instance' key of a .mat file.
        depth_filename = os.path.join(self.depth_dir, '%06d.mat' % (idx))
        return sio.loadmat(depth_filename)['instance']

    def get_calibration(self, idx):
        """Return the intrinsics K (flat) and 3x3 rotation Rt for a sample."""
        calib_filepath = os.path.join(self.calib_dir, '%06d.txt' % (idx))
        lines = [line.rstrip() for line in open(calib_filepath)]
        # Line 0: rotation, column-major; line 1: intrinsic matrix entries.
        Rt = np.array([float(x) for x in lines[0].split(' ')])
        Rt = np.reshape(Rt, (3, 3), order='F')
        K = np.array([float(x) for x in lines[1].split(' ')])
        return K, Rt

    def get_label_objects(self, idx):
        label_filename = os.path.join(self.label_dir, '%06d.txt' % (idx))
        lines = [line.rstrip() for line in open(label_filename)]
        return [SUNObject3d(line) for line in lines]

    def get_sunrgbd_infos(self,
                          num_workers=4,
                          has_label=True,
                          sample_id_list=None):
        """Build one info dict per sample, caching subsampled point clouds."""
        import concurrent.futures as futures

        def gather_scene_info(sample_idx):
            print('%s sample_idx: %s' % (self.split, sample_idx))
            # Convert depth to points, subsample, and cache as .npz.
            SAMPLE_NUM = 50000
            pc_upright_depth = self.get_depth(sample_idx)
            pc_subsampled = random_sampling(pc_upright_depth, SAMPLE_NUM)
            np.savez_compressed(
                os.path.join(self.root_dir, 'lidar', '%06d.npz' % sample_idx),
                pc=pc_subsampled)

            info = dict()
            info['point_cloud'] = {'num_features': 6, 'lidar_idx': sample_idx}
            info['image'] = {
                'image_idx': sample_idx,
                'image_shape': self.get_image_shape(sample_idx)
            }
            K, Rt = self.get_calibration(sample_idx)
            info['calib'] = {'K': K, 'Rt': Rt}
            if has_label:
                obj_list = self.get_label_objects(sample_idx)
                # Keep only objects belonging to the benchmark class set.
                valid = [
                    obj for obj in obj_list
                    if obj.classname in self.type2class.keys()
                ]
                annotations = {'gt_num': len(valid)}
                if annotations['gt_num'] != 0:
                    annotations['name'] = np.array(
                        [obj.classname for obj in valid])
                    annotations['bbox'] = np.concatenate(
                        [obj.box2d.reshape(1, 4) for obj in valid], axis=0)
                    annotations['location'] = np.concatenate(
                        [obj.centroid.reshape(1, 3) for obj in valid], axis=0)
                    # lhw (depth) format; stored sizes are half-extents.
                    annotations['dimensions'] = 2 * np.array(
                        [[obj.l, obj.h, obj.w] for obj in valid])
                    annotations['rotation_y'] = np.array(
                        [obj.heading_angle for obj in valid])
                    # NOTE(review): indexes the FULL label list, not just the
                    # kept classes — preserved verbatim from the original.
                    annotations['index'] = np.arange(
                        len(obj_list), dtype=np.int32)
                    annotations['class'] = np.array(
                        [self.type2class[obj.classname] for obj in valid])
                    annotations['gt_boxes_upright_depth'] = np.stack(
                        [obj.box3d for obj in valid], axis=0)  # (K, 8)
                info['annos'] = annotations
            return info

        lidar_save_dir = os.path.join(self.root_dir, 'lidar')
        if not os.path.exists(lidar_save_dir):
            os.mkdir(lidar_save_dir)
        if sample_id_list is None:
            sample_id_list = self.sample_id_list
        with futures.ThreadPoolExecutor(num_workers) as executor:
            infos = executor.map(gather_scene_info, sample_id_list)
        return list(infos)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment