Commit 49a2bc85 authored by liyinhao, committed by zhangwenwei

Change data converter: add a configurable worker count to create_indoor_info_file, switch the indoor data utilities from os.path.join to osp.join, dump ScanNet points and instance/semantic masks as .bin files, and store SUN RGB-D point clouds as .bin files under points/ instead of .npy files under lidar/.

parent c42ad958
@@ -8,7 +8,8 @@ from tools.data_converter.sunrgbd_data_utils import SUNRGBDData
def create_indoor_info_file(data_path,
pkl_prefix='sunrgbd',
save_path=None,
use_v1=False):
use_v1=False,
workers=4):
"""Create indoor information file.
Get information of the raw data and save it to the pkl file.
@@ -18,6 +19,7 @@ def create_indoor_info_file(data_path,
pkl_prefix (str): Prefix of the pkl to be saved. Default: 'sunrgbd'.
save_path (str): Path of the pkl to be saved. Default: None.
use_v1 (bool): Whether to use v1. Default: False.
workers (int): Number of threads to be used. Default: 4.
"""
assert os.path.exists(data_path)
assert pkl_prefix in ['sunrgbd', 'scannet']
@@ -35,10 +37,10 @@
train_dataset = ScanNetData(root_path=data_path, split='train')
val_dataset = ScanNetData(root_path=data_path, split='val')
infos_train = train_dataset.get_infos(has_label=True)
infos_train = train_dataset.get_infos(num_workers=workers, has_label=True)
mmcv.dump(infos_train, train_filename, 'pkl')
print(f'{pkl_prefix} info train file is saved to {train_filename}')
infos_val = val_dataset.get_infos(has_label=True)
infos_val = val_dataset.get_infos(num_workers=workers, has_label=True)
mmcv.dump(infos_val, val_filename, 'pkl')
print(f'{pkl_prefix} info val file is saved to {val_filename}')
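For context, a minimal sketch of how the updated entry point might be invoked with the new workers argument. The module path in the import is an assumption (it is not shown in this diff; only create_indoor_info_file itself appears above), and the data_path value is a hypothetical dataset root.

# A minimal usage sketch, assuming create_indoor_info_file lives in
# tools/data_converter/indoor_converter.py (module path not shown in this diff).
from tools.data_converter.indoor_converter import create_indoor_info_file

# Build the ScanNet train/val info pkls, letting 8 worker threads process
# the raw scenes instead of the default 4.
create_indoor_info_file(
    data_path='./data/scannet',   # hypothetical dataset root
    pkl_prefix='scannet',
    save_path='./data/scannet',
    workers=8)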
import concurrent.futures as futures
import os
import os.path as osp
import mmcv
import numpy as np
@@ -18,7 +18,7 @@ class ScanNetData(object):
def __init__(self, root_path, split='train'):
self.root_dir = root_path
self.split = split
self.split_dir = os.path.join(root_path)
self.split_dir = osp.join(root_path)
self.classes = [
'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
'bookshelf', 'picture', 'counter', 'desk', 'curtain',
@@ -34,8 +34,8 @@ class ScanNetData(object):
for i, nyu40id in enumerate(list(self.cat_ids))
}
assert split in ['train', 'val', 'test']
split_file = os.path.join(self.root_dir, 'meta_data',
f'scannetv2_{split}.txt')
split_file = osp.join(self.root_dir, 'meta_data',
f'scannetv2_{split}.txt')
mmcv.check_file_exist(split_file)
self.sample_id_list = mmcv.list_from_file(split_file)
@@ -43,9 +43,9 @@ class ScanNetData(object):
return len(self.sample_id_list)
def get_box_label(self, idx):
box_file = os.path.join(self.root_dir, 'scannet_train_instance_data',
f'{idx}_bbox.npy')
assert os.path.exists(box_file)
box_file = osp.join(self.root_dir, 'scannet_train_instance_data',
f'{idx}_bbox.npy')
mmcv.check_file_exist(box_file)
return np.load(box_file)
def get_infos(self, num_workers=4, has_label=True, sample_id_list=None):
@@ -68,6 +68,36 @@ class ScanNetData(object):
info = dict()
pc_info = {'num_features': 6, 'lidar_idx': sample_idx}
info['point_cloud'] = pc_info
pts_filename = osp.join(self.root_dir,
'scannet_train_instance_data',
f'{sample_idx}_vert.npy')
pts_instance_mask_path = osp.join(self.root_dir,
'scannet_train_instance_data',
f'{sample_idx}_ins_label.npy')
pts_semantic_mask_path = osp.join(self.root_dir,
'scannet_train_instance_data',
f'{sample_idx}_sem_label.npy')
points = np.load(pts_filename)
pts_instance_mask = np.load(pts_instance_mask_path).astype(np.long)
pts_semantic_mask = np.load(pts_semantic_mask_path).astype(np.long)
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'instance_mask'))
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'semantic_mask'))
points.tofile(
osp.join(self.root_dir, 'points', f'{sample_idx}.bin'))
pts_instance_mask.tofile(
osp.join(self.root_dir, 'instance_mask', f'{sample_idx}.bin'))
pts_semantic_mask.tofile(
osp.join(self.root_dir, 'semantic_mask', f'{sample_idx}.bin'))
info['pts_path'] = osp.join('points', f'{sample_idx}.bin')
info['pts_instance_mask_path'] = osp.join('instance_mask',
f'{sample_idx}.bin')
info['pts_semantic_mask_path'] = osp.join('semantic_mask',
f'{sample_idx}.bin')
if has_label:
annotations = {}
......
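Since ndarray.tofile() stores neither dtype nor shape, consumers of the dumped files have to know the layout in advance. Below is a minimal read-back sketch under the following assumptions: the vertices are float32 with 6 values per point (consistent with num_features=6 in the info dict), the masks were written as int64 (what np.long resolves to on a typical 64-bit Linux setup), and the dataset root and sample id are hypothetical.

import os.path as osp
import numpy as np

# Assumed layout: points are float32 with 6 features per point,
# masks are int64; neither is recorded in the .bin files themselves.
root_dir = './data/scannet'   # hypothetical dataset root
sample_idx = 'scene0000_00'   # hypothetical sample id

points = np.fromfile(
    osp.join(root_dir, 'points', f'{sample_idx}.bin'),
    dtype=np.float32).reshape(-1, 6)
pts_instance_mask = np.fromfile(
    osp.join(root_dir, 'instance_mask', f'{sample_idx}.bin'), dtype=np.int64)
pts_semantic_mask = np.fromfile(
    osp.join(root_dir, 'semantic_mask', f'{sample_idx}.bin'), dtype=np.int64)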
import concurrent.futures as futures
import os
import os.path as osp
import mmcv
import numpy as np
@@ -70,7 +70,7 @@ class SUNRGBDData(object):
def __init__(self, root_path, split='train', use_v1=False):
self.root_dir = root_path
self.split = split
self.split_dir = os.path.join(root_path)
self.split_dir = osp.join(root_path, 'sunrgbd_trainval')
self.classes = [
'bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
'night_stand', 'bookshelf', 'bathtub'
@@ -81,22 +81,22 @@ class SUNRGBDData(object):
for label in range(len(self.classes))
}
assert split in ['train', 'val', 'test']
split_file = os.path.join(self.root_dir, f'{split}_data_idx.txt')
split_file = osp.join(self.split_dir, f'{split}_data_idx.txt')
mmcv.check_file_exist(split_file)
self.sample_id_list = map(int, mmcv.list_from_file(split_file))
self.image_dir = os.path.join(self.split_dir, 'image')
self.calib_dir = os.path.join(self.split_dir, 'calib')
self.depth_dir = os.path.join(self.split_dir, 'depth')
self.image_dir = osp.join(self.split_dir, 'image')
self.calib_dir = osp.join(self.split_dir, 'calib')
self.depth_dir = osp.join(self.split_dir, 'depth')
if use_v1:
self.label_dir = os.path.join(self.split_dir, 'label_v1')
self.label_dir = osp.join(self.split_dir, 'label_v1')
else:
self.label_dir = os.path.join(self.split_dir, 'label')
self.label_dir = osp.join(self.split_dir, 'label')
def __len__(self):
return len(self.sample_id_list)
def get_image(self, idx):
img_filename = os.path.join(self.image_dir, f'{idx:06d}.jpg')
img_filename = osp.join(self.image_dir, f'{idx:06d}.jpg')
return mmcv.imread(img_filename)
def get_image_shape(self, idx):
@@ -104,12 +104,12 @@ class SUNRGBDData(object):
return np.array(image.shape[:2], dtype=np.int32)
def get_depth(self, idx):
depth_filename = os.path.join(self.depth_dir, f'{idx:06d}.mat')
depth_filename = osp.join(self.depth_dir, f'{idx:06d}.mat')
depth = sio.loadmat(depth_filename)['instance']
return depth
def get_calibration(self, idx):
calib_filepath = os.path.join(self.calib_dir, f'{idx:06d}.txt')
calib_filepath = osp.join(self.calib_dir, f'{idx:06d}.txt')
lines = [line.rstrip() for line in open(calib_filepath)]
Rt = np.array([float(x) for x in lines[0].split(' ')])
Rt = np.reshape(Rt, (3, 3), order='F')
@@ -117,7 +117,7 @@ class SUNRGBDData(object):
return K, Rt
def get_label_objects(self, idx):
label_filename = os.path.join(self.label_dir, f'{idx:06d}.txt')
label_filename = osp.join(self.label_dir, f'{idx:06d}.txt')
lines = [line.rstrip() for line in open(label_filename)]
objects = [SUNRGBDInstance(line) for line in lines]
return objects
@@ -146,15 +146,18 @@ class SUNRGBDData(object):
pc_upright_depth = self.get_depth(sample_idx)
pc_upright_depth_subsampled = random_sampling(
pc_upright_depth, SAMPLE_NUM)
np.save(
os.path.join(self.root_dir, 'lidar', f'{sample_idx:06d}.npy'),
pc_upright_depth_subsampled)
info = dict()
pc_info = {'num_features': 6, 'lidar_idx': sample_idx}
info['point_cloud'] = pc_info
img_name = os.path.join(self.image_dir, f'{sample_idx:06d}')
img_path = os.path.join(self.image_dir, img_name)
mmcv.mkdir_or_exist(osp.join(self.root_dir, 'points'))
pc_upright_depth_subsampled.tofile(
osp.join(self.root_dir, 'points', f'{sample_idx:06d}.bin'))
info['pts_path'] = osp.join('points', f'{sample_idx:06d}.bin')
img_name = osp.join(self.image_dir, f'{sample_idx:06d}')
img_path = osp.join(self.image_dir, img_name)
image_info = {
'image_idx': sample_idx,
'image_shape': self.get_image_shape(sample_idx),
@@ -211,8 +214,6 @@ class SUNRGBDData(object):
info['annos'] = annotations
return info
lidar_save_dir = os.path.join(self.root_dir, 'lidar')
mmcv.mkdir_or_exist(lidar_save_dir)
sample_id_list = sample_id_list if \
sample_id_list is not None else self.sample_id_list
with futures.ThreadPoolExecutor(num_workers) as executor:
......
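The SUN RGB-D diff is truncated right after the ThreadPoolExecutor line, so only the set-up of the thread pool is visible. The snippet below is a self-contained sketch of the pattern such a get_infos() presumably completes with: mapping a per-sample conversion function over sample_id_list on num_workers threads. The process_single_scene stub here is a stand-in, not the converter's actual implementation.

import concurrent.futures as futures

def process_single_scene(sample_idx):
    # Stand-in for the real per-sample conversion shown in the diff above,
    # which builds an info dict (point_cloud, pts_path, image, annos, ...).
    return {'sample_idx': sample_idx}

sample_id_list = [0, 1, 2, 3]
num_workers = 4
with futures.ThreadPoolExecutor(num_workers) as executor:
    # executor.map preserves input order, so the resulting infos list
    # lines up with sample_id_list.
    infos = list(executor.map(process_single_scene, sample_id_list))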