Commit d3564d6d authored by liyinhao's avatar liyinhao
Browse files

Changed based on the third review comment.

parent 7a872356
import os
import pickle
from pathlib import Path
import mmcv
from tools.data_converter.scannet_data_utils import ScanNetData
def create_scannet_info_file(data_path, pkl_prefix='scannet', save_path=None):
    """Create ScanNet information files.

    Read the raw ScanNet data via ``ScanNetData`` and dump one info list
    per split to ``<save_path>/<pkl_prefix>_infos_{train,val}.pkl``.

    Args:
        data_path (str): Path of the raw data.
        pkl_prefix (str): Prefix for the pkl files to be saved.
            Default: 'scannet'.
        save_path (str): Directory to save the pkl files to.
            Defaults to ``data_path`` when None.

    Returns:
        None
    """
    assert os.path.exists(data_path)
    # Fall back to the data directory itself when no save path is given.
    if save_path is None:
        save_path = data_path
    assert os.path.exists(save_path)

    train_filename = os.path.join(save_path, f'{pkl_prefix}_infos_train.pkl')
    val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl')
    train_dataset = ScanNetData(root_path=data_path, split='train')
    val_dataset = ScanNetData(root_path=data_path, split='val')

    scannet_infos_train = train_dataset.get_scannet_infos(has_label=True)
    with open(train_filename, 'wb') as f:
        pickle.dump(scannet_infos_train, f)
    print(f'Scannet info train file is saved to {train_filename}')

    scannet_infos_val = val_dataset.get_scannet_infos(has_label=True)
    with open(val_filename, 'wb') as f:
        pickle.dump(scannet_infos_val, f)
    print(f'Scannet info val file is saved to {val_filename}')


if __name__ == '__main__':
    create_scannet_info_file(
        data_path='./data/scannet', save_path='./data/scannet')
import concurrent.futures as futures
import os
import numpy as np
class ScanNetData(object):
''' Load and parse object data '''
'''
ScanNet Data
Generate scannet infos for scannet_converter
Args:
root_path (str): Root path of the raw data
split (str): Set split type of the data. Default: 'train'.
'''
def __init__(self, root_path, split='train'):
self.root_dir = root_path
......@@ -26,7 +35,7 @@ class ScanNetData(object):
}
assert split in ['train', 'val', 'test']
split_dir = os.path.join(self.root_dir, 'meta_data',
'scannetv2_%s.txt' % split)
f'scannetv2_{split}.txt')
self.sample_id_list = [x.strip() for x in open(split_dir).readlines()
] if os.path.exists(split_dir) else None
......@@ -35,7 +44,7 @@ class ScanNetData(object):
def get_box_label(self, idx):
    """Load the ground-truth boxes of sample ``idx``.

    Args:
        idx (str): Scene index, e.g. 'scene0000_00'.

    Returns:
        ndarray: Array loaded from
            ``<root_dir>/scannet_train_instance_data/<idx>_bbox.npy``.
    """
    box_file = os.path.join(self.root_dir, 'scannet_train_instance_data',
                            f'{idx}_bbox.npy')
    assert os.path.exists(box_file)
    return np.load(box_file)
......@@ -43,10 +52,22 @@ class ScanNetData(object):
num_workers=4,
has_label=True,
sample_id_list=None):
import concurrent.futures as futures
'''
Get scannet infos.
This method gets information from the raw data.
Args:
num_workers (int): Number of threads to be used. Default: 4.
has_label (bool): Whether the data has label. Default: True.
sample_id_list (List[int]): Index list of the sample. Default: None. # noqa: E501
Returns:
infos (List[dict]): Information of the raw data.
'''
def process_single_scene(sample_idx):
print('%s sample_idx: %s' % (self.split, sample_idx))
print(f'{self.split} sample_idx: {sample_idx}')
info = dict()
pc_info = {'num_features': 6, 'lidar_idx': sample_idx}
info['point_cloud'] = pc_info
......
import os
import pickle
from pathlib import Path
import mmcv
from tools.data_converter.sunrgbd_data_utils import SUNRGBDData
def create_sunrgbd_info_file(data_path,
                             pkl_prefix='sunrgbd',
                             save_path=None,
                             use_v1=False):
    """Create SUNRGBD information files.

    Read the raw SUNRGBD data via ``SUNRGBDData`` and dump one info list
    per split to ``<save_path>/<pkl_prefix>_infos_{train,val}.pkl``.

    Args:
        data_path (str): Path of the raw data.
        pkl_prefix (str): Prefix for the pkl files to be saved.
            Default: 'sunrgbd'.
        save_path (str): Directory to save the pkl files to.
            Defaults to ``data_path`` when None.
        use_v1 (bool): Whether to use v1 labels. Default: False.

    Returns:
        None
    """
    assert os.path.exists(data_path)
    # Fall back to the data directory itself when no save path is given.
    if save_path is None:
        save_path = data_path
    assert os.path.exists(save_path)

    train_filename = os.path.join(save_path, f'{pkl_prefix}_infos_train.pkl')
    val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl')
    train_dataset = SUNRGBDData(
        root_path=data_path, split='train', use_v1=use_v1)
    val_dataset = SUNRGBDData(root_path=data_path, split='val', use_v1=use_v1)

    sunrgbd_infos_train = train_dataset.get_sunrgbd_infos(has_label=True)
    with open(train_filename, 'wb') as f:
        pickle.dump(sunrgbd_infos_train, f)
    print(f'Sunrgbd info train file is saved to {train_filename}')

    sunrgbd_infos_val = val_dataset.get_sunrgbd_infos(has_label=True)
    with open(val_filename, 'wb') as f:
        pickle.dump(sunrgbd_infos_val, f)
    print(f'Sunrgbd info val file is saved to {val_filename}')


if __name__ == '__main__':
    create_sunrgbd_info_file(
        data_path='./data/sunrgbd/sunrgbd_trainval',
        save_path='./data/sunrgbd')
import concurrent.futures as futures
import os
import cv2
......@@ -5,12 +6,24 @@ import numpy as np
import scipy.io as sio
def random_sampling(pc, num_samples, replace=None, return_choices=False):
    """Randomly subsample a point cloud.

    Args:
        pc (ndarray): Point cloud of shape (N, C).
        num_samples (int): Number of points to keep.
        replace (bool): Whether to sample with replacement. Defaults to
            sampling with replacement only when ``num_samples`` exceeds the
            number of points, so the request can always be satisfied.
        return_choices (bool): Whether to also return the sampled indices.

    Returns:
        ndarray: Sampled points of shape (num_samples, C); when
            ``return_choices`` is True, a tuple of the sampled points and
            the chosen row indices.
    """
    if replace is None:
        replace = (pc.shape[0] < num_samples)
    choices = np.random.choice(pc.shape[0], num_samples, replace=replace)
    if return_choices:
        return pc[choices], choices
    # NOTE(review): this branch is cut off by the diff hunk in the scrape;
    # returning the sampled rows is the only behavior consistent with the
    # signature and the branch above.
    return pc[choices]
......@@ -44,7 +57,16 @@ class SUNRGBDInstance(object):
class SUNRGBDData(object):
''' Load and parse object data '''
'''
SUNRGBD Data
Generate sunrgbd infos for sunrgbd_converter
Args:
root_path (str): Root path of the raw data.
split (str): Set split type of the data. Default: 'train'.
use_v1 (bool): Whether to use v1. Default: False.
'''
def __init__(self, root_path, split='train', use_v1=False):
self.root_dir = root_path
......@@ -60,7 +82,7 @@ class SUNRGBDData(object):
for label in range(len(self.classes))
}
assert split in ['train', 'val', 'test']
split_dir = os.path.join(self.root_dir, '%s_data_idx.txt' % split)
split_dir = os.path.join(self.root_dir, f'{split}_data_idx.txt')
self.sample_id_list = [
int(x.strip()) for x in open(split_dir).readlines()
] if os.path.exists(split_dir) else None
......@@ -77,7 +99,7 @@ class SUNRGBDData(object):
return len(self.sample_id_list)
def get_image(self, idx):
    """Read the RGB image of sample ``idx``.

    Args:
        idx (int): Sample index.

    Returns:
        ndarray: Image loaded by ``cv2.imread`` from
            ``<image_dir>/<idx:06d>.jpg`` (None if the file is missing,
            per OpenCV's convention).
    """
    img_filename = os.path.join(self.image_dir, f'{idx:06d}.jpg')
    return cv2.imread(img_filename)
def get_image_shape(self, idx):
......@@ -85,12 +107,12 @@ class SUNRGBDData(object):
return np.array(image.shape[:2], dtype=np.int32)
def get_depth(self, idx):
    """Load the depth data of sample ``idx``.

    Args:
        idx (int): Sample index.

    Returns:
        ndarray: The 'instance' array of ``<depth_dir>/<idx:06d>.mat``.
    """
    depth_filename = os.path.join(self.depth_dir, f'{idx:06d}.mat')
    depth = sio.loadmat(depth_filename)['instance']
    return depth
def get_calibration(self, idx):
calib_filepath = os.path.join(self.calib_dir, '%06d.txt' % (idx))
calib_filepath = os.path.join(self.calib_dir, f'{idx:06d}.txt')
lines = [line.rstrip() for line in open(calib_filepath)]
Rt = np.array([float(x) for x in lines[0].split(' ')])
Rt = np.reshape(Rt, (3, 3), order='F')
......@@ -98,7 +120,7 @@ class SUNRGBDData(object):
return K, Rt
def get_label_objects(self, idx):
    """Parse the label file of sample ``idx``.

    Args:
        idx (int): Sample index.

    Returns:
        list[SUNRGBDInstance]: One instance per line of
            ``<label_dir>/<idx:06d>.txt``.
    """
    label_filename = os.path.join(self.label_dir, f'{idx:06d}.txt')
    # Use a context manager so the file handle is closed deterministically
    # (the original left it to the garbage collector).
    with open(label_filename) as f:
        lines = [line.rstrip() for line in f]
    return [SUNRGBDInstance(line) for line in lines]
......@@ -107,10 +129,22 @@ class SUNRGBDData(object):
num_workers=4,
has_label=True,
sample_id_list=None):
import concurrent.futures as futures
'''
Get sunrgbd infos.
This method gets information from the raw data.
Args:
num_workers (int): Number of threads to be used. Default: 4.
has_label (bool): Whether the data has label. Default: True.
sample_id_list (List[int]): Index list of the sample. Default: None. # noqa: E501
Returns:
infos (List[dict]): Information of the raw data.
'''
def process_single_scene(sample_idx):
print('%s sample_idx: %s' % (self.split, sample_idx))
print(f'{self.split} sample_idx: {sample_idx}')
# convert depth to points
SAMPLE_NUM = 50000
pc_upright_depth = self.get_depth(sample_idx)
......@@ -118,13 +152,13 @@ class SUNRGBDData(object):
pc_upright_depth_subsampled = random_sampling(
pc_upright_depth, SAMPLE_NUM)
np.savez_compressed(
os.path.join(self.root_dir, 'lidar', '%06d.npz' % sample_idx),
os.path.join(self.root_dir, 'lidar', f'{sample_idx:06d}.npz'),
pc=pc_upright_depth_subsampled)
info = dict()
pc_info = {'num_features': 6, 'lidar_idx': sample_idx}
info['point_cloud'] = pc_info
img_name = os.path.join(self.image_dir, '%06d.jpg' % (sample_idx))
img_name = os.path.join(self.image_dir, f'{sample_idx:06d}')
img_path = os.path.join(self.image_dir, img_name)
image_info = {
'image_idx': sample_idx,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment