indoor_converter.py

import mmcv
import numpy as np
import os

from tools.data_converter.scannet_data_utils import ScanNetData, ScanNetSegData
from tools.data_converter.sunrgbd_data_utils import SUNRGBDData


def create_indoor_info_file(data_path,
                            pkl_prefix='sunrgbd',
                            save_path=None,
                            use_v1=False,
                            workers=4):
    """Create indoor information file.

    Get information of the raw data and save it to pkl files.

    Args:
        data_path (str): Path of the data.
        pkl_prefix (str): Prefix of the pkl files to be saved.
            Default: 'sunrgbd'.
        save_path (str): Path to save the pkl files. Default: None,
            which falls back to ``data_path``.
        use_v1 (bool): Whether to use the SUN RGB-D v1 annotations
            (only used when ``pkl_prefix`` is 'sunrgbd'). Default: False.
        workers (int): Number of threads to be used. Default: 4.
    """
    assert os.path.exists(data_path)
    assert pkl_prefix in ['sunrgbd', 'scannet']
    save_path = data_path if save_path is None else save_path
    assert os.path.exists(save_path)

    train_filename = os.path.join(save_path, f'{pkl_prefix}_infos_train.pkl')
    val_filename = os.path.join(save_path, f'{pkl_prefix}_infos_val.pkl')
    if pkl_prefix == 'sunrgbd':
        train_dataset = SUNRGBDData(
            root_path=data_path, split='train', use_v1=use_v1)
        val_dataset = SUNRGBDData(
            root_path=data_path, split='val', use_v1=use_v1)
    else:
        train_dataset = ScanNetData(root_path=data_path, split='train')
        val_dataset = ScanNetData(root_path=data_path, split='val')
        test_dataset = ScanNetData(root_path=data_path, split='test')
        test_filename = os.path.join(save_path, f'{pkl_prefix}_infos_test.pkl')

    infos_train = train_dataset.get_infos(num_workers=workers, has_label=True)
    mmcv.dump(infos_train, train_filename, 'pkl')
    print(f'{pkl_prefix} info train file is saved to {train_filename}')

    infos_val = val_dataset.get_infos(num_workers=workers, has_label=True)
    mmcv.dump(infos_val, val_filename, 'pkl')
    print(f'{pkl_prefix} info val file is saved to {val_filename}')

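    # the ScanNet test split ships without annotations, so its infos are
    # generated with has_label=False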
    if pkl_prefix == 'scannet':
        infos_test = test_dataset.get_infos(
            num_workers=workers, has_label=False)
        mmcv.dump(infos_test, test_filename, 'pkl')
        print(f'{pkl_prefix} info test file is saved to {test_filename}')

    # generate infos for the semantic segmentation task
    # e.g. re-sampled scene indexes and label weights
    if pkl_prefix == 'scannet':
        # label weight computation function is adopted from
        # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
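        # 1 / log(1.2 + x) decreases as x grows, so rare classes receive
        # larger weights; x is the per-class point frequency in the
        # referenced implementation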
        train_dataset = ScanNetSegData(
            data_root=data_path,
            ann_file=train_filename,
            split='train',
            num_points=8192,
            label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
        # TODO: do we need to generate on val set?
        val_dataset = ScanNetSegData(
            data_root=data_path,
            ann_file=val_filename,
            split='val',
            num_points=8192,
            label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
        # no need to generate for test set
        train_dataset.get_seg_infos()
        val_dataset.get_seg_infos()
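

# Minimal usage sketch (not part of the original converter; in mmdetection3d
# this function is normally invoked via tools/create_data.py). The data root
# below is a hypothetical placeholder for a prepared ScanNet directory.
if __name__ == '__main__':
    create_indoor_info_file(
        data_path='./data/scannet',  # hypothetical path to the raw data
        pkl_prefix='scannet',
        save_path=None,  # None falls back to data_path
        workers=4)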