Unverified Commit 9cb75e7d authored by Ziyi Wu's avatar Ziyi Wu Committed by GitHub
Browse files

[Feature] Support ScanNet semantic segmentation dataset (#390)

* remove max_num_point in ScanNet data preprocessing

* add config file for ScanNet semantic segmentation dataset

* modify NormalizePointsColor in pipeline

* add visualization function for semantic segmentation

* add ignore_index to semantic segmentation visualization function

* add ignore_index to semantic segmentation evaluation function

* fix ignore_index bug in semantic segmentation evaluation function

* add test function to check ignore_index assignment in PointSegClassMapping

* fix slicing bug in BasePoints class and add unittest

* add IndoorPatchPointSample class for indoor semantic segmentation data loading and add unittest

* modify LoadPointsFromFile class and its unittest to support point color loading

* fix data path in unittest

* add setter function for coord and attributes of BasePoint and modify unittest

* modify color normalization operation to work on BasePoint class

* add unittest for ScanNet semantic segmentation data loading pipeline

* fix ignore_index bug in seg_eval function

* add ScanNet semantic segmentation dataset and unittest

* modify config file for ScanNet semantic segmentation

* fix visualization function and modify unittest

* fix a typo in seg_eval.py

* raise exception when semantic mask is not provided in train/eval data loading

* support custom computation of label weight for loss calculation

* modify seg_eval function to be more efficient

* fix small bugs & change variable names for clarity & add more cases to unittest

* move room index resampling and label weight computation to data pre-processing

* add option allowing user to determine whether to sub-sample point clouds

* fix typos & change .format to f-string & fix link in comment

* save all visualizations into .obj format for consistency

* infer num_classes from label2cat in seg_eval function

* add pre-computed room index and label weight for ScanNet dataset

* replace .ply with .obj in unittests and documents

* add TODO in case data is on ceph

* add base dataset for all semantic segmentation tasks & add ScanNet dataset inheriting from base dataset

* rename class for consistency

* fix minor typos in comment

* move Custom3DSegDataset to a new file

* modify BasePoint setter function to enable attribute adding

* add unittest for NormalizePointsColor and fix small bugs

* fix unittest for BasePoints

* modify ScanNet data pre-processing scripts

* change ignore_idx to -1 in seg_eval function

* remove sliding inference from PatchSample function and modify unittest

* remove PatchSample from scannet seg test_pipeline
parent d055876a
import numpy as np
from mmdet3d.core.points import DepthPoints
from mmdet3d.datasets.pipelines import IndoorPointSample
from mmdet3d.datasets.pipelines import (IndoorPatchPointSample,
IndoorPointSample,
PointSegClassMapping)
def test_indoor_sample():
......@@ -60,3 +62,56 @@ def test_indoor_sample():
assert repr_str == expected_repr_str
assert np.allclose(sunrgbd_point_cloud[sunrgbd_choices],
sunrgbd_points_result)
def test_indoor_seg_sample():
    """Test the train-time behavior of IndoorPatchPointSample on ScanNet.

    Loads a small ScanNet fixture, maps raw class ids to train labels with
    PointSegClassMapping, samples a 5-point patch and verifies the sampled
    points, their semantic labels and the transform's repr.
    """
    np.random.seed(0)
    scannet_patch_sample_points = IndoorPatchPointSample(5, 1.5, 1.0, 20, True)
    scannet_seg_class_mapping = \
        PointSegClassMapping((1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16,
                              24, 28, 33, 34, 36, 39))
    scannet_results = dict()
    scannet_points = np.fromfile(
        './tests/data/scannet/points/scene0000_00.bin',
        dtype=np.float32).reshape((-1, 6))
    scannet_results['points'] = DepthPoints(
        scannet_points, points_dim=6, attribute_dims=dict(color=[3, 4, 5]))

    # `np.long` was deprecated in NumPy 1.20 and removed in 1.24; the fixture
    # mask is stored as 64-bit integers, so load with an explicit np.int64.
    scannet_pts_semantic_mask = np.fromfile(
        './tests/data/scannet/semantic_mask/scene0000_00.bin', dtype=np.int64)
    scannet_results['pts_semantic_mask'] = scannet_pts_semantic_mask

    scannet_results = scannet_seg_class_mapping(scannet_results)
    scannet_results = scannet_patch_sample_points(scannet_results)
    scannet_points_result = scannet_results['points']
    scannet_semantic_labels_result = scannet_results['pts_semantic_mask']

    # manually constructed sampled points
    scannet_choices = np.array([87, 34, 58, 9, 18])
    scannet_center = np.array([-2.1772466, -3.4789145, 1.242711])
    scannet_center[2] = 0.0
    scannet_coord_max = np.amax(scannet_points[:, :3], axis=0)
    scannet_input_points = np.concatenate([
        scannet_points[scannet_choices, :3] - scannet_center,
        scannet_points[scannet_choices, 3:],
        scannet_points[scannet_choices, :3] / scannet_coord_max
    ],
                                          axis=1)

    assert scannet_points_result.points_dim == 9
    assert scannet_points_result.attribute_dims == dict(
        color=[3, 4, 5], normalized_coord=[6, 7, 8])
    scannet_points_result = scannet_points_result.tensor.numpy()
    assert np.allclose(scannet_input_points, scannet_points_result, atol=1e-6)
    assert np.all(
        np.array([13, 13, 12, 2, 0]) == scannet_semantic_labels_result)

    repr_str = repr(scannet_patch_sample_points)
    expected_repr_str = 'IndoorPatchPointSample(num_points=5, ' \
                        'block_size=1.5, ' \
                        'sample_rate=1.0, ' \
                        'ignore_index=20, ' \
                        'use_normalized_coord=True, ' \
                        'num_try=10)'
    assert repr_str == expected_repr_str
......@@ -4,9 +4,11 @@ import pytest
from os import path as osp
from mmdet3d.core.bbox import DepthInstance3DBoxes
from mmdet3d.core.points import LiDARPoints
from mmdet3d.core.points import DepthPoints, LiDARPoints
from mmdet3d.datasets.pipelines import (LoadAnnotations3D, LoadPointsFromFile,
LoadPointsFromMultiSweeps)
LoadPointsFromMultiSweeps,
NormalizePointsColor,
PointSegClassMapping)
def test_load_points_from_indoor_file():
......@@ -35,11 +37,33 @@ def test_load_points_from_indoor_file():
scannet_point_cloud = scannet_results['points'].tensor.numpy()
repr_str = repr(scannet_load_data)
expected_repr_str = 'LoadPointsFromFile(shift_height=True, ' \
'file_client_args={\'backend\': \'disk\'}), ' \
'use_color=False, ' \
'file_client_args={\'backend\': \'disk\'}, ' \
'load_dim=6, use_dim=[0, 1, 2])'
assert repr_str == expected_repr_str
assert scannet_point_cloud.shape == (100, 4)
# test load point cloud with both shifted height and color
scannet_load_data = LoadPointsFromFile(
coord_type='DEPTH',
load_dim=6,
use_dim=[0, 1, 2, 3, 4, 5],
shift_height=True,
use_color=True)
scannet_results = dict()
scannet_results['pts_filename'] = osp.join(data_path,
scannet_info['pts_path'])
scannet_results = scannet_load_data(scannet_results)
scannet_point_cloud = scannet_results['points']
assert scannet_point_cloud.points_dim == 7
assert scannet_point_cloud.attribute_dims == dict(
height=3, color=[4, 5, 6])
scannet_point_cloud = scannet_point_cloud.tensor.numpy()
assert scannet_point_cloud.shape == (100, 7)
def test_load_points_from_outdoor_file():
data_path = 'tests/data/kitti/a.bin'
......@@ -117,6 +141,43 @@ def test_load_annotations3D():
assert scannet_pts_semantic_mask.shape == (100, )
def test_load_segmentation_mask():
    """Check semantic-mask loading and class-id remapping on ScanNet."""
    data_path = './tests/data/scannet'
    scannet_info = mmcv.load('./tests/data/scannet/scannet_infos.pkl')[0]

    loader = LoadAnnotations3D(
        with_bbox_3d=False,
        with_label_3d=False,
        with_mask_3d=False,
        with_seg_3d=True)

    # build the minimal input dict the loading pipeline expects
    results = dict(
        ann_info=dict(
            pts_semantic_mask_path=osp.join(
                data_path, scannet_info['pts_semantic_mask_path'])),
        pts_seg_fields=[])

    results = loader(results)
    seg_mask = results['pts_semantic_mask']
    assert seg_mask.shape == (100, )

    # map raw class ids to train labels; unlisted ids become ignore_index
    class_mapping = PointSegClassMapping(
        (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36,
         39))
    results = class_mapping(results)
    seg_mask = results['pts_semantic_mask']

    expected_labels = np.array([
        13, 20, 1, 2, 6, 2, 13, 1, 13, 2, 0, 20, 5, 20, 2, 0, 1, 13, 0, 0, 0,
        20, 6, 20, 13, 20, 2, 20, 20, 2, 16, 5, 13, 5, 13, 0, 20, 0, 0, 1, 7,
        20, 20, 20, 20, 20, 20, 20, 0, 1, 2, 13, 16, 1, 1, 1, 6, 2, 12, 20, 3,
        20, 20, 14, 1, 20, 2, 1, 7, 2, 0, 5, 20, 5, 20, 20, 3, 6, 5, 20, 0, 13,
        12, 2, 20, 0, 0, 13, 20, 1, 20, 5, 3, 0, 13, 1, 2, 2, 2, 1
    ])
    assert np.all(seg_mask == expected_labels)
def test_load_points_from_multi_sweeps():
load_points_from_multi_sweeps = LoadPointsFromMultiSweeps()
sweep = dict(
......@@ -140,3 +201,31 @@ def test_load_points_from_multi_sweeps():
expected_repr_str = 'LoadPointsFromMultiSweeps(sweeps_num=10)'
assert repr_str == expected_repr_str
assert points.shape == (403, 4)
def test_normalize_points_color():
    """Check that NormalizePointsColor mean-shifts and rescales point colors
    while leaving the coordinates untouched."""
    xyz = np.array([[68.137, 3.358, 2.516], [67.697, 3.55, 2.501],
                    [67.649, 3.76, 2.5], [66.414, 3.901, 2.459],
                    [66.012, 4.085, 2.446], [65.834, 4.178, 2.44],
                    [65.841, 4.386, 2.44], [65.745, 4.587, 2.438],
                    [65.551, 4.78, 2.432], [65.486, 4.982, 2.43]])
    rgb = np.array([[131, 95, 138], [71, 185, 253], [169, 47, 41],
                    [174, 161, 88], [6, 158, 213], [6, 86, 78],
                    [118, 161, 78], [72, 195, 138], [180, 170, 32],
                    [197, 85, 27]])
    color_mean = [100, 150, 200]

    pts = DepthPoints(
        np.concatenate([xyz, rgb], axis=1),
        points_dim=6,
        attribute_dims=dict(color=[3, 4, 5]))
    normalizer = NormalizePointsColor(color_mean=color_mean)

    output = normalizer(dict(points=pts))
    normed_pts = output['points']

    # repr should expose the configured color mean
    assert repr(normalizer) == \
        f'NormalizePointsColor(color_mean={color_mean})'
    # coordinates unchanged; color mean-shifted then divided by 255
    assert np.allclose(normed_pts.coord, xyz)
    assert np.allclose(normed_pts.color,
                       (rgb - np.array(color_mean)[None, :]) / 255.0)
......@@ -9,12 +9,15 @@ def test_indoor_eval():
if not torch.cuda.is_available():
pytest.skip()
seg_preds = [
torch.Tensor(
[0, 0, 1, 0, 2, 1, 3, 1, 1, 0, 2, 2, 2, 2, 1, 3, 0, 3, 3, 3])
torch.Tensor([
0, 0, 1, 0, 0, 2, 1, 3, 1, 2, 1, 0, 2, 2, 2, 2, 1, 3, 0, 3, 3, 3, 3
])
]
gt_labels = [
torch.Tensor(
[0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3])
torch.Tensor([
0, 0, 0, 255, 0, 0, 1, 1, 1, 255, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3,
3, 255
])
]
label2cat = {
......@@ -23,7 +26,7 @@ def test_indoor_eval():
2: 'motorcycle',
3: 'truck',
}
ret_value = seg_eval(gt_labels, seg_preds, label2cat)
ret_value = seg_eval(gt_labels, seg_preds, label2cat, ignore_index=255)
assert np.isclose(ret_value['car'], 0.428571429)
assert np.isclose(ret_value['bicycle'], 0.428571429)
......
......@@ -67,7 +67,7 @@ def test_show_result_meshlab():
]
temp_out_dir = tempfile.mkdtemp()
out_dir, file_name = show_result_meshlab(data, result, temp_out_dir)
expected_outfile_ply = file_name + '_pred.ply'
expected_outfile_ply = file_name + '_pred.obj'
expected_outfile_obj = file_name + '_points.obj'
expected_outfile_ply_path = os.path.join(out_dir, file_name,
expected_outfile_ply)
......
import numpy as np
import pytest
import torch
from mmdet3d.core.points import (BasePoints, CameraPoints, DepthPoints,
......@@ -228,6 +229,41 @@ def test_base_points():
new_points.tensor,
torch.tensor([[1, 2, 3, 4, 5, 6, 7]], dtype=base_points.tensor.dtype))
# test BasePoint indexing
base_points = BasePoints(
points_np,
points_dim=7,
attribute_dims=dict(height=3, color=[4, 5, 6]))
assert torch.all(base_points[:, 3:].tensor == torch.tensor(points_np[:,
3:]))
# test set and get function for BasePoint color and height
base_points = BasePoints(points_np[:, :3])
assert base_points.attribute_dims is None
base_points.height = points_np[:, 3]
assert base_points.attribute_dims == dict(height=3)
base_points.color = points_np[:, 4:]
assert base_points.attribute_dims == dict(height=3, color=[4, 5, 6])
assert torch.allclose(base_points.height,
torch.tensor([0.6666, 0.1502, 0.6565, 0.2803]))
assert torch.allclose(
base_points.color,
torch.tensor([[0.1956, 0.4974, 0.9409], [0.3707, 0.1086, 0.6297],
[0.6248, 0.6954, 0.2538], [0.0258, 0.4896, 0.3269]]))
# values to be set should have correct shape (e.g. number of points)
with pytest.raises(ValueError):
base_points.coord = np.random.rand(5, 3)
with pytest.raises(ValueError):
base_points.height = np.random.rand(3)
with pytest.raises(ValueError):
base_points.color = np.random.rand(4, 2)
base_points.coord = points_np[:, [1, 2, 3]]
base_points.height = points_np[:, 0]
base_points.color = points_np[:, [4, 5, 6]]
assert np.allclose(base_points.coord, points_np[:, 1:4])
assert np.allclose(base_points.height, points_np[:, 0])
assert np.allclose(base_points.color, points_np[:, 4:])
def test_cam_points():
# test empty initialization
......
import mmcv
import numpy as np
import os
from tools.data_converter.scannet_data_utils import ScanNetData
from tools.data_converter.scannet_data_utils import ScanNetData, ScanNetSegData
from tools.data_converter.sunrgbd_data_utils import SUNRGBDData
......@@ -44,3 +45,25 @@ def create_indoor_info_file(data_path,
infos_val = val_dataset.get_infos(num_workers=workers, has_label=True)
mmcv.dump(infos_val, val_filename, 'pkl')
print(f'{pkl_prefix} info val file is saved to {val_filename}')
# generate infos for the semantic segmentation task
# e.g. re-sampled scene indexes and label weights
if pkl_prefix == 'scannet':
# label weight computation function is adopted from
# https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
train_dataset = ScanNetSegData(
data_root=data_path,
ann_file=train_filename,
split='train',
num_points=8192,
label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
# TODO: do we need to generate on val set?
val_dataset = ScanNetSegData(
data_root=data_path,
ann_file=val_filename,
split='val',
num_points=8192,
label_weight_func=lambda x: 1.0 / np.log(1.2 + x))
train_dataset.get_seg_infos()
val_dataset.get_seg_infos()
......@@ -127,3 +127,101 @@ class ScanNetData(object):
with futures.ThreadPoolExecutor(num_workers) as executor:
infos = executor.map(process_single_scene, sample_id_list)
return list(infos)
class ScanNetSegData(object):
    """ScanNet dataset used to generate infos for semantic segmentation task.

    Args:
        data_root (str): Root path of the raw data.
        ann_file (str): The generated scannet infos.
        split (str): Set split type of the data. Default: 'train'.
        num_points (int): Number of points in each data input. Default: 8192.
        label_weight_func (function): Function to compute the label weight.
            Default: None, which falls back to the PointNet++ weighting
            ``1 / log(1.2 + count)``.
    """

    def __init__(self,
                 data_root,
                 ann_file,
                 split='train',
                 num_points=8192,
                 label_weight_func=None):
        self.data_root = data_root
        self.data_infos = mmcv.load(ann_file)
        self.split = split
        self.num_points = num_points

        self.all_ids = np.arange(41)  # all possible ids
        self.cat_ids = np.array([
            1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36,
            39
        ])  # used for seg task
        self.ignore_index = len(self.cat_ids)

        # Map raw ScanNet class ids to contiguous [0, num_classes) labels;
        # ids not in `cat_ids` fall through to `ignore_index`.
        # NOTE: `np.int` was removed in NumPy 1.24 — use an explicit dtype.
        self.cat_id2class = np.ones(
            (self.all_ids.shape[0], ), dtype=np.int64) * self.ignore_index
        for i, cat_id in enumerate(self.cat_ids):
            self.cat_id2class[cat_id] = i

        # label weighting function is taken from
        # https://github.com/charlesq34/pointnet2/blob/master/scannet/scannet_dataset.py#L24
        self.label_weight_func = (lambda x: 1.0 / np.log(1.2 + x)) if \
            label_weight_func is None else label_weight_func

    def get_seg_infos(self):
        """Compute and save re-sampled scene indexes and label weights."""
        scene_idxs, label_weight = self.get_scene_idxs_and_label_weight()
        save_folder = osp.join(self.data_root, 'seg_info')
        mmcv.mkdir_or_exist(save_folder)
        np.save(
            osp.join(save_folder, f'{self.split}_resampled_scene_idxs.npy'),
            scene_idxs)
        np.save(
            osp.join(save_folder, f'{self.split}_label_weight.npy'),
            label_weight)
        print(f'{self.split} resampled scene index and label weight saved')

    def _convert_to_label(self, mask):
        """Convert class_id in loaded segmentation mask to label.

        Args:
            mask (str | np.ndarray): Path of a mask file (``.npy`` or raw
                binary of 64-bit ints) or an already-loaded mask array.

        Returns:
            np.ndarray: Labels in ``[0, num_classes]`` where
                ``ignore_index`` marks classes unused for the seg task.
        """
        if isinstance(mask, str):
            if mask.endswith('npy'):
                mask = np.load(mask)
            else:
                # `np.long` was removed in NumPy 1.24; raw masks are stored
                # as 64-bit integers, so read with an explicit np.int64.
                mask = np.fromfile(mask, dtype=np.int64)
        # first filter out unannotated points (labeled as 0)
        mask = mask[mask != 0]
        # then convert to [0, 20) labels
        label = self.cat_id2class[mask]
        return label

    def get_scene_idxs_and_label_weight(self):
        """Compute scene_idxs for data sampling and label weight for loss \
        calculation.

        We sample more times for scenes with more points. Label_weight is
        inversely proportional to number of class points.
        """
        num_classes = len(self.cat_ids)
        num_point_all = []
        label_weight = np.zeros((num_classes + 1, ))  # ignore_index
        for data_info in self.data_infos:
            label = self._convert_to_label(
                osp.join(self.data_root, data_info['pts_semantic_mask_path']))
            num_point_all.append(label.shape[0])
            # histogram over labels 0..num_classes (last bin = ignore_index)
            class_count, _ = np.histogram(label, range(num_classes + 2))
            label_weight += class_count

        # repeat scene_idx for num_scene_point // num_sample_point times
        sample_prob = np.array(num_point_all) / float(np.sum(num_point_all))
        num_iter = int(np.sum(num_point_all) / float(self.num_points))
        scene_idxs = []
        for idx in range(len(self.data_infos)):
            scene_idxs.extend([idx] * round(sample_prob[idx] * num_iter))
        scene_idxs = np.array(scene_idxs).astype(np.int32)

        # calculate label weight, adopted from PointNet++
        label_weight = label_weight[:-1].astype(np.float32)
        label_weight = label_weight / label_weight.sum()
        label_weight = self.label_weight_func(label_weight).astype(np.float32)

        return scene_idxs, label_weight
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment