Commit 0da847e2 authored by yan.yan's avatar yan.yan
Browse files

working on spconv 2.2

parents bf011c76 fe4a2e61
# Changelog # Changelog
## [2.1.12] - 2021-11-23
### Added
- Add a method for voxel generator to get pc_voxel_id, which is usually used in semantic segmentation
### Fixed
- Fix a bug in the CUDA voxel generator when max_voxels is smaller than the real number of voxels
## [2.1.11] - 2021-11-22
### Fixed
- Fixed a bug in Volta kernels (TITAN V, Tesla V100): backward weight kernels used f16 as the accumulator; they should use f32.
- Fixed a corner case when the user uses kernel size = 1x1 but stride != 1.
- Fixed a corner case when the input features are non-contiguous in maxpool.
## [2.1.10] - 2021-11-19
### Fixed
- Fixed a bug in utils.PointToVoxel, shouldn't get cuda stream in cpu code
## [2.1.9] - 2021-11-18
### Removed
- Remove a wrong assert
## [2.1.8] - 2021-11-15 ## [2.1.8] - 2021-11-15
### Added ### Added
- Add support for pytorch 1.5 - Add support for pytorch 1.5
......
...@@ -82,6 +82,8 @@ class ExampleNet(nn.Module): ...@@ -82,6 +82,8 @@ class ExampleNet(nn.Module):
Inverse sparse convolution means "inv" of sparse convolution. the output of inverse convolution contains same indices as input of sparse convolution. Inverse sparse convolution means "inv" of sparse convolution. the output of inverse convolution contains same indices as input of sparse convolution.
**WARNING** ```SparseInverseConv``` isn't equivalent to ```SparseConvTranspose```. SparseConvTranspose is equivalent to ```ConvTranspose``` in pytorch, but SparseInverseConv isn't.
Inverse convolution usually used in semantic segmentation. Inverse convolution usually used in semantic segmentation.
```Python ```Python
...@@ -112,8 +114,10 @@ voxel generator in spconv generate indices in **ZYX** order, the params format a ...@@ -112,8 +114,10 @@ voxel generator in spconv generate indices in **ZYX** order, the params format a
generated indices don't include batch axis, you need to add it by yourself. generated indices don't include batch axis, you need to add it by yourself.
see examples/voxel_gen.py for examples.
```Python ```Python
from spconv.pytorch.utils import PointToVoxel from spconv.pytorch.utils import PointToVoxel, gather_features_by_pc_voxel_id
# this generator generate ZYX indices. # this generator generate ZYX indices.
gen = PointToVoxel( gen = PointToVoxel(
vsize_xyz=[0.1, 0.1, 0.1], vsize_xyz=[0.1, 0.1, 0.1],
...@@ -123,5 +127,14 @@ gen = PointToVoxel( ...@@ -123,5 +127,14 @@ gen = PointToVoxel(
max_num_points_per_voxel=5) max_num_points_per_voxel=5)
pc = np.random.uniform(-10, 10, size=[1000, 3]) pc = np.random.uniform(-10, 10, size=[1000, 3])
pc_th = torch.from_numpy(pc) pc_th = torch.from_numpy(pc)
voxels, coords, num_points_per_voxel = gen(pc_th) voxels, coords, num_points_per_voxel = gen(pc_th, empty_mean=True)
``` ```
If you want to get a label for every point of your pc, you need to use another function to get pc_voxel_id and gather features from the semantic segmentation result:
```Python
voxels, coords, num_points_per_voxel, pc_voxel_id = gen.generate_voxel_with_id(pc_th, empty_mean=True)
seg_features = YourSegNet(...)
# if voxel id is invalid (point out of range, or no space left in a voxel)
# features will be zero.
point_features = gather_features_by_pc_voxel_id(seg_features, pc_voxel_id)
```
\ No newline at end of file
...@@ -16,7 +16,7 @@ import numpy as np ...@@ -16,7 +16,7 @@ import numpy as np
from cumm import tensorview as tv from cumm import tensorview as tv
from spconv.utils import Point2VoxelCPU3d from spconv.utils import Point2VoxelCPU3d
from spconv.pytorch.utils import PointToVoxel from spconv.pytorch.utils import PointToVoxel, gather_features_by_pc_voxel_id
import torch import torch
def main_pytorch_voxel_gen(): def main_pytorch_voxel_gen():
...@@ -50,36 +50,60 @@ def main_pytorch_voxel_gen(): ...@@ -50,36 +50,60 @@ def main_pytorch_voxel_gen():
def main_pytorch_voxel_gen_cuda():
    """Demonstrate ``PointToVoxel`` on both ``cuda:0`` and ``cpu:0``.

    A fixed-seed random point cloud of 1000 points is voxelized three times
    per device:

    1. plain call, printing the number of voxels and the first voxel;
    2. with ``empty_mean=True``, printing the first voxel again;
    3. via ``generate_voxel_with_id``, then the per-point voxel ids are used
       to gather voxel indices back onto the points so the first ten points
       can be compared with the coordinates reconstructed from their voxel.

    NOTE(review): the loop always touches ``cuda:0``; the visible call site
    appears to guard this function with ``torch.cuda.is_available()`` --
    confirm before calling it from elsewhere.
    """
    np.random.seed(50051)
    # voxel gen source code: spconv/csrc/sparse/pointops.py
    pc = np.random.uniform(-2, 8, size=[1000, 3]).astype(np.float32)
    for device in [torch.device("cuda:0"), torch.device("cpu:0")]:
        gen = PointToVoxel(vsize_xyz=[0.25, 0.25, 0.25],
                           coors_range_xyz=[0, 0, 0, 10, 10, 10],
                           num_point_features=3,
                           max_num_voxels=5000,
                           max_num_points_per_voxel=5,
                           device=device)
        pc_th = torch.from_numpy(pc).to(device)

        # Only the voxel tensor is inspected here, so the indices and
        # per-voxel point counts are not copied back to the host.
        voxels_th, _, _ = gen(pc_th)
        voxels_np = voxels_th.cpu().numpy()
        print(f"------{device} Raw Voxels {voxels_np.shape[0]}-------")
        print(voxels_np[0])

        # Run voxel generation again with empty_mean=True (original note:
        # "fill mean value to voxel remain").
        voxels_tv, _, _ = gen(pc_th, empty_mean=True)
        voxels_np = voxels_tv.cpu().numpy()
        print(f"------{device} Voxels with mean filled-------")
        print(voxels_np[0])

        _, indices_th, _, pc_voxel_id = gen.generate_voxel_with_id(
            pc_th, empty_mean=True)
        print(f"------{device} Reconstruct Indices From Voxel ids for every point-------")
        indices_th_float = indices_th.float()
        # we gather indices by voxel_id to see correctness of voxel id.
        indices_th_voxel_id = gather_features_by_pc_voxel_id(
            indices_th_float, pc_voxel_id)
        indices_th_voxel_id_np = indices_th_voxel_id[:10].cpu().numpy()
        print(pc[:10])
        # The column order is reversed before printing (presumably ZYX -> XYZ,
        # to line up with the xyz points printed above -- confirm). The range
        # starts at 0 with voxel size 0.25, so index / 4 is the voxel's
        # lower-corner coordinate.
        print(indices_th_voxel_id_np[:, ::-1] / 4)
def main_gather_features_by_pc_voxel_id():
np.random.seed(50051) np.random.seed(50051)
# voxel gen source code: spconv/csrc/sparse/pointops.py # voxel gen source code: spconv/csrc/sparse/pointops.py
device = torch.device("cuda:0") device = torch.device("cuda:0")
gen = PointToVoxel(vsize_xyz=[0.1, 0.1, 0.1], gen = PointToVoxel(vsize_xyz=[0.25, 0.25, 0.25],
coors_range_xyz=[-80, -80, -6, 80, 80, 6], coors_range_xyz=[-10, -10, -10, 10, 10, 10],
num_point_features=3, num_point_features=3,
max_num_voxels=5000, max_num_voxels=2000,
max_num_points_per_voxel=5, max_num_points_per_voxel=5,
device=device) device=device)
pc = np.random.uniform(-4, 4, size=[1000, 3]).astype(np.float32) pc = np.random.uniform(-8, 8, size=[5000, 3]).astype(np.float32)
pc_th = torch.from_numpy(pc).to(device) pc_th = torch.from_numpy(pc).to(device)
voxels_th, indices_th, num_p_in_vx_th = gen(pc_th)
voxels_np = voxels_th.cpu().numpy()
indices_np = indices_th.cpu().numpy()
num_p_in_vx_np = num_p_in_vx_th.cpu().numpy()
print(f"------Raw Voxels {voxels_np.shape[0]}-------")
print(voxels_np[0])
# run voxel gen and FILL MEAN VALUE to voxel remain
voxels_tv, indices_tv, num_p_in_vx_tv = gen(pc_th, empty_mean=True)
voxels_np = voxels_tv.cpu().numpy()
indices_np = indices_tv.cpu().numpy()
num_p_in_vx_np = num_p_in_vx_tv.cpu().numpy()
print("------Voxels with mean filled-------")
print(voxels_np[0])
voxels_th, indices_th, num_p_in_vx_th, pc_voxel_id = gen.generate_voxel_with_id(pc_th, empty_mean=True)
print("------Voxel ids for every point-------")
print(pc[:10])
print(indices_th[pc_voxel_id[:10]])
voxels_th, indices_th, num_p_in_vx_th, pc_voxel_id = gen.generate_voxel_with_id(pc_th, empty_mean=True)
res_features_from_seg = torch.zeros((voxels_th.shape[0], 128), dtype=torch.float32, device=device)
pc_features = gather_features_by_pc_voxel_id(res_features_from_seg, pc_voxel_id)
print(pc.shape, pc_features.shape)
def main(): def main():
np.random.seed(50051) np.random.seed(50051)
...@@ -172,3 +196,4 @@ if __name__ == "__main__": ...@@ -172,3 +196,4 @@ if __name__ == "__main__":
if torch.cuda.is_available(): if torch.cuda.is_available():
main_cuda() main_cuda()
main_pytorch_voxel_gen_cuda() main_pytorch_voxel_gen_cuda()
main_gather_features_by_pc_voxel_id()
...@@ -445,7 +445,6 @@ class Point2Voxel(pccm.ParameterizedClass, pccm.pybind.PybindClassMixin): ...@@ -445,7 +445,6 @@ class Point2Voxel(pccm.ParameterizedClass, pccm.pybind.PybindClassMixin):
int64_t expected_hash_data_num = points.dim(0) * 2; int64_t expected_hash_data_num = points.dim(0) * 2;
TV_ASSERT_RT_ERR(hashdata.dim(0) >= expected_hash_data_num, "hash table too small") TV_ASSERT_RT_ERR(hashdata.dim(0) >= expected_hash_data_num, "hash table too small")
TV_ASSERT_RT_ERR(point_indice_data.dim(0) >= points.dim(0), "point_indice_data too small") TV_ASSERT_RT_ERR(point_indice_data.dim(0) >= points.dim(0), "point_indice_data too small")
// auto timer = tv::CudaContextTimer<>();
num_per_voxel.zero_(ctx); num_per_voxel.zero_(ctx);
table_t hash = table_t(hashdata.data_ptr<pair_t>(), expected_hash_data_num); table_t hash = table_t(hashdata.data_ptr<pair_t>(), expected_hash_data_num);
hash.clear(custream); hash.clear(custream);
...@@ -462,14 +461,12 @@ class Point2Voxel(pccm.ParameterizedClass, pccm.pybind.PybindClassMixin): ...@@ -462,14 +461,12 @@ class Point2Voxel(pccm.ParameterizedClass, pccm.pybind.PybindClassMixin):
layout, voxels.dim(0)); layout, voxels.dim(0));
auto count_cpu = count.cpu(); auto count_cpu = count.cpu();
int count_val = count_cpu.item<int32_t>(); int count_val = count_cpu.item<int32_t>();
// tv::ssprint("assign_table", timer.report()); count_val = count_val > voxels.dim(0) ? voxels.dim(0) : count_val;
launcher(kernel::generate_voxel<table_t>, hash, points.data_ptr<const {self.dtype}>(), launcher(kernel::generate_voxel<table_t>, hash, points.data_ptr<const {self.dtype}>(),
point_indice_data.data_ptr<const int64_t>(), voxels.data_ptr<{self.dtype}>(), point_indice_data.data_ptr<const int64_t>(), voxels.data_ptr<{self.dtype}>(),
num_per_voxel.data_ptr<int>(), points_voxel_id.data_ptr<int64_t>(), points.dim(1), voxels.dim(1), num_per_voxel.data_ptr<int>(), points_voxel_id.data_ptr<int64_t>(), points.dim(1), voxels.dim(1),
voxels.dim(0), vsize_tv, coors_range_tv, voxels.dim(0), vsize_tv, coors_range_tv,
grid_size_tv, grid_stride_tv, points.dim(0)); grid_size_tv, grid_stride_tv, points.dim(0));
// tv::ssprint("generate_voxel", timer.report());
auto voxel_launcher = tv::cuda::Launch(count_val, custream); auto voxel_launcher = tv::cuda::Launch(count_val, custream);
if (empty_mean){{ if (empty_mean){{
launcher(kernel::voxel_empty_fill_mean, voxels.data_ptr<{self.dtype}>(), launcher(kernel::voxel_empty_fill_mean, voxels.data_ptr<{self.dtype}>(),
......
...@@ -124,9 +124,11 @@ class SparseConvolution(SparseModule): ...@@ -124,9 +124,11 @@ class SparseConvolution(SparseModule):
self.out_channels = out_channels self.out_channels = out_channels
self.kernel_size = kernel_size self.kernel_size = kernel_size
kv = int(np.prod(kernel_size)) kv = int(np.prod(kernel_size))
kv_stride = int(np.prod(kernel_size)) kv_stride = int(np.prod(stride))
self.conv1x1 = kv == 1
self.conv1x1 = kv == 1 and kv_stride == 1 # TODO we should deprecate support for ksize == 1 but stride != 1.
if not subm:
self.conv1x1 &= kv_stride == 1
self.stride = stride self.stride = stride
self.padding = padding self.padding = padding
self.dilation = dilation self.dilation = dilation
...@@ -296,6 +298,8 @@ class SparseConvolution(SparseModule): ...@@ -296,6 +298,8 @@ class SparseConvolution(SparseModule):
if self.bias is not None: if self.bias is not None:
features += self.bias features += self.bias
out_tensor = out_tensor.replace_feature(features) out_tensor = out_tensor.replace_feature(features)
# padding may change spatial shape of conv 1x1.
out_tensor.spatial_shape = out_spatial_shape
return out_tensor return out_tensor
indice_dict = input.indice_dict.copy() indice_dict = input.indice_dict.copy()
......
...@@ -825,6 +825,9 @@ def indice_conv_backward(features: torch.Tensor, ...@@ -825,6 +825,9 @@ def indice_conv_backward(features: torch.Tensor,
filter_shape_per_kv = [out_channel, filters.shape[-1]] filter_shape_per_kv = [out_channel, filters.shape[-1]]
kv_center = kv // 2 kv_center = kv // 2
# TODO handle this in nn.Module to make sure features in backward is contiguous
if not features.is_contiguous():
features = features.contiguous()
if not out_bp.is_contiguous(): if not out_bp.is_contiguous():
out_bp = out_bp.contiguous() out_bp = out_bp.contiguous()
assert out_bp.is_contiguous() assert out_bp.is_contiguous()
...@@ -1246,6 +1249,9 @@ def implicit_gemm_backward(features: torch.Tensor, ...@@ -1246,6 +1249,9 @@ def implicit_gemm_backward(features: torch.Tensor,
raise NotImplementedError("work in progress") raise NotImplementedError("work in progress")
if not out_bp.is_contiguous(): if not out_bp.is_contiguous():
out_bp = out_bp.contiguous() out_bp = out_bp.contiguous()
if not features.is_contiguous():
features = features.contiguous()
assert out_bp.is_contiguous() assert out_bp.is_contiguous()
assert filters.is_contiguous() assert filters.is_contiguous()
assert features.is_contiguous() assert features.is_contiguous()
...@@ -1450,6 +1456,9 @@ def indice_maxpool_backward(features, out_features, out_bp, indice_pairs, ...@@ -1450,6 +1456,9 @@ def indice_maxpool_backward(features, out_features, out_bp, indice_pairs,
indice_pair_num_cpu = indice_pair_num.cpu().tolist() indice_pair_num_cpu = indice_pair_num.cpu().tolist()
if not out_bp.is_contiguous(): if not out_bp.is_contiguous():
out_bp = out_bp.contiguous() out_bp = out_bp.contiguous()
if not features.is_contiguous():
features = features.contiguous()
out_features_tv = torch_tensor_to_tv(out_features) out_features_tv = torch_tensor_to_tv(out_features)
features_tv = torch_tensor_to_tv(features) features_tv = torch_tensor_to_tv(features)
out_bp_tv = torch_tensor_to_tv(out_bp) out_bp_tv = torch_tensor_to_tv(out_bp)
...@@ -1509,6 +1518,9 @@ def indice_maxpool_implicit_gemm_backward(features, out_features, out_bp, ...@@ -1509,6 +1518,9 @@ def indice_maxpool_implicit_gemm_backward(features, out_features, out_bp,
assert features.is_cuda assert features.is_cuda
if not out_bp.is_contiguous(): if not out_bp.is_contiguous():
out_bp = out_bp.contiguous() out_bp = out_bp.contiguous()
if not features.is_contiguous():
features = features.contiguous()
stream = get_current_stream() stream = get_current_stream()
out_features_tv = torch_tensor_to_tv(out_features) out_features_tv = torch_tensor_to_tv(out_features)
features_tv = torch_tensor_to_tv(features) features_tv = torch_tensor_to_tv(features)
......
...@@ -156,3 +156,17 @@ class PointToVoxel(object): ...@@ -156,3 +156,17 @@ class PointToVoxel(object):
return (self.voxels[:num_voxels], self.indices[:num_voxels], return (self.voxels[:num_voxels], self.indices[:num_voxels],
self.num_per_voxel[:num_voxels], pc_voxel_id) self.num_per_voxel[:num_voxels], pc_voxel_id)
def gather_features_by_pc_voxel_id(seg_res_features: torch.Tensor,
                                   pc_voxel_id: torch.Tensor,
                                   invalid_value: float = 0) -> torch.Tensor:
    """Scatter per-voxel results back onto the original point cloud.

    Args:
        seg_res_features: per-voxel tensor whose first dimension indexes
            voxels. Any number of trailing dimensions is accepted (including
            none, e.g. a 1-D tensor of per-voxel labels).
        pc_voxel_id: 1-D integer tensor with one entry per point holding the
            row of ``seg_res_features`` that point belongs to, or ``-1`` when
            the point has no voxel. It is moved to the device of
            ``seg_res_features`` if the two differ.
        invalid_value: value written to the output rows of points whose
            voxel id is ``-1``. Defaults to ``0``.

    Returns:
        Tensor of shape ``(num_points, *seg_res_features.shape[1:])`` with the
        dtype and device of ``seg_res_features``.
    """
    if seg_res_features.device != pc_voxel_id.device:
        pc_voxel_id = pc_voxel_id.to(seg_res_features.device)

    # Keep every trailing feature dimension instead of assuming exactly one.
    out_shape = (pc_voxel_id.shape[0], *seg_res_features.shape[1:])
    if invalid_value == 0:
        res = torch.zeros(out_shape,
                          dtype=seg_res_features.dtype,
                          device=seg_res_features.device)
    else:
        res = torch.full(out_shape,
                         invalid_value,
                         dtype=seg_res_features.dtype,
                         device=seg_res_features.device)

    # Only points with a real voxel id are gathered; the rest keep the fill.
    valid_mask = pc_voxel_id != -1
    res[valid_mask] = seg_res_features[pc_voxel_id[valid_mask]]
    return res
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment