Commit 06a01f0f authored by yan.yan's avatar yan.yan
Browse files

merge master code

parents d03b947a 370334aa
# Changelog
## [2.1.19] - 2021-12-3
### Fixed
- Fix the wrong arch assertion in all kernels for old GPUs so that spconv works on sm_50 GPUs
## [2.1.18] - 2021-11-29
### Fixed
- Fix a small bug of spatial_shape.
- Fix a bug in PointToVoxel: it must always return a clone instead of a view.
## [2.1.17] - 2021-11-29
### Fixed
- Fix a bug in sparse add.
- Fix a serious bug in conv weight init.
### Added
- Add more checks for incorrect usage
- Add insert_exist_keys for hash table
......
......@@ -48,7 +48,7 @@
Check [spconv 2.x algorithm introduction](docs/spconv2_algo.pdf) to understand sparse convolution algorithm in spconv 2.x!
**WARNING** Users of spconv < 2.1.4 need to upgrade to 2.1.4; it fixes a serious bug in SparseInverseConvXd.
**WARNING** Users of spconv < 2.1.18 need to upgrade to 2.1.18; it fixes a bug in conv weight init that makes the std of the initialized weights too large, and a bug in PointToVoxel.
## Breaking changes in Spconv 2.x
......
......@@ -38,9 +38,9 @@ if cuda_ver:
cuda_ver = cuda_ver.replace(".", "") # 10.2 to 102
RELEASE_NAME += "-cu{}".format(cuda_ver)
deps = ["cumm-cu{}>=0.2.3".format(cuda_ver)]
deps = ["cumm-cu{}>=0.2.6".format(cuda_ver)]
else:
deps = ["cumm>=0.2.3"]
deps = ["cumm>=0.2.6"]
......
......@@ -196,7 +196,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvFwdAndBwdInput, (32, 128, 16), (32, 32, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f16,f16"],
NHWC,
NHWC,
NHWC,
......@@ -208,7 +208,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvFwdAndBwdInput, (32, 256, 8), (32, 64, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f16,f16"],
NHWC,
NHWC,
NHWC,
......@@ -220,7 +220,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvFwdAndBwdInput, (32, 64, 16), (32, 32, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f16,f16"],
NHWC,
NHWC,
NHWC,
......@@ -232,7 +232,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvFwdAndBwdInput, (32, 32, 32), (32, 32, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f16,f16"],
NHWC,
NHWC,
NHWC,
......@@ -244,7 +244,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvFwdAndBwdInput, (64, 256, 8), (32, 64, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f16,f16"],
NHWC,
NHWC,
NHWC,
......@@ -256,7 +256,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvFwdAndBwdInput, (64, 128, 8), (32, 64, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f16,f16"],
NHWC,
NHWC,
NHWC,
......@@ -268,7 +268,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvFwdAndBwdInput, (64, 64, 8), (32, 32, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f16,f16"],
NHWC,
NHWC,
NHWC,
......@@ -280,7 +280,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvFwdAndBwdInput, (64, 32, 16), (32, 32, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f16,f16"],
NHWC,
NHWC,
NHWC,
......@@ -292,7 +292,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvBwdWeight, (32, 128, 16), (32, 32, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f32,f32"],
NHWC,
NHWC,
NHWC,
......@@ -306,7 +306,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvBwdWeight, (32, 64, 16), (32, 32, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f32,f32"],
NHWC,
NHWC,
NHWC,
......@@ -318,7 +318,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvBwdWeight, (32, 32, 32), (32, 32, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f32,f32"],
NHWC,
NHWC,
NHWC,
......@@ -330,7 +330,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvBwdWeight, (64, 256, 8), (32, 64, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f32,f32"],
NHWC,
NHWC,
NHWC,
......@@ -342,7 +342,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvBwdWeight, (64, 128, 8), (32, 64, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f32,f32"],
NHWC,
NHWC,
NHWC,
......@@ -354,7 +354,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvBwdWeight, (64, 64, 8), (32, 32, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f32,f32"],
NHWC,
NHWC,
NHWC,
......@@ -366,7 +366,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvBwdWeight, (64, 32, 16), (32, 32, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f32,f32"],
NHWC,
NHWC,
NHWC,
......@@ -378,7 +378,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvBwdWeight, (128, 128, 8), (32, 64, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f32,f32"],
NHWC,
NHWC,
NHWC,
......@@ -390,7 +390,7 @@ IMPLGEMM_SIMT_PARAMS = [
*gen_conv_params(ConvBwdWeight, (128, 64, 8), (64, 32, 8),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f32,f32,f32,f32,f32"],
2, ["f32,f32,f32,f32,f32", "f16,f16,f16,f32,f32"],
NHWC,
NHWC,
NHWC,
......
......@@ -34,6 +34,7 @@ from spconv.pytorch.core import IndiceData, SparseConvTensor, ImplicitGemmIndice
from spconv.pytorch.modules import SparseModule
from spconv.constants import SAVED_WEIGHT_LAYOUT, ALL_WEIGHT_IS_KRSC
from spconv.utils import nullcontext
from torch.nn.init import calculate_gain
FILTER_HWIO = False
......@@ -51,39 +52,6 @@ def expand_nd(val: Union[int, List[int], Tuple[int, ...]], ndim: int) -> List[in
return val
def _calculate_fan_in_and_fan_out_hwio(tensor, algo: "ConvAlgo"):
    """Return ``(fan_in, fan_out)`` for a weight tensor.

    Args:
        tensor: weight tensor; must have at least 2 dimensions.
        algo: convolution algorithm; only compared against
            ``ConvAlgo.Native`` to pick which axes hold the channels.

    Returns:
        A ``(fan_in, fan_out)`` tuple.

    Raises:
        ValueError: if ``tensor`` has fewer than 2 dimensions.
    """
    dimensions = tensor.ndimension()
    if dimensions < 2:
        raise ValueError(
            "Fan in and fan out can not be computed for tensor with fewer than 2 dimensions"
        )
    if dimensions == 2:  # Linear
        return tensor.size(-2), tensor.size(-1)

    # The tensor has at least 3 dimensions from here on, so the receptive
    # field is always derived from the shape (no extra dim() check needed).
    if algo == ConvAlgo.Native:
        # Channels live in the last two axes; FILTER_HWIO selects their order.
        if FILTER_HWIO:
            num_input_fmaps = tensor.size(-2)
            num_output_fmaps = tensor.size(-1)
        else:
            num_input_fmaps = tensor.size(-1)
            num_output_fmaps = tensor.size(-2)
        receptive_field_size = tensor[..., 0, 0].numel()
    else:
        # Output channels first, input channels last, kernel axes in between.
        num_input_fmaps = tensor.size(-1)
        num_output_fmaps = tensor.size(0)
        receptive_field_size = int(np.prod(tensor.shape[1:-1]))
    fan_in = num_input_fmaps * receptive_field_size
    fan_out = num_output_fmaps * receptive_field_size
    return fan_in, fan_out
class SparseConvolution(SparseModule):
......@@ -115,15 +83,18 @@ class SparseConvolution(SparseModule):
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = expand_nd(ndim, kernel_size)
kv = int(np.prod(kernel_size))
kv_stride = int(np.prod(stride))
self.stride = expand_nd(ndim, stride)
kv = int(np.prod(self.kernel_size))
kv_stride = int(np.prod(self.stride))
self.dilation = expand_nd(ndim, dilation)
self.padding = expand_nd(ndim, padding)
self.conv1x1 = kv == 1
# TODO we should deprecate support for ksize == 1 but stride != 1.
if not subm:
self.conv1x1 &= kv_stride == 1
self.stride = expand_nd(ndim, stride)
self.padding = expand_nd(ndim, padding)
self.dilation = expand_nd(ndim, dilation)
if self.conv1x1:
assert self.padding == [0] * ndim, "padding must be zero for 1x1 conv (k=1,s=1)"
self.transposed = transposed
self.inverse = inverse
self.output_padding = expand_nd(ndim, output_padding)
......@@ -212,20 +183,39 @@ class SparseConvolution(SparseModule):
s += f', algo={self.algo}'
return s.format(**self.__dict__)
def _calculate_fan_in_and_fan_out(self):
    """Return ``(fan_in, fan_out)`` derived from the layer configuration.

    Both values are the channel count (``in_channels`` / ``out_channels``)
    multiplied by the product of all entries in ``kernel_size``.
    """
    # math.prod is not always available and functools.reduce is not
    # supported by TorchScript, so multiply the kernel extents by hand.
    kernel_volume = 1
    for extent in self.kernel_size:
        kernel_volume = kernel_volume * extent
    return self.in_channels * kernel_volume, self.out_channels * kernel_volume
def _calculate_correct_fan(self, mode):
    """Pick fan_in or fan_out of this layer according to ``mode``.

    Args:
        mode: ``'fan_in'`` or ``'fan_out'`` (case-insensitive).

    Raises:
        ValueError: if ``mode`` is neither of the two supported values.
    """
    mode = mode.lower()
    valid_modes = ['fan_in', 'fan_out']
    if mode not in valid_modes:
        raise ValueError("Mode {} not supported, please use one of {}".format(mode, valid_modes))
    fan_in, fan_out = self._calculate_fan_in_and_fan_out()
    if mode == 'fan_out':
        return fan_out
    return fan_in
def _custom_kaiming_uniform_(self, tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'):
    r"""Kaiming-uniform initialisation that takes the fan from the layer.

    Mirrors ``torch.nn.init.kaiming_uniform_`` but obtains the fan through
    ``self._calculate_correct_fan(mode)`` instead of the shape of ``tensor``.
    Fills ``tensor`` in place and returns it.
    """
    fan = self._calculate_correct_fan(mode)
    std = calculate_gain(nonlinearity, a) / math.sqrt(fan)
    # A uniform distribution on [-limit, limit] has standard deviation `std`.
    limit = math.sqrt(3.0) * std
    with torch.no_grad():
        return tensor.uniform_(-limit, limit)
def reset_parameters(self):
    """Re-initialise the layer's weight and, when present, its bias.

    The weight is filled by ``self._custom_kaiming_uniform_`` with
    ``a=sqrt(5)``. The bias is drawn uniformly from
    ``[-1/sqrt(fan_in), 1/sqrt(fan_in)]``, where ``fan_in`` comes from
    ``self._calculate_fan_in_and_fan_out()``.
    """
    # Only one initialiser runs per parameter: an earlier init call would be
    # overwritten by this one immediately.
    self._custom_kaiming_uniform_(self.weight, a=math.sqrt(5))
    if self.bias is not None:
        fan_in, _ = self._calculate_fan_in_and_fan_out()
        bound = 1 / math.sqrt(fan_in)
        init.uniform_(self.bias, -bound, bound)
......@@ -318,14 +308,14 @@ class SparseConvolution(SparseModule):
indice_pairs = datas.indice_pairs
indice_pair_num = datas.indice_pair_num
out_spatial_shape = datas.spatial_shape
assert indice_pair_num.shape[0] == np.prod(
self.kernel_size
), "inverse conv must have same kernel size as its couple conv"
assert datas.ksize == self.kernel_size, "inverse conv must have same kernel size as its couple conv"
else:
if self.indice_key is not None and datas is not None:
outids = datas.out_indices
indice_pairs = datas.indice_pairs
indice_pair_num = datas.indice_pair_num
assert self.subm, "only support reuse subm indices"
self._check_subm_reuse_valid(input, spatial_shape, datas)
else:
if input.benchmark:
torch.cuda.synchronize()
......@@ -416,19 +406,8 @@ class SparseConvolution(SparseModule):
mask_argsort_fwd_splits = datas.mask_argsort_fwd_splits
mask_argsort_bwd_splits = datas.mask_argsort_bwd_splits
masks = datas.masks
assert datas.is_subm, "only support reuse subm indices"
if self.kernel_size != datas.ksize:
raise ValueError(f"subm with same indice_key must have same kernel"
f" size, expect {datas.ksize}, this layer {self.kernel_size}")
if self.dilation != datas.dilation:
raise ValueError(f"subm with same indice_key must have same dilation"
f", expect {datas.dilation}, this layer {self.dilation}")
if input.spatial_shape != datas.spatial_shape:
raise ValueError(f"subm with same indice_key must have same spatial structure"
f", expect {datas.spatial_shape}, input {spatial_shape}")
if input.indices.shape[0] != datas.indices.shape[0]:
raise ValueError(f"subm with same indice_key must have same num of indices"
f", expect {datas.indices.shape[0]}, input {input.indices.shape[0]}")
assert self.subm, "only support reuse subm indices"
self._check_subm_reuse_valid(input, spatial_shape, datas)
else:
with input._timer.namespace("gen_pairs"):
......@@ -518,6 +497,22 @@ class SparseConvolution(SparseModule):
return out_tensor
def _check_subm_reuse_valid(self, inp: "SparseConvTensor", spatial_shape: List[int], datas: "Union[ImplicitGemmIndiceData, IndiceData]"):
    """Validate that cached indice data may be reused by this subm layer.

    Args:
        inp: the input sparse tensor of the current forward call.
        spatial_shape: kept for interface compatibility; the check and the
            error message both use ``inp.spatial_shape``.
        datas: cached indice data stored under the shared ``indice_key``.

    Raises:
        AssertionError: if ``datas`` was not produced by a subm layer.
        ValueError: if kernel size, dilation, spatial shape or number of
            indices differ between this layer/input and ``datas``.
    """
    assert datas.is_subm, "only support reuse subm indices"
    if self.kernel_size != datas.ksize:
        raise ValueError(f"subm with same indice_key must have same kernel"
                         f" size, expect {datas.ksize}, this layer {self.kernel_size}")
    if self.dilation != datas.dilation:
        raise ValueError(f"subm with same indice_key must have same dilation"
                         f", expect {datas.dilation}, this layer {self.dilation}")
    if inp.spatial_shape != datas.spatial_shape:
        # Report the value that was actually compared, not the separate
        # `spatial_shape` argument, so the message cannot disagree with the check.
        raise ValueError(f"subm with same indice_key must have same spatial structure"
                         f", expect {datas.spatial_shape}, input {inp.spatial_shape}")
    if inp.indices.shape[0] != datas.indices.shape[0]:
        raise ValueError(f"subm with same indice_key must have same num of indices"
                         f", expect {datas.indices.shape[0]}, input {inp.indices.shape[0]}")
class SparseConv1d(SparseConvolution):
def __init__(self,
in_channels,
......
......@@ -124,7 +124,7 @@ class SparseConvTensor(metaclass=SpConvTensorMeta):
def __init__(self,
features: torch.Tensor,
indices: torch.Tensor,
spatial_shape: List[int],
spatial_shape: Union[List[int], np.ndarray],
batch_size: int,
grid: Optional[torch.Tensor] = None,
voxel_num: Optional[torch.Tensor] = None,
......@@ -154,7 +154,7 @@ class SparseConvTensor(metaclass=SpConvTensorMeta):
assert batch_size > 0
self._features = features
self.indices = indices
self.spatial_shape = spatial_shape
self.spatial_shape = [int(v) for v in spatial_shape]
self.batch_size = batch_size
if indice_dict is None:
indice_dict = {}
......@@ -253,12 +253,14 @@ class SparseConvTensor(metaclass=SpConvTensorMeta):
tensor.force_algo = self.force_algo
return tensor
def expand_nd(ndim: int, val: Union[int, List[int], Tuple[int, ...], np.ndarray]) -> List[int]:
    """Expand ``val`` into a list of ``ndim`` plain Python ints.

    Args:
        ndim: required length of the result.
        val: a scalar integer (repeated ``ndim`` times) or a list, tuple
            or NumPy array that must already hold ``ndim`` entries.

    Returns:
        A new list of length ``ndim`` whose elements are built-in ``int``.

    Raises:
        AssertionError: if a sequence input does not have ``ndim`` entries.
    """
    # NumPy integer scalars are not instances of `int`, so accept them
    # explicitly instead of letting them fall through to the sequence path.
    if isinstance(val, (int, np.integer)):
        res = [val] * ndim
    elif isinstance(val, (tuple, np.ndarray)):
        res = list(val)
    else:
        res = val
    assert len(res) == ndim
    # Normalise NumPy scalars to built-in ints.
    return [int(v) for v in res]
......@@ -79,7 +79,7 @@ class HashTable:
def query(self, keys: torch.Tensor, values: Optional[torch.Tensor] = None):
"""query value by keys, if values is not None, create a new one.
return values and a uint8 tensor that whether query success.
return values and a uint8 tensor that whether query fail.
"""
keys_tv = torch_tensor_to_tv(keys)
if values is None:
......@@ -95,17 +95,17 @@ class HashTable:
def insert_exist_keys(self, keys: torch.Tensor, values: torch.Tensor):
    """Insert key/value pairs whose keys already exist in the table.

    Args:
        keys: tensor of keys; its first dimension sets the result length.
        values: tensor of values to store for ``keys``.

    Returns:
        A uint8 tensor with one entry per key, allocated on ``keys.device``
        and filled by the underlying table. It is described as flagging
        keys whose insert failed.
        NOTE(review): confirm the flag polarity against the native table.
    """
    keys_tv = torch_tensor_to_tv(keys)
    values_tv = torch_tensor_to_tv(values)
    # Stream 0 is passed for CPU tables; otherwise use the current stream.
    stream = 0
    if not self.is_cpu:
        stream = get_current_stream()
    is_empty = torch.empty([keys.shape[0]], dtype=torch.uint8, device=keys.device)
    is_empty_tv = torch_tensor_to_tv(is_empty)
    self._table.insert_exist_keys(keys_tv, values_tv, is_empty_tv, stream)
    return is_empty
def assign_arange_(self):
"""iterate table, assign values with "arange" value.
......
......@@ -153,8 +153,8 @@ class PointToVoxel(object):
clear_voxels)
num_voxels = res[0].shape[0]
return (self.voxels[:num_voxels], self.indices[:num_voxels],
self.num_per_voxel[:num_voxels], pc_voxel_id)
return (self.voxels[:num_voxels].clone(), self.indices[:num_voxels].clone(),
self.num_per_voxel[:num_voxels].clone(), pc_voxel_id)
def gather_features_by_pc_voxel_id(seg_res_features: torch.Tensor, pc_voxel_id: torch.Tensor, invalid_value: Union[int, float] = 0):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment