Commit d03b947a authored by yan.yan's avatar yan.yan
Browse files

Merge branch 'master' into develop

parents 9d1e33d6 8aa0f1f7
......@@ -14,6 +14,7 @@
import math
import time
import sys
from typing import List, Optional, Tuple, Union
import numpy as np
......@@ -25,10 +26,11 @@ from torch.nn.parameter import Parameter
from spconv import pytorch as spconv
from spconv import SPCONV_VERSION_NUMBERS
from spconv.core import ConvAlgo
from spconv.debug_utils import spconv_save_debug_data
from spconv.pytorch import functional as Fsp
from spconv.pytorch import ops
from spconv.cppconstants import CPU_ONLY_BUILD
from spconv.pytorch.core import IndiceData, SparseConvTensor, ImplicitGemmIndiceData
from spconv.pytorch.core import IndiceData, SparseConvTensor, ImplicitGemmIndiceData, expand_nd
from spconv.pytorch.modules import SparseModule
from spconv.constants import SAVED_WEIGHT_LAYOUT, ALL_WEIGHT_IS_KRSC
from spconv.utils import nullcontext
......@@ -109,32 +111,22 @@ class SparseConvolution(SparseModule):
name=None):
super(SparseConvolution, self).__init__(name=name)
assert groups == 1, "don't support groups for now"
if not isinstance(kernel_size, (list, tuple)):
kernel_size = [kernel_size] * ndim
if not isinstance(stride, (list, tuple)):
stride = [stride] * ndim
if not isinstance(padding, (list, tuple)):
padding = [padding] * ndim
if not isinstance(dilation, (list, tuple)):
dilation = [dilation] * ndim
if not isinstance(output_padding, (list, tuple)):
output_padding = [output_padding] * ndim
self.ndim = ndim
self.in_channels = in_channels
self.out_channels = out_channels
self.kernel_size = kernel_size
self.kernel_size = expand_nd(ndim, kernel_size)
kv = int(np.prod(kernel_size))
kv_stride = int(np.prod(stride))
self.conv1x1 = kv == 1
# TODO we should deprecate support for ksize == 1 but stride != 1.
if not subm:
self.conv1x1 &= kv_stride == 1
self.stride = stride
self.padding = padding
self.dilation = dilation
self.stride = expand_nd(ndim, stride)
self.padding = expand_nd(ndim, padding)
self.dilation = expand_nd(ndim, dilation)
self.transposed = transposed
self.inverse = inverse
self.output_padding = output_padding
self.output_padding = expand_nd(ndim, output_padding)
self.groups = groups
self.subm = subm
self.indice_key = indice_key
......@@ -156,15 +148,15 @@ class SparseConvolution(SparseModule):
if FILTER_HWIO:
# RSCK
self.weight = Parameter(
torch.Tensor(*kernel_size, in_channels, out_channels))
torch.Tensor(*self.kernel_size, in_channels, out_channels))
else:
# RSKC
self.weight = Parameter(
torch.Tensor(*kernel_size, out_channels, in_channels))
torch.Tensor(*self.kernel_size, out_channels, in_channels))
else:
# KRSC
self.weight = Parameter(
torch.Tensor(out_channels, *kernel_size, in_channels))
torch.Tensor(out_channels, *self.kernel_size, in_channels))
if bias:
self.bias = Parameter(torch.Tensor(out_channels))
......@@ -338,11 +330,21 @@ class SparseConvolution(SparseModule):
if input.benchmark:
torch.cuda.synchronize()
t = time.time()
outids, indice_pairs, indice_pair_num = ops.get_indice_pairs(
indices, batch_size, spatial_shape, algo,
self.kernel_size, self.stride, self.padding,
self.dilation, self.output_padding, self.subm,
self.transposed)
try:
outids, indice_pairs, indice_pair_num = ops.get_indice_pairs(
indices, batch_size, spatial_shape, algo,
self.kernel_size, self.stride, self.padding,
self.dilation, self.output_padding, self.subm,
self.transposed)
except Exception as e:
msg = "[Exception|native_pair]"
msg += f"indices={indices.shape},bs={batch_size},ss={spatial_shape},"
msg += f"algo={algo},ksize={self.kernel_size},stride={self.stride},"
msg += f"padding={self.padding},dilation={self.dilation},subm={self.subm},"
msg += f"transpose={self.transposed}"
print(msg, file=sys.stderr)
spconv_save_debug_data(indices)
raise e
if input.benchmark:
torch.cuda.synchronize()
interval = time.time() - t
......@@ -356,7 +358,11 @@ class SparseConvolution(SparseModule):
spatial_shape,
out_spatial_shape,
is_subm=self.subm,
algo=algo)
algo=algo,
ksize=self.kernel_size,
stride=self.stride,
padding=self.padding,
dilation=self.dilation)
if self.indice_key is not None:
msg = f"your indice key {self.indice_key} already exists in this sparse tensor."
assert self.indice_key not in indice_dict, msg
......@@ -399,10 +405,7 @@ class SparseConvolution(SparseModule):
mask_argsort_bwd_splits = datas.mask_argsort_fwd_splits
masks = datas.masks
out_spatial_shape = datas.spatial_shape
assert datas.pair_fwd.shape[0] == np.prod(
self.kernel_size
), "inverse conv must have same kernel size as its couple conv"
assert datas.ksize == self.kernel_size, "inverse conv must have same kernel size as its couple conv"
else:
if self.indice_key is not None and datas is not None:
outids = datas.out_indices
......@@ -413,25 +416,50 @@ class SparseConvolution(SparseModule):
mask_argsort_fwd_splits = datas.mask_argsort_fwd_splits
mask_argsort_bwd_splits = datas.mask_argsort_bwd_splits
masks = datas.masks
assert datas.is_subm, "only support reuse subm indices"
if self.kernel_size != datas.ksize:
raise ValueError(f"subm with same indice_key must have same kernel"
f" size, expect {datas.ksize}, this layer {self.kernel_size}")
if self.dilation != datas.dilation:
raise ValueError(f"subm with same indice_key must have same dilation"
f", expect {datas.dilation}, this layer {self.dilation}")
if input.spatial_shape != datas.spatial_shape:
raise ValueError(f"subm with same indice_key must have same spatial structure"
f", expect {datas.spatial_shape}, input {spatial_shape}")
if input.indices.shape[0] != datas.indices.shape[0]:
raise ValueError(f"subm with same indice_key must have same num of indices"
f", expect {datas.indices.shape[0]}, input {input.indices.shape[0]}")
else:
with input._timer.namespace("gen_pairs"):
# we need to gen bwd indices for regular conv
# because it may be inversed.
res = ops.get_indice_pairs_implicit_gemm(
indices,
batch_size,
spatial_shape,
algo,
ksize=self.kernel_size,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
out_padding=self.output_padding,
subm=self.subm,
transpose=self.transposed,
is_train=(not self.subm) or self.training,
alloc=input.thrust_allocator,
timer=input._timer)
try:
res = ops.get_indice_pairs_implicit_gemm(
indices,
batch_size,
spatial_shape,
algo,
ksize=self.kernel_size,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
out_padding=self.output_padding,
subm=self.subm,
transpose=self.transposed,
is_train=(not self.subm) or self.training,
alloc=input.thrust_allocator,
timer=input._timer)
except Exception as e:
msg = "[Exception|implicit_gemm_pair]"
msg += f"indices={indices.shape},bs={batch_size},ss={spatial_shape},"
msg += f"algo={algo},ksize={self.kernel_size},stride={self.stride},"
msg += f"padding={self.padding},dilation={self.dilation},subm={self.subm},"
msg += f"transpose={self.transposed}"
print(msg, file=sys.stderr)
spconv_save_debug_data(indices)
raise e
outids = res[0]
num_inds_per_loc = res[1]
pair_fwd = res[2]
......@@ -455,7 +483,11 @@ class SparseConvolution(SparseModule):
is_subm=self.subm,
spatial_shape=spatial_shape,
out_spatial_shape=out_spatial_shape,
algo=algo)
algo=algo,
ksize=self.kernel_size,
stride=self.stride,
padding=self.padding,
dilation=self.dilation)
msg = f"your indice key {self.indice_key} already exists in this sparse tensor."
assert self.indice_key not in indice_dict, msg
indice_dict[self.indice_key] = indice_data
......
......@@ -12,13 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List, Optional, Union
from typing import List, Optional, Tuple, Union
import numpy as np
import torch
from spconv.core import ConvAlgo
from spconv.pytorch.constants import PYTORCH_VERSION
from spconv.pytorch.ops import ThrustSortAllocator
from spconv.tools import CUDAKernelTimer
if PYTORCH_VERSION >= [1, 8, 0]:
......@@ -39,9 +38,28 @@ else:
pass
class ThrustSortAllocator:
    """Cache of uint8 scratch buffers that hands out raw data pointers.

    Buffers are stored in ``alloced_objs`` keyed by the size they were
    created with and are kept alive for the lifetime of the allocator.
    NOTE(review): presumably used as the temp-storage allocator for thrust
    sort calls on the native side -- confirm against callers.
    """

    def __init__(self, device: torch.device) -> None:
        super().__init__()
        # size -> tensor; the tensors own the memory behind returned pointers
        self.alloced_objs = {}
        self.device = device

    def alloc(self, n: int):
        """Return the data pointer of a buffer that can hold ``n`` bytes.

        An exact-size buffer is preferred; otherwise the first cached buffer
        that is strictly larger than ``n`` is reused; otherwise a new buffer
        of exactly ``n`` bytes is created and cached.
        """
        exact = self.alloced_objs.get(n)
        if exact is not None:
            return exact.data_ptr()
        for capacity, buf in self.alloced_objs.items():
            if capacity > n:
                return buf.data_ptr()
        fresh = torch.empty([n], dtype=torch.uint8, device=self.device)
        self.alloced_objs[n] = fresh
        return fresh.data_ptr()
class IndiceData(object):
def __init__(self, out_indices, indices, indice_pairs, indice_pair_num,
spatial_shape, out_spatial_shape, is_subm: bool, algo: ConvAlgo):
spatial_shape, out_spatial_shape, is_subm: bool, algo: ConvAlgo,
ksize: List[int], stride: List[int], dilation: List[int], padding: List[int]):
self.out_indices = out_indices
self.indices = indices
self.indice_pairs = indice_pairs
......@@ -50,6 +68,10 @@ class IndiceData(object):
self.out_spatial_shape = out_spatial_shape
self.is_subm = is_subm
self.algo = algo
self.ksize = ksize
self.stride = stride
self.dilation = dilation
self.padding = padding
class ImplicitGemmIndiceData(object):
......@@ -60,7 +82,8 @@ class ImplicitGemmIndiceData(object):
mask_argsort_fwd_splits: List[torch.Tensor],
mask_argsort_bwd_splits: List[torch.Tensor],
masks: List[np.ndarray], spatial_shape,
out_spatial_shape, is_subm: bool, algo: ConvAlgo):
out_spatial_shape, is_subm: bool, algo: ConvAlgo,
ksize: List[int], stride: List[int], dilation: List[int], padding: List[int]):
self.out_indices = out_indices
self.indices = indices
self.pair_fwd = pair_fwd
......@@ -74,6 +97,10 @@ class ImplicitGemmIndiceData(object):
self.out_spatial_shape = out_spatial_shape
self.is_subm = is_subm
self.algo = algo
self.ksize = ksize
self.stride = stride
self.dilation = dilation
self.padding = padding
def scatter_nd(indices, updates, shape):
......@@ -225,3 +252,13 @@ class SparseConvTensor(metaclass=SpConvTensorMeta):
tensor._timer = self._timer
tensor.force_algo = self.force_algo
return tensor
def expand_nd(ndim: int, val: Union[int, List[int], Tuple[int, ...]]) -> List[int]:
    """Normalize a per-dimension parameter to a list of ``ndim`` values.

    Args:
        ndim: number of spatial dimensions the result must cover.
        val: either one integer (broadcast to every dimension) or a
            sequence that already holds one entry per dimension.

    Returns:
        A new list of length ``ndim``. The caller's sequence is never
        returned directly, so mutating the result cannot change ``val``.

    Raises:
        AssertionError: if a sequence is given whose length is not ``ndim``.
    """
    # Local import keeps the block self-contained without touching the
    # file-level import section.
    import numbers

    # numbers.Integral also matches NumPy integer scalars (e.g. np.int64),
    # which a plain ``isinstance(val, int)`` check rejects.
    if isinstance(val, numbers.Integral):
        res = [int(val)] * ndim
    else:
        # Copy: layers store the result as an attribute, so it must not
        # alias a list owned by the caller.
        res = list(val)
    assert len(res) == ndim, f"expect {ndim} values, but got {len(res)}: {res}"
    return res
......@@ -56,6 +56,9 @@ def torch_tensor_to_tv(ten: torch.Tensor,
return tv.from_blob(ptr, shape, dtype, tv_device)
return tv.from_blob_strided(ptr, shape, stride, dtype, tv_device)
def torch_tensors_to_tv(*tens: torch.Tensor):
    """Lazily convert each given torch tensor with ``torch_tensor_to_tv``.

    Returns a generator, so conversions happen only as the result is
    iterated (typically by tuple-unpacking at the call site).
    """
    return (torch_tensor_to_tv(tensor) for tensor in tens)
def get_current_stream():
    """Return the ``cuda_stream`` handle of PyTorch's current CUDA stream."""
    current = torch.cuda.current_stream()
    return current.cuda_stream
......
......@@ -12,19 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import pickle
import torch
from torch import nn
from torch.autograd import Function
from typing import Optional, TypeVar
from spconv.pytorch.core import SparseConvTensor
from spconv.tools import CUDAKernelTimer
from spconv.pytorch import ops
from spconv.pytorch import ops, SparseConvTensor
from spconv.pytorch.constants import PYTORCH_VERSION
from spconv.debug_utils import spconv_save_debug_data
from torch.autograd.function import once_differentiable
import numpy as np
from pathlib import Path
from spconv.pytorch.hash import HashTable
from cumm.gemm.layout import to_stride
from typing import List
from functools import reduce
_MAX_INT32 = 2147483647
_T = TypeVar("_T")
......@@ -54,14 +62,22 @@ class SparseConvFunction(Function):
ctx.save_for_backward(indice_pairs, indice_pair_num, features, filters)
ctx.algo = algo
ctx.timer = timer
return ops.indice_conv(features,
filters,
indice_pairs,
indice_pair_num,
num_activate_out,
False,
algo=algo,
timer=timer)
try:
return ops.indice_conv(features,
filters,
indice_pairs,
indice_pair_num,
num_activate_out,
False,
algo=algo,
timer=timer)
except Exception as e:
msg = "[Exception|indice_conv]"
msg += f"feat={features.shape},w={filters.shape},pair={indice_pairs.shape},"
msg += f"pairnum={indice_pair_num},act={num_activate_out},algo={algo}"
print(msg, file=sys.stderr)
spconv_save_debug_data((indice_pairs, indice_pair_num))
raise e
@staticmethod
@once_differentiable
......@@ -69,15 +85,22 @@ class SparseConvFunction(Function):
def backward(ctx, grad_output):
indice_pairs, indice_pair_num, features, filters = ctx.saved_tensors
timer = ctx.timer
input_bp, filters_bp = ops.indice_conv_backward(features,
filters,
grad_output,
indice_pairs,
indice_pair_num,
False,
algo=ctx.algo,
timer=timer)
try:
input_bp, filters_bp = ops.indice_conv_backward(features,
filters,
grad_output,
indice_pairs,
indice_pair_num,
False,
algo=ctx.algo,
timer=timer)
except Exception as e:
msg = "[Exception|indice_conv_backward]"
msg += f"feat={features.shape},w={filters.shape},pair={indice_pairs.shape},"
msg += f"pairnum={indice_pair_num},do={grad_output.shape}"
print(msg, file=sys.stderr)
spconv_save_debug_data((indice_pairs, indice_pair_num))
raise e
return input_bp, filters_bp, None, None, None, None, None
......@@ -96,16 +119,23 @@ class SparseInverseConvFunction(Function):
ctx.save_for_backward(indice_pairs, indice_pair_num, features, filters)
ctx.algo = algo
ctx.timer = timer
return ops.indice_conv(features,
filters,
indice_pairs,
indice_pair_num,
num_activate_out,
True,
False,
algo=algo,
timer=timer)
try:
return ops.indice_conv(features,
filters,
indice_pairs,
indice_pair_num,
num_activate_out,
True,
False,
algo=algo,
timer=timer)
except Exception as e:
msg = "[Exception|indice_conv|inverse]"
msg += f"feat={features.shape},w={filters.shape},pair={indice_pairs.shape},"
msg += f"pairnum={indice_pair_num},act={num_activate_out},algo={algo}"
print(msg, file=sys.stderr)
spconv_save_debug_data((indice_pairs, indice_pair_num))
raise e
@staticmethod
@once_differentiable
......@@ -113,16 +143,23 @@ class SparseInverseConvFunction(Function):
def backward(ctx, grad_output):
indice_pairs, indice_pair_num, features, filters = ctx.saved_tensors
timer = ctx.timer
input_bp, filters_bp = ops.indice_conv_backward(features,
filters,
grad_output,
indice_pairs,
indice_pair_num,
True,
False,
algo=ctx.algo,
timer=timer)
try:
input_bp, filters_bp = ops.indice_conv_backward(features,
filters,
grad_output,
indice_pairs,
indice_pair_num,
True,
False,
algo=ctx.algo,
timer=timer)
except Exception as e:
msg = "[Exception|indice_conv_backward|inverse]"
msg += f"feat={features.shape},w={filters.shape},pair={indice_pairs.shape},"
msg += f"pairnum={indice_pair_num},do={grad_output.shape}"
print(msg, file=sys.stderr)
spconv_save_debug_data((indice_pairs, indice_pair_num))
raise e
return input_bp, filters_bp, None, None, None, None, None
......@@ -144,13 +181,23 @@ class SparseImplicitGemmFunction(Function):
is_train: bool,
is_subm: bool,
timer: CUDAKernelTimer = CUDAKernelTimer(False)):
out, mask_out, mask_width = ops.implicit_gemm(features, filters,
pair_fwd,
pair_mask_fwd_splits,
mask_argsort_fwd_splits,
num_activate_out, masks,
is_train, is_subm, timer)
try:
out, mask_out, mask_width = ops.implicit_gemm(features, filters,
pair_fwd,
pair_mask_fwd_splits,
mask_argsort_fwd_splits,
num_activate_out, masks,
is_train, is_subm, timer)
except Exception as e:
msg = "[Exception|implicit_gemm]"
msg += f"feat={features.shape},w={filters.shape},pair={pair_fwd.shape},"
msg += f"act={num_activate_out},issubm={is_subm},istrain={is_train}"
print(msg, file=sys.stderr)
spconv_save_debug_data((pair_fwd, pair_bwd, pair_mask_fwd_splits,
pair_mask_bwd_splits, mask_argsort_fwd_splits, mask_argsort_bwd_splits,
masks))
raise e
ctx.save_for_backward(features, filters, pair_fwd, pair_bwd)
ctx.mask_width = mask_width
ctx.mask_out = mask_out
......@@ -179,21 +226,32 @@ class SparseImplicitGemmFunction(Function):
masks = ctx.masks
is_subm = ctx.is_subm
timer = ctx.timer
input_bp, filters_bp = ops.implicit_gemm_backward(
features,
filters,
grad_output,
pair_fwd,
pair_bwd,
pair_mask_fwd_splits,
pair_mask_bwd_splits,
mask_argsort_fwd_splits,
mask_argsort_bwd_splits,
mask_output_fwd=mask_out,
masks=masks,
mask_width=mask_width,
is_subm=is_subm,
timer=timer)
try:
input_bp, filters_bp = ops.implicit_gemm_backward(
features,
filters,
grad_output,
pair_fwd,
pair_bwd,
pair_mask_fwd_splits,
pair_mask_bwd_splits,
mask_argsort_fwd_splits,
mask_argsort_bwd_splits,
mask_output_fwd=mask_out,
masks=masks,
mask_width=mask_width,
is_subm=is_subm,
timer=timer)
except Exception as e:
msg = "[Exception|implicit_gemm_backward]"
msg += f"feat={features.shape},w={filters.shape},pair={pair_fwd.shape},"
msg += f"issubm={is_subm},do={grad_output.shape}"
print(msg, file=sys.stderr)
spconv_save_debug_data((pair_fwd, pair_bwd, pair_mask_fwd_splits,
pair_mask_bwd_splits, mask_argsort_fwd_splits, mask_argsort_bwd_splits,
masks))
raise e
None_9 = [None] * 11
return (input_bp, filters_bp, *None_9)
......@@ -212,15 +270,23 @@ class SubMConvFunction(Function):
ctx.save_for_backward(indice_pairs, indice_pair_num, features, filters)
ctx.algo = algo
ctx.timer = timer
return ops.indice_conv(features,
filters,
indice_pairs,
indice_pair_num,
num_activate_out,
False,
True,
algo=algo,
timer=timer)
try:
return ops.indice_conv(features,
filters,
indice_pairs,
indice_pair_num,
num_activate_out,
False,
True,
algo=algo,
timer=timer)
except Exception as e:
msg = "[Exception|indice_conv|subm]"
msg += f"feat={features.shape},w={filters.shape},pair={indice_pairs.shape},"
msg += f"pairnum={indice_pair_num},act={num_activate_out},algo={algo}"
print(msg, file=sys.stderr)
spconv_save_debug_data((indice_pairs, indice_pair_num))
raise e
@staticmethod
@once_differentiable
......@@ -228,16 +294,24 @@ class SubMConvFunction(Function):
def backward(ctx, grad_output):
indice_pairs, indice_pair_num, features, filters = ctx.saved_tensors
timer = ctx.timer
try:
input_bp, filters_bp = ops.indice_conv_backward(features,
filters,
grad_output,
indice_pairs,
indice_pair_num,
False,
True,
algo=ctx.algo,
timer=timer)
except Exception as e:
msg = "[Exception|indice_conv_backward|subm]"
msg += f"feat={features.shape},w={filters.shape},pair={indice_pairs.shape},"
msg += f"pairnum={indice_pair_num},do={grad_output.shape}"
print(msg, file=sys.stderr)
spconv_save_debug_data((indice_pairs, indice_pair_num))
raise e
input_bp, filters_bp = ops.indice_conv_backward(features,
filters,
grad_output,
indice_pairs,
indice_pair_num,
False,
True,
algo=ctx.algo,
timer=timer)
return input_bp, filters_bp, None, None, None, None, None
......@@ -290,16 +364,98 @@ indice_maxpool = SparseMaxPoolFunction.apply
indice_maxpool_implicit_gemm = SparseMaxPoolImplicitGemmFunction.apply
def sparse_add(a: SparseConvTensor, b: SparseConvTensor):
    """Add two sparse conv tensors by going through ``torch.sparse``.

    The result's spatial shape is the element-wise maximum of both operand
    shapes and its batch size is the larger of the two batch sizes.
    """
    lhs = torch.sparse_coo_tensor(a.indices.T, a.features)
    rhs = torch.sparse_coo_tensor(b.indices.T, b.features)
    merged_shape = [
        max(dim_a, dim_b)
        for dim_a, dim_b in zip(a.spatial_shape, b.spatial_shape)
    ]
    total = lhs + rhs
    # torch.sparse stores indices as (ndim, N); spconv expects (N, ndim)
    total_inds = total.indices().T.contiguous()
    assert total.is_contiguous()
    merged_batch = max(a.batch_size, b.batch_size)
    return SparseConvTensor(total.values(), total_inds, merged_shape, merged_batch)
def _indice_to_scalar(indices: torch.Tensor, shape: List[int]):
    """Collapse each row of ``indices`` into a single scalar key.

    Every column except the last is weighted by the matching entry of
    ``to_stride(shape)``; the last column is added unweighted.
    NOTE(review): this presumes the last stride returned by ``to_stride``
    is 1 (row-major layout) -- confirm against cumm.
    """
    ndim = len(shape)
    assert indices.shape[1] == ndim
    strides = to_stride(np.array(shape, dtype=np.int64))
    # clone so the in-place accumulation never writes into the caller's data
    flat = indices[:, -1].clone()
    for axis in range(ndim - 1):
        flat += strides[axis] * indices[:, axis]
    return flat.contiguous()
def sparse_add_hash_based(*tens: SparseConvTensor):
    """Add sparse tensors whose indices are not aligned, using a hash table.

    The indice_dict is dropped by this operation, which makes a later
    inverse convolution impossible. The one exception: when the number of
    output rows equals the row count of the largest operand, that operand's
    indice_dict is attached to the result.

    All operands must share spatial shape, batch size and channel count.
    """
    # NOTE(review): reusing an operand's indice_dict presumes the output rows
    # come out in that operand's row order -- confirm against the row
    # numbering produced by HashTable.assign_arange_.
    total_rows = 0
    largest_rows = 0
    largest_idx = 0
    for idx, ten in enumerate(tens):
        assert ten.spatial_shape == tens[0].spatial_shape
        assert ten.batch_size == tens[0].batch_size
        assert ten.features.shape[1] == tens[0].features.shape[1]
        rows = ten.features.shape[0]
        total_rows += rows
        if largest_rows < rows:
            largest_idx = idx
            largest_rows = rows
    first = tens[0]
    feat = first.features
    shape = [first.batch_size, *first.spatial_shape]
    whole_shape = int(np.prod(shape))
    # flattened keys must fit the key dtype; switch to 64 bit for huge grids
    needs_int64 = whole_shape >= _MAX_INT32
    k_type = torch.int64 if needs_int64 else torch.int32
    # table capacity is twice the total number of input rows
    table = HashTable(first.features.device, k_type, torch.int32, total_rows * 2)
    scalars: List[torch.Tensor] = []
    for ten in tens:
        inds = ten.indices.long() if needs_int64 else ten.indices
        key = _indice_to_scalar(inds, shape)
        scalars.append(key)
        table.insert(key)
    # assign arange to values of hash table: each distinct key gets a row id
    count_val = table.assign_arange_().item()
    num_out = int(count_val)
    out_features = torch.zeros([num_out, feat.shape[1]],
                               dtype=feat.dtype,
                               device=feat.device)
    out_indices = torch.zeros([num_out, first.indices.shape[1]],
                              dtype=first.indices.dtype,
                              device=first.indices.device)
    for ten, key in zip(tens, scalars):
        rows_out, _ = table.query(key)
        rows_out = rows_out.long()
        out_features[rows_out] += ten.features
        out_indices[rows_out] = ten.indices
    res = SparseConvTensor(out_features, out_indices, first.spatial_shape, first.batch_size,
                           benchmark=first.benchmark)
    if count_val == largest_rows:
        res.indice_dict = tens[largest_idx].indice_dict
    res.benchmark_record = first.benchmark_record
    res._timer = first._timer
    res.thrust_allocator = first.thrust_allocator
    return res
def sparse_add(*tens: SparseConvTensor):
    """Add sparse conv tensors by reusing ``torch.sparse``.

    Internally this is a sort + unique (the sum is coalesced). All operands
    must share spatial shape, batch size and channel count. When the number
    of result rows equals the row count of the largest operand, that
    operand's indice_dict is attached to the result.
    """
    first = tens[0]
    dense_shape = [first.batch_size, *first.spatial_shape, first.features.shape[1]]
    largest_rows = 0
    largest_idx = 0
    operands: List[torch.Tensor] = []
    for idx, ten in enumerate(tens):
        assert ten.spatial_shape == tens[0].spatial_shape
        assert ten.batch_size == tens[0].batch_size
        assert ten.features.shape[1] == tens[0].features.shape[1]
        rows = ten.features.shape[0]
        if largest_rows < rows:
            largest_idx = idx
            largest_rows = rows
        operands.append(
            torch.sparse_coo_tensor(ten.indices.T, ten.features, dense_shape, requires_grad=True))
    # left-to-right sum of all operands, then merge duplicate coordinates
    total = operands[0]
    for operand in operands[1:]:
        total = total + operand
    total = total.coalesce()
    # torch.sparse stores indices as (ndim, N); convert back to (N, ndim) int32
    out_inds = total.indices().T.contiguous().int()
    out_values = total.values()
    assert out_values.is_contiguous()
    res = SparseConvTensor(out_values, out_inds, first.spatial_shape, first.batch_size,
                           benchmark=first.benchmark)
    if out_values.shape[0] == largest_rows:
        res.indice_dict = tens[largest_idx].indice_dict
    res.benchmark_record = first.benchmark_record
    res._timer = first._timer
    res.thrust_allocator = first.thrust_allocator
    return res
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional
import torch
from cumm import tensorview as tv
from spconv.pytorch.cppcore import torch_tensor_to_tv, get_current_stream
from spconv.core_cc.csrc.hash.core import HashTable as _HashTable
_TORCH_DTYPE_TO_ITEMSIZE = {
torch.int32: 4,
torch.int64: 8,
torch.float32: 4,
torch.float64: 8,
}
class HashTable:
    """Simple hash table for 32 and 64 bit data, usable on cpu and cuda.

    For cuda the table has a fixed size, so the maximum size must be given
    up front (2 * number of keys is recommended).

    See spconv/pytorch/functional/sparse_add_hash_based for a real example
    that uses the table to implement sparse add (same shape, different
    indices).
    """

    def __init__(self, device: torch.device, key_dtype: torch.dtype,
                 value_dtype: torch.dtype,
                 max_size: int = -1) -> None:
        is_cpu = device.type == "cpu"
        self.is_cpu = is_cpu
        self.key_dtype = key_dtype
        self.value_dtype = value_dtype
        # empty views are passed to the native table when running on cpu
        key_data_tv = tv.Tensor()
        value_data_tv = tv.Tensor()
        if not is_cpu:
            assert max_size > 0, "you must provide max_size for fixed-size cuda hash table, usually *2 of num of keys"
            assert device is not None, "you must specify device for cuda hash table."
            # storage for the cuda table lives in torch tensors owned here
            self.keys_data = torch.empty([max_size], dtype=key_dtype, device=device)
            self.values_data = torch.empty([max_size], dtype=value_dtype, device=device)
            key_data_tv = torch_tensor_to_tv(self.keys_data)
            value_data_tv = torch_tensor_to_tv(self.values_data)
        else:
            self.keys_data = None
            self.values_data = None
        stream = self._stream()
        self.key_itemsize = _TORCH_DTYPE_TO_ITEMSIZE[self.key_dtype]
        self.value_itemsize = _TORCH_DTYPE_TO_ITEMSIZE[self.value_dtype]
        self._valid_value_dtype_for_arange = {torch.int32, torch.int64}
        self._table = _HashTable(is_cpu, self.key_itemsize, self.value_itemsize,
                                 key_data_tv, value_data_tv, stream)

    def _stream(self):
        """Return 0 on cpu, otherwise the current cuda stream handle."""
        if self.is_cpu:
            return 0
        return get_current_stream()

    def _device_count(self):
        """Create the one-element counter tensor for the cuda table.

        Returns the torch tensor and its tv view; the counter width follows
        the key item size (4 -> 32 bit, 8 -> 64 bit).
        """
        assert self.values_data is not None
        device = self.values_data.device
        if self.key_itemsize == 4:
            count = torch.zeros([1], dtype=torch.int32, device=device)
            return count, torch_tensor_to_tv(count, dtype=tv.uint32)
        if self.key_itemsize == 8:
            count = torch.zeros([1], dtype=torch.int64, device=device)
            return count, torch_tensor_to_tv(count, dtype=tv.uint64)
        raise NotImplementedError

    def insert(self, keys: torch.Tensor, values: Optional[torch.Tensor] = None):
        """Insert keys (and optionally values) into the table.

        When ``values`` is None only the keys are inserted and the stored
        values are undefined.
        """
        keys_tv = torch_tensor_to_tv(keys)
        values_tv = tv.Tensor() if values is None else torch_tensor_to_tv(values)
        return self._table.insert(keys_tv, values_tv, self._stream())

    def query(self, keys: torch.Tensor, values: Optional[torch.Tensor] = None):
        """Look up values by keys.

        When ``values`` is None a new output tensor is allocated on the
        device of ``keys``; otherwise the given tensor is written to.
        Returns the values and a uint8 flag tensor filled by the native
        query (named ``is_empty`` here; the original documentation describes
        it as the query-success flag -- confirm its polarity).
        """
        num_keys = keys.shape[0]
        keys_tv = torch_tensor_to_tv(keys)
        if values is None:
            values = torch.empty([num_keys], dtype=self.value_dtype, device=keys.device)
        values_tv = torch_tensor_to_tv(values)
        stream = self._stream()
        is_empty = torch.empty([num_keys], dtype=torch.uint8, device=keys.device)
        self._table.query(keys_tv, values_tv, torch_tensor_to_tv(is_empty), stream)
        return values, is_empty

    def insert_exist_keys(self, keys: torch.Tensor, values: torch.Tensor):
        """Insert key/value pairs whose keys already exist in the table.

        Returns a uint8 tensor telling, per key, whether the insert
        succeeded.
        """
        keys_tv = torch_tensor_to_tv(keys)
        values_tv = torch_tensor_to_tv(values)
        stream = self._stream()
        is_success = torch.empty([keys.shape[0]], dtype=torch.uint8, device=keys.device)
        self._table.insert_exist_keys(keys_tv, values_tv,
                                      torch_tensor_to_tv(is_success), stream)
        return is_success

    def assign_arange_(self):
        """Iterate the table and overwrite the values with "arange" values.

        Equivalent to 1. get keys by items(), 2. insert those keys with
        arange(key.shape[0]) as values. Returns a one-element count tensor.
        """
        count_tv = tv.Tensor()
        stream = self._stream()
        if self.is_cpu:
            assert self.value_dtype in self._valid_value_dtype_for_arange
            # on cpu the count is read from the table before the native call
            count = torch.tensor([self._table.size_cpu()], dtype=torch.int64)
        else:
            count, count_tv = self._device_count()
        self._table.assign_arange_(count_tv, stream)
        return count

    def items(self, max_size: int = -1):
        """Dump the table content.

        Returns ``(keys, values, count)``. On cuda the output buffers have
        ``max_size`` entries (the table capacity when ``max_size`` is -1);
        on cpu they are sized by the table's current size and ``max_size``
        is ignored. Only the first ``count`` entries are meant to be read.
        """
        count_tv = tv.Tensor()
        stream = self._stream()
        if self.is_cpu:
            max_size = self._table.size_cpu()
            count = torch.tensor([max_size], dtype=torch.int64)
            keys = torch.empty([max_size], dtype=self.key_dtype)
            values = torch.empty([max_size], dtype=self.value_dtype)
        else:
            count, count_tv = self._device_count()
            if max_size == -1:
                max_size = self.values_data.shape[0]
            device = self.values_data.device
            keys = torch.empty([max_size], dtype=self.key_dtype, device=device)
            values = torch.empty([max_size], dtype=self.value_dtype, device=device)
        keys_tv = torch_tensor_to_tv(keys)
        values_tv = torch_tensor_to_tv(values)
        self._table.items(keys_tv, values_tv, count_tv, stream)
        return keys, values, count
def main():
    """Smoke-test HashTable on cpu and on cuda:0, printing what it holds."""
    max_size = 1000
    k_dtype = torch.int32
    v_dtype = torch.int64

    def dump(table):
        # print the valid prefix of the key/value buffers
        ks, vs, cnt = table.items()
        cnt_item = cnt.item()
        print(cnt, ks[:cnt_item], vs[:cnt_item])

    for is_cpu in (True, False):
        if is_cpu:
            dev = torch.device("cpu")
            table = HashTable(dev, k_dtype, v_dtype)
        else:
            dev = torch.device("cuda:0")
            table = HashTable(dev, k_dtype, v_dtype, max_size=max_size)
        keys = torch.tensor([5, 3, 7, 4, 6, 2, 10, 8], dtype=k_dtype, device=dev)
        values = torch.tensor([1, 6, 4, 77, 23, 756, 12, 12], dtype=v_dtype, device=dev)
        keys_query = torch.tensor([8, 10, 2, 6, 4, 7, 3, 5], dtype=k_dtype, device=dev)
        table.insert(keys, values)
        vq, _ = table.query(keys_query)
        print(vq)
        dump(table)
        # after this call the stored values are replaced by arange values
        table.assign_arange_()
        dump(table)
if __name__ == "__main__":
main()
\ No newline at end of file
......@@ -22,6 +22,7 @@ import numpy as np
import spconv
from spconv.core import AlgoHint, ConvAlgo
from typing import List, Optional, Union
from spconv.pytorch.core import ThrustSortAllocator
from spconv.pytorch.cppcore import torch_tensor_to_tv, get_current_stream
from spconv.core_cc.csrc.sparse.all import SpconvOps
import spconv.core_cc as _ext
......@@ -43,24 +44,6 @@ from spconv.tools import CUDAKernelTimer
DEBUG = False
class ThrustSortAllocator:
    """Keeps uint8 scratch tensors alive and exposes them as raw pointers.

    ``alloced_objs`` maps the size a buffer was created with to the tensor
    that owns the memory; nothing is ever released.
    """

    def __init__(self, device: torch.device) -> None:
        super().__init__()
        self.alloced_objs = {}
        self.device = device

    def alloc(self, n: int):
        """Return a data pointer to at least ``n`` bytes of scratch memory.

        Lookup order: exact-size hit, then the first cached buffer strictly
        larger than ``n``, then a newly created buffer of ``n`` bytes.
        """
        hit = self.alloced_objs.get(n)
        if hit is None:
            # first larger cached buffer in insertion order, if any
            hit = next(
                (buf for size, buf in self.alloced_objs.items() if n < size),
                None)
        if hit is None:
            hit = torch.empty([n], dtype=torch.uint8, device=self.device)
            self.alloced_objs[n] = hit
        return hit.data_ptr()
def get_conv_output_size(input_size, kernel_size, stride, padding, dilation):
ndim = len(input_size)
output_size = []
......@@ -1531,3 +1514,4 @@ def indice_maxpool_implicit_gemm_backward(features, out_features, out_bp,
out_bp_tv, din_tv,
indice_pairs_tv, stream)
return din
......@@ -26,7 +26,7 @@ from spconv import pytorch as spconv
from spconv.core import ConvAlgo
from spconv.pytorch import functional as Fsp
from spconv.pytorch import ops
from spconv.pytorch.core import IndiceData, ImplicitGemmIndiceData
from spconv.pytorch.core import IndiceData, ImplicitGemmIndiceData, expand_nd
from spconv.pytorch.modules import SparseModule
from spconv.cppconstants import CPU_ONLY_BUILD
from spconv.utils import nullcontext
......@@ -36,7 +36,7 @@ class SparseMaxPool(SparseModule):
def __init__(self,
ndim,
kernel_size: Union[int, List[int], Tuple[int, ...]] = 3,
stride: Union[int, List[int], Tuple[int, ...]] = 1,
stride: Optional[Union[int, List[int], Tuple[int, ...]]] = 1,
padding: Union[int, List[int], Tuple[int, ...]] = 0,
dilation: Union[int, List[int], Tuple[int, ...]] = 1,
indice_key: Optional[str] = None,
......@@ -44,22 +44,15 @@ class SparseMaxPool(SparseModule):
algo: Optional[ConvAlgo] = None,
name=None):
super(SparseMaxPool, self).__init__(name=name)
if not isinstance(kernel_size, (list, tuple)):
kernel_size = [kernel_size] * ndim
if stride is None:
stride = kernel_size.copy()
if not isinstance(stride, (list, tuple)):
stride = [stride] * ndim
if not isinstance(padding, (list, tuple)):
padding = [padding] * ndim
if not isinstance(dilation, (list, tuple)):
dilation = [dilation] * ndim
self.ndim = ndim
self.kernel_size = kernel_size
self.stride = stride
self.padding = padding
self.kernel_size = expand_nd(ndim, kernel_size)
if stride is None:
self.stride = self.kernel_size.copy()
else:
self.stride = expand_nd(ndim, stride)
self.padding = expand_nd(ndim, padding)
self.subm = subm
self.dilation = dilation
self.dilation = expand_nd(ndim, dilation)
self.indice_key = indice_key
kv = int(np.prod(kernel_size))
if algo is None:
......@@ -155,7 +148,11 @@ class SparseMaxPool(SparseModule):
spatial_shape,
out_spatial_shape,
is_subm=False,
algo=self.algo)
algo=self.algo,
ksize=self.kernel_size,
stride=self.stride,
padding=self.padding,
dilation=self.dilation)
indice_dict[self.indice_key] = indice_data
else:
raise ValueError(
......@@ -204,7 +201,11 @@ class SparseMaxPool(SparseModule):
is_subm=self.subm,
spatial_shape=spatial_shape,
out_spatial_shape=out_spatial_shape,
algo=self.algo)
algo=self.algo,
ksize=self.kernel_size,
stride=self.stride,
padding=self.padding,
dilation=self.dilation)
msg = f"your indice key {self.indice_key} already exists in this sparse tensor."
assert self.indice_key not in indice_dict, msg
indice_dict[self.indice_key] = indice_data
......
......@@ -19,37 +19,68 @@ from torch.autograd import Function
from spconv.pytorch.modules import SparseModule
from spconv.pytorch.core import SparseConvTensor
from typing import List
from spconv.pytorch import functional as F
class JoinTable(SparseModule):
    """Concatenate the features of several sparse tensors along dim 1.

    The output reuses ``indices``, ``spatial_shape``, ``batch_size``,
    ``grid``, ``voxel_num`` and ``indice_dict`` of the first input.
    """

    def forward(self, input: List[SparseConvTensor]):
        """Join ``input`` into one SparseConvTensor.

        Args:
            input: non-empty list of sparse tensors. Each one must agree with
                ``input[0]`` on spatial shape, batch size, feature width
                (``features.shape[1]``) and number of index rows
                (``indices.shape[0]``).

        Returns:
            A new SparseConvTensor whose features are the inputs' features
            concatenated along dim 1.

        Raises:
            AssertionError: when one of the agreement checks fails.
            IndexError: when ``input`` is empty.
        """
        msg = "you can't use JoinTable in two sptensor with different indices."
        first = input[0]
        for ten in input:
            assert ten.spatial_shape == first.spatial_shape, msg
            assert ten.batch_size == first.batch_size, msg
            # NOTE(review): concatenating along dim 1 does not by itself need
            # equal channel counts; this check is kept as-is - confirm intent.
            assert ten.features.shape[1] == first.features.shape[1], msg
            # Only the number of index rows is compared, not their values.
            assert ten.indices.shape[0] == first.indices.shape[0], msg
        output = SparseConvTensor(torch.cat([i.features for i in input], 1),
                                  first.indices, first.spatial_shape,
                                  first.batch_size, first.grid,
                                  first.voxel_num, first.indice_dict)
        # Bookkeeping used to be read unconditionally from input[1], which
        # raised IndexError for a single-element list. Keep reading input[1]
        # whenever it exists and fall back to the first tensor otherwise.
        meta_src = input[1] if len(input) > 1 else first
        output.benchmark_record = meta_src.benchmark_record
        output.thrust_allocator = meta_src.thrust_allocator
        output._timer = meta_src._timer
        return output

    def input_spatial_size(self, out_size):
        """Return ``out_size`` unchanged."""
        return out_size
class AddTable(SparseModule):
    """Sum the features of several sparse tensors element-wise.

    The output reuses ``indices``, ``spatial_shape``, ``batch_size``,
    ``grid``, ``voxel_num`` and ``indice_dict`` of the first input.
    """

    def forward(self, input: List[SparseConvTensor]):
        """Add ``input`` into one SparseConvTensor.

        Args:
            input: non-empty list of sparse tensors. Each one must agree with
                ``input[0]`` on spatial shape, batch size, feature width
                (``features.shape[1]``) and number of index rows
                (``indices.shape[0]``).

        Returns:
            A new SparseConvTensor whose features are the sum of the inputs'
            features.

        Raises:
            AssertionError: when one of the agreement checks fails.
            IndexError: when ``input`` is empty.
        """
        msg = "you can't use AddTable in two sptensor with different indices. use AddTableMisaligned instead."
        first = input[0]
        for ten in input:
            assert ten.spatial_shape == first.spatial_shape, msg
            assert ten.batch_size == first.batch_size, msg
            assert ten.features.shape[1] == first.features.shape[1], msg
            # Only the number of index rows is compared, not their values.
            assert ten.indices.shape[0] == first.indices.shape[0], msg
        output = SparseConvTensor(sum([i.features for i in input]),
                                  first.indices, first.spatial_shape,
                                  first.batch_size, first.grid,
                                  first.voxel_num, first.indice_dict)
        # Bookkeeping used to be read unconditionally from input[1], which
        # raised IndexError for a single-element list. Keep reading input[1]
        # whenever it exists and fall back to the first tensor otherwise.
        meta_src = input[1] if len(input) > 1 else first
        output.benchmark_record = meta_src.benchmark_record
        output.thrust_allocator = meta_src.thrust_allocator
        output._timer = meta_src._timer
        return output

    def input_spatial_size(self, out_size):
        """Return ``out_size`` unchanged."""
        return out_size
class AddTableMisaligned(SparseModule):
    """Sum sparse tensors that share a shape but not their indices.

    This is slower than AddTable.

    WARNING: avoid this module in segmentation networks such as U-Net.
    Adding misaligned tensors clears the downsample indices, which stops
    SparseInverseConvXd from working.
    """

    def forward(self, input: List[SparseConvTensor]):
        """Pass every tensor in ``input`` to ``F.sparse_add_hash_based``."""
        merged = F.sparse_add_hash_based(*input)
        return merged

    def input_spatial_size(self, out_size):
        """Return ``out_size`` unchanged."""
        return out_size
class ConcatTable(SparseModule): # Module):
class ConcatTable(SparseModule):
def forward(self, input):
    """Call every registered child module on the same ``input``.

    Returns:
        A list with one result per child, in the iteration order of
        ``self._modules``.
    """
    outputs = []
    for child in self._modules.values():
        outputs.append(child(input))
    return outputs
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import List
from typing import List, Union
import torch
from cumm import tensorview as tv
......@@ -140,7 +140,6 @@ class PointToVoxel(object):
num_voxels = res[0].shape[0]
else:
pc_tv = torch_tensor_to_tv(pc)
stream = get_current_stream()
voxels_tv = torch_tensor_to_tv(self.voxels)
indices_tv = torch_tensor_to_tv(self.indices)
num_per_voxel_tv = torch_tensor_to_tv(self.num_per_voxel)
......@@ -158,12 +157,16 @@ class PointToVoxel(object):
self.num_per_voxel[:num_voxels], pc_voxel_id)
def gather_features_by_pc_voxel_id(seg_res_features: torch.Tensor, pc_voxel_id: torch.Tensor):
def gather_features_by_pc_voxel_id(seg_res_features: torch.Tensor, pc_voxel_id: torch.Tensor, invalid_value: Union[int, float] = 0):
"""This function is used to gather segmentation result to match origin pc.
"""
if seg_res_features.device != pc_voxel_id.device:
pc_voxel_id = pc_voxel_id.to(seg_res_features.device)
res = torch.zeros((pc_voxel_id.shape[0], seg_res_features.shape[1]), dtype=seg_res_features.dtype, device=seg_res_features.device)
res_feature_shape = (pc_voxel_id.shape[0], *seg_res_features.shape[1:])
if invalid_value == 0:
res = torch.zeros(res_feature_shape, dtype=seg_res_features.dtype, device=seg_res_features.device)
else:
res = torch.full(res_feature_shape, invalid_value, dtype=seg_res_features.dtype, device=seg_res_features.device)
pc_voxel_id_valid = pc_voxel_id != -1
pc_voxel_id_valid_ids = torch.nonzero(pc_voxel_id_valid).view(-1)
seg_res_features_valid = seg_res_features[pc_voxel_id[pc_voxel_id_valid_ids]]
......
......@@ -16,6 +16,7 @@ import numpy as np
from cumm import tensorview as tv
from contextlib import AbstractContextManager
from spconv.cppconstants import CPU_ONLY_BUILD
from spconv.core_cc.csrc.utils.boxops import BoxOps
from spconv.core_cc.csrc.sparse.all.ops_cpu1d import Point2VoxelCPU as Point2VoxelCPU1d
from spconv.core_cc.csrc.sparse.all.ops_cpu2d import Point2VoxelCPU as Point2VoxelCPU2d
......@@ -47,3 +48,69 @@ class nullcontext(AbstractContextManager):
def __exit__(self, *excinfo):
    """Leave the context without doing any work.

    Returns None, so an exception raised inside the ``with`` body is not
    suppressed.
    """
    return None
def rbbox_iou(box_corners: np.ndarray, qbox_corners: np.ndarray,
              standup_iou: np.ndarray, standup_thresh: float):
    """Run ``BoxOps.rbbox_iou`` (last flag ``False``) over two box sets.

    Args:
        box_corners: array whose first axis has N entries.
        qbox_corners: array whose first axis has K entries.
        standup_iou: array forwarded unchanged to ``BoxOps.rbbox_iou``.
        standup_thresh: threshold forwarded unchanged to ``BoxOps.rbbox_iou``.

    Returns:
        An ``(N, K)`` array of ``box_corners.dtype``, allocated as zeros and
        handed to ``BoxOps.rbbox_iou`` (presumably filled in place - confirm
        against the C++ op).

    Raises:
        NotImplementedError: when ``BoxOps.has_boost()`` is false.
    """
    if BoxOps.has_boost():
        num_boxes = box_corners.shape[0]
        num_qboxes = qbox_corners.shape[0]
        result = np.zeros((num_boxes, num_qboxes), dtype=box_corners.dtype)
        BoxOps.rbbox_iou(tv.from_numpy(box_corners),
                         tv.from_numpy(qbox_corners),
                         tv.from_numpy(standup_iou),
                         tv.from_numpy(result),
                         standup_thresh,
                         False)
        return result
    raise NotImplementedError(
        "this op require spconv built with boost, download boost, export BOOST_ROOT and rebuild."
    )
def rbbox_intersection(box_corners: np.ndarray, qbox_corners: np.ndarray,
                       standup_iou: np.ndarray, standup_thresh: float):
    """Run ``BoxOps.rbbox_iou`` with its last flag set to ``True``.

    Apart from that flag the call matches ``rbbox_iou``; judging by this
    function's name the flag presumably selects intersection output -
    confirm against the C++ op.

    Args:
        box_corners: array whose first axis has N entries.
        qbox_corners: array whose first axis has K entries.
        standup_iou: array forwarded unchanged to ``BoxOps.rbbox_iou``.
        standup_thresh: threshold forwarded unchanged to ``BoxOps.rbbox_iou``.

    Returns:
        An ``(N, K)`` array of ``box_corners.dtype``, allocated as zeros and
        handed to ``BoxOps.rbbox_iou`` (presumably filled in place).

    Raises:
        NotImplementedError: when ``BoxOps.has_boost()`` is false.
    """
    if BoxOps.has_boost():
        num_boxes = box_corners.shape[0]
        num_qboxes = qbox_corners.shape[0]
        result = np.zeros((num_boxes, num_qboxes), dtype=box_corners.dtype)
        BoxOps.rbbox_iou(tv.from_numpy(box_corners),
                         tv.from_numpy(qbox_corners),
                         tv.from_numpy(standup_iou),
                         tv.from_numpy(result),
                         standup_thresh,
                         True)
        return result
    raise NotImplementedError(
        "this op require spconv built with boost, download boost, export BOOST_ROOT and rebuild."
    )
def rbbox_iou_loss(box_corners: np.ndarray, qbox_corners: np.ndarray):
    """Run ``BoxOps.rbbox_iou_aligned`` (last flag ``False``) on two box sets.

    Args:
        box_corners: array whose first axis has N entries.
        qbox_corners: array forwarded to ``BoxOps.rbbox_iou_aligned``
            (presumably paired one-to-one with ``box_corners`` - confirm).

    Returns:
        An ``(N,)`` array of ``box_corners.dtype``, allocated as zeros and
        handed to ``BoxOps.rbbox_iou_aligned`` (presumably filled in place).

    Raises:
        NotImplementedError: when ``BoxOps.has_boost()`` is false.
    """
    if BoxOps.has_boost():
        num_boxes = box_corners.shape[0]
        result = np.zeros((num_boxes, ), dtype=box_corners.dtype)
        BoxOps.rbbox_iou_aligned(tv.from_numpy(box_corners),
                                 tv.from_numpy(qbox_corners),
                                 tv.from_numpy(result),
                                 False)
        return result
    raise NotImplementedError(
        "this op require spconv built with boost, download boost, export BOOST_ROOT and rebuild."
    )
def non_max_suppression_cpu(boxes: np.ndarray,
                            order: np.ndarray,
                            thresh: float,
                            eps: float = 0.0):
    """Forward to ``BoxOps.non_max_suppression_cpu``.

    ``boxes`` and ``order`` are wrapped with ``tv.from_numpy``; ``thresh``
    and ``eps`` are passed through unchanged. The op's result is returned
    as-is.
    """
    boxes_tv = tv.from_numpy(boxes)
    order_tv = tv.from_numpy(order)
    return BoxOps.non_max_suppression_cpu(boxes_tv, order_tv, thresh, eps)
def rotate_non_max_suppression_cpu(boxes: np.ndarray, order: np.ndarray,
                                   standup_iou: np.ndarray, thresh: float):
    """Forward to ``BoxOps.rotate_non_max_suppression_cpu``.

    ``boxes``, ``order`` and ``standup_iou`` are wrapped with
    ``tv.from_numpy``; ``thresh`` is passed through unchanged. The op's
    result is returned as-is.

    Raises:
        NotImplementedError: when ``BoxOps.has_boost()`` is false.
    """
    if BoxOps.has_boost():
        boxes_tv = tv.from_numpy(boxes)
        order_tv = tv.from_numpy(order)
        standup_tv = tv.from_numpy(standup_iou)
        return BoxOps.rotate_non_max_suppression_cpu(boxes_tv, order_tv,
                                                     standup_tv, thresh)
    raise NotImplementedError(
        "this op require spconv built with boost, download boost, export BOOST_ROOT and rebuild."
    )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment