Commit 7af751dc authored by yan.yan
Browse files

sync

parent 647927ce
...@@ -23,7 +23,7 @@ class BoostGeometryLib(pccm.Class): ...@@ -23,7 +23,7 @@ class BoostGeometryLib(pccm.Class):
def __init__(self): def __init__(self):
super().__init__() super().__init__()
assert BOOST_ROOT is not None assert BOOST_ROOT is not None
self.build_meta.add_includes(BOOST_ROOT) self.build_meta.add_public_includes(BOOST_ROOT)
self.add_include("boost/geometry.hpp") self.add_include("boost/geometry.hpp")
class BoxOps(pccm.Class): class BoxOps(pccm.Class):
......
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from typing import List, Optional, Tuple, Union from typing import List, Optional, Tuple, Union, Dict
import numpy as np import numpy as np
import torch import torch
......
...@@ -14,9 +14,10 @@ ...@@ -14,9 +14,10 @@
from cumm import tensorview as tv from cumm import tensorview as tv
import torch import torch
from typing import Optional, List from typing import Dict, Optional, List, Union
from spconv.cppconstants import COMPILED_CUDA_ARCHS from spconv.cppconstants import COMPILED_CUDA_ARCHS
import sys import sys
from spconv.core_cc.csrc.sparse.alloc import ExternalAllocator
_TORCH_DTYPE_TO_TV = { _TORCH_DTYPE_TO_TV = {
torch.float32: tv.float32, torch.float32: tv.float32,
...@@ -28,7 +29,10 @@ _TORCH_DTYPE_TO_TV = { ...@@ -28,7 +29,10 @@ _TORCH_DTYPE_TO_TV = {
torch.int16: tv.int16, torch.int16: tv.int16,
torch.uint8: tv.uint8, torch.uint8: tv.uint8,
} }
_TV_DTYPE_TO_TORCH = {v: k for k, v in _TORCH_DTYPE_TO_TV.items()}
_TORCH_UINT_WORKAROUNDS = {tv.uint32: tv.int32, tv.uint16: tv.int16, tv.uint64: tv.int64}
_ALL_INTS = {tv.int32, tv.int16, tv.int8, tv.int64, tv.uint64, tv.uint8, tv.uint32, tv.uint16}
def torch_tensor_to_tv(ten: torch.Tensor, def torch_tensor_to_tv(ten: torch.Tensor,
dtype: Optional[int] = None, dtype: Optional[int] = None,
...@@ -46,7 +50,8 @@ def torch_tensor_to_tv(ten: torch.Tensor, ...@@ -46,7 +50,8 @@ def torch_tensor_to_tv(ten: torch.Tensor,
shape = list(ten.shape) shape = list(ten.shape)
if dtype is None: if dtype is None:
dtype = _TORCH_DTYPE_TO_TV[ten.dtype] dtype = _TORCH_DTYPE_TO_TV[ten.dtype]
return tv.from_blob(ptr, shape, dtype, tv_device) stride = ten.stride()
return tv.from_blob_strided(ptr, shape, list(stride), dtype, tv_device)
def torch_tensors_to_tv(*tens: torch.Tensor): def torch_tensors_to_tv(*tens: torch.Tensor):
return (torch_tensor_to_tv(t) for t in tens) return (torch_tensor_to_tv(t) for t in tens)
...@@ -62,7 +67,119 @@ def get_arch(): ...@@ -62,7 +67,119 @@ def get_arch():
f"may cause invalid device function. " f"may cause invalid device function. "
f"available: {COMPILED_CUDA_ARCHS}", file=sys.stderr) f"available: {COMPILED_CUDA_ARCHS}", file=sys.stderr)
return arch return arch
class TorchAllocator(ExternalAllocator):
    """Allocator that serves tensor requests with torch tensors.

    Every tensor created here is stored in ``self.allocated`` under its data
    pointer, and additionally under ``name`` when a non-empty name is given,
    so the backing torch tensor stays referenced until it is freed.

    Dtypes listed in ``_TORCH_UINT_WORKAROUNDS`` are allocated with the mapped
    signed dtype and handed back as a ``type_view`` of the requested dtype.
    """

    def __init__(self, gpudevice: torch.device) -> None:
        """Remember the device used for every request whose ``device`` is not -1."""
        super().__init__()
        self.gpudevice = gpudevice
        self.cpudevice = torch.device("cpu:0")
        # data pointer (int) or caller supplied name (str) -> owning tensor
        self.allocated: Dict[Union[str, int], torch.Tensor] = {}

    def _allocate(self, factory, name: str, shape: List[int], dtype: int,
                  device: int) -> tv.Tensor:
        """Create a tensor with ``factory``, register it and return its tv view.

        Args:
            factory: callable invoked as ``factory(shape, dtype=..., device=...)``
                that returns the new torch tensor.
            name: optional key under which the tensor is also registered.
            shape: shape of the tensor to create.
            dtype: requested tv dtype.
            device: -1 selects the cpu device, any other value the gpu device.

        Returns:
            The tv tensor wrapping the new torch tensor, viewed as the
            requested dtype when the unsigned workaround was applied.
        """
        view_dtype = None
        if dtype in _TORCH_UINT_WORKAROUNDS:
            assert name == "", "must be temp memory for uint dtypes"
            # allocate with the signed stand-in, view back to unsigned below.
            view_dtype = dtype
            dtype = _TORCH_UINT_WORKAROUNDS[dtype]
        th_dtype = _TV_DTYPE_TO_TORCH[dtype]
        dev = self.cpudevice if device == -1 else self.gpudevice
        ten = factory(shape, dtype=th_dtype, device=dev)
        ten_tv = torch_tensor_to_tv(ten)
        self.allocated[ten.data_ptr()] = ten
        if name:
            self.allocated[name] = ten
        if view_dtype is not None:
            return ten_tv.type_view(view_dtype)
        return ten_tv

    def zeros(self, name: str, shape: List[int], dtype: int, device: int) -> tv.Tensor:
        """Allocate a zero-filled tensor."""
        # provide a name if you want to access it after c++ function exit.
        return self._allocate(torch.zeros, name, shape, dtype, device)

    def empty(self, name: str, shape: List[int], dtype: int, device: int) -> tv.Tensor:
        """Allocate an uninitialized tensor."""
        return self._allocate(torch.empty, name, shape, dtype, device)

    def full_int(self, name: str, shape: List[int], value: int, dtype: int, device: int) -> tv.Tensor:
        """Allocate a tensor filled with the integer ``value``.

        Raises:
            NotImplementedError: ``value`` is negative and ``dtype`` is one of
                the unsigned dtypes handled by the workaround.
        """
        if dtype in _TORCH_UINT_WORKAROUNDS and value < 0:
            raise NotImplementedError("you can't use full for unsigned dtypes")
        return self._allocate(
            lambda size, **kwargs: torch.full(size, value, **kwargs),
            name, shape, dtype, device)

    def full_float(self, name: str, shape: List[int], value: float, dtype: int, device: int) -> tv.Tensor:
        """Allocate a tensor filled with the float ``value``.

        Raises:
            NotImplementedError: ``value`` is negative and ``dtype`` is one of
                the unsigned dtypes handled by the workaround.
        """
        if dtype in _TORCH_UINT_WORKAROUNDS and value < 0:
            raise NotImplementedError("you can't use full for unsigned dtypes")
        return self._allocate(
            lambda size, **kwargs: torch.full(size, value, **kwargs),
            name, shape, dtype, device)

    def free(self, ten: tv.Tensor):
        """Drop the pointer-keyed entry that owns ``ten``.

        Only the data-pointer key is removed; an entry registered under a
        name is left in ``self.allocated``.

        Raises:
            ValueError: ``ten`` does not span its whole storage, or its
                pointer is not registered.
        """
        if ten.storage_bytesize() != ten.bytesize():
            raise ValueError("you can't free a sliced tensor.")
        ptr = ten.byte_pointer()
        if ptr in self.allocated:
            self.allocated.pop(ptr)
            return
        raise ValueError("can't find your tensor in cache.")

    def free_noexcept(self, ten: tv.Tensor):
        """Same as ``free`` but silently ignores tensors it cannot release."""
        # for c++ scope guard, free will be called in c++ destructor
        if ten.storage_bytesize() != ten.bytesize():
            return
        ptr = ten.byte_pointer()
        if ptr in self.allocated:
            self.allocated.pop(ptr)
        return
if __name__ == "__main__": if __name__ == "__main__":
a = torch.rand(2, 2) a = torch.rand(2, 2)
atv = torch_tensor_to_tv(a) atv = torch_tensor_to_tv(a)
......
...@@ -30,6 +30,7 @@ class HashTable: ...@@ -30,6 +30,7 @@ class HashTable:
"""simple hash table for 32 and 64 bit data. support both cpu and cuda. """simple hash table for 32 and 64 bit data. support both cpu and cuda.
for cuda, it's a fixed-size table, you must provide maximum size for cuda, it's a fixed-size table, you must provide maximum size
(recommend 2 * num). (recommend 2 * num).
key must be int32/int64.
see spconv/pytorch/functional/sparse_add_hash_based, a real example see spconv/pytorch/functional/sparse_add_hash_based, a real example
that show how to use hash table to implement that show how to use hash table to implement
sparse add (same shape, different indices) sparse add (same shape, different indices)
...@@ -91,7 +92,7 @@ class HashTable: ...@@ -91,7 +92,7 @@ class HashTable:
is_empty = torch.empty([keys.shape[0]], dtype=torch.uint8, device=keys.device) is_empty = torch.empty([keys.shape[0]], dtype=torch.uint8, device=keys.device)
is_empty_tv = torch_tensor_to_tv(is_empty) is_empty_tv = torch_tensor_to_tv(is_empty)
self._table.query(keys_tv, values_tv, is_empty_tv, stream) self._table.query(keys_tv, values_tv, is_empty_tv, stream)
return values, is_empty return values, is_empty > 0
def insert_exist_keys(self, keys: torch.Tensor, values: torch.Tensor): def insert_exist_keys(self, keys: torch.Tensor, values: torch.Tensor):
"""insert kv that k exists in table. return a uint8 tensor that """insert kv that k exists in table. return a uint8 tensor that
...@@ -105,7 +106,7 @@ class HashTable: ...@@ -105,7 +106,7 @@ class HashTable:
is_success = torch.empty([keys.shape[0]], dtype=torch.uint8, device=keys.device) is_success = torch.empty([keys.shape[0]], dtype=torch.uint8, device=keys.device)
is_success_tv = torch_tensor_to_tv(is_success) is_success_tv = torch_tensor_to_tv(is_success)
self._table.insert_exist_keys(keys_tv, values_tv, is_success_tv, stream) self._table.insert_exist_keys(keys_tv, values_tv, is_success_tv, stream)
return is_success return is_success > 0
def assign_arange_(self): def assign_arange_(self):
"""iterate table, assign values with "arange" value. """iterate table, assign values with "arange" value.
......
...@@ -21,10 +21,12 @@ import torch ...@@ -21,10 +21,12 @@ import torch
import numpy as np import numpy as np
import spconv import spconv
from spconv.core import AlgoHint, ConvAlgo from spconv.core import AlgoHint, ConvAlgo
from typing import List, Optional, Union from typing import Dict, List, Optional, Union
from spconv.pytorch.core import ThrustSortAllocator from spconv.pytorch.core import ThrustSortAllocator
from spconv.pytorch.cppcore import torch_tensor_to_tv, get_current_stream, get_arch from spconv.pytorch.cppcore import torch_tensor_to_tv, get_current_stream, get_arch
from spconv.core_cc.csrc.sparse.all import SpconvOps from spconv.core_cc.csrc.sparse.all import SpconvOps
from spconv.core_cc.csrc.sparse.alloc import ExternalAllocator
import spconv.core_cc as _ext import spconv.core_cc as _ext
from spconv.utils import nullcontext from spconv.utils import nullcontext
...@@ -42,6 +44,8 @@ from cumm.gemm import codeops ...@@ -42,6 +44,8 @@ from cumm.gemm import codeops
from spconv.tools import CUDAKernelTimer from spconv.tools import CUDAKernelTimer
DEBUG = False DEBUG = False
DEBUG_INT64_HASH_K = True
INT32_MAX = SpconvOps.get_int32_max()
def get_conv_output_size(input_size, kernel_size, stride, padding, dilation): def get_conv_output_size(input_size, kernel_size, stride, padding, dilation):
...@@ -69,6 +73,25 @@ def get_deconv_output_size(input_size, kernel_size, stride, padding, dilation, ...@@ -69,6 +73,25 @@ def get_deconv_output_size(input_size, kernel_size, stride, padding, dilation,
output_size.append(size) output_size.append(size)
return output_size return output_size
class _HashData:
    """Key/value buffers handed to the index generation kernels as a hash table.

    Both layouts allocate ``num * 2`` slots and expose the tv views as
    ``hashdata_k_tv`` / ``hashdata_v_tv``:

    * ``use_i64`` true: separate int64 key and int32 value tensors
      (``hashdata_k`` / ``hashdata_v``).
    * ``use_i64`` false: a single ``(2, num * 2)`` int32 tensor (``hashdata``)
      whose row 0 is the key view and row 1 the value view.
    """

    def __init__(self, num: int, use_i64: bool, device: torch.device) -> None:
        slot_count = num * 2
        if not use_i64:
            # keys and values share one int32 allocation.
            self.hashdata = torch.empty((2, slot_count),
                                        dtype=torch.int32,
                                        device=device)
            packed_tv = torch_tensor_to_tv(self.hashdata)
            self.hashdata_k_tv = packed_tv[0]
            self.hashdata_v_tv = packed_tv[1]
            return
        # 64 bit keys need their own tensor, values stay int32.
        self.hashdata_k = torch.empty((slot_count, ),
                                      dtype=torch.int64,
                                      device=device)
        self.hashdata_v = torch.empty((slot_count, ),
                                      dtype=torch.int32,
                                      device=device)
        self.hashdata_k_tv = torch_tensor_to_tv(self.hashdata_k)
        self.hashdata_v_tv = torch_tensor_to_tv(self.hashdata_v)
def get_indice_pairs(indices: torch.Tensor, def get_indice_pairs(indices: torch.Tensor,
batch_size: int, batch_size: int,
...@@ -105,7 +128,9 @@ def get_indice_pairs(indices: torch.Tensor, ...@@ -105,7 +128,9 @@ def get_indice_pairs(indices: torch.Tensor,
) )
assert algo == ConvAlgo.Native, "TODO" assert algo == ConvAlgo.Native, "TODO"
# indices = indices.cpu() # indices = indices.cpu()
spatial_volume = functools.reduce(lambda x, y: x * y, spatial_shape, 1)
use_int64_hash_k = spatial_volume >= INT32_MAX or DEBUG_INT64_HASH_K
indice_dtype = torch.int64 if use_int64_hash_k else indices.dtype
pair = torch.full((2, kv, indices.shape[0]), pair = torch.full((2, kv, indices.shape[0]),
-1, -1,
dtype=indices.dtype, dtype=indices.dtype,
...@@ -121,14 +146,16 @@ def get_indice_pairs(indices: torch.Tensor, ...@@ -121,14 +146,16 @@ def get_indice_pairs(indices: torch.Tensor,
out_inds = indices out_inds = indices
if indices.is_cuda: if indices.is_cuda:
stream = get_current_stream() stream = get_current_stream()
hashdata = torch.empty((out_inds.shape[0] * 2, ), hashdata = _HashData(out_inds.shape[0], use_int64_hash_k, indices.device)
dtype=torch.int64, # hashdata = torch.empty((out_inds.shape[0] * 2, ),
device=indices.device) # dtype=torch.int64,
# device=indices.device)
out_inds_tv = torch_tensor_to_tv(out_inds) out_inds_tv = torch_tensor_to_tv(out_inds)
hashdata_tv = torch_tensor_to_tv(hashdata, dtype=tv.custom64) # hashdata_tv = torch_tensor_to_tv(hashdata, dtype=tv.custom64)
SpconvOps.generate_subm_conv_inds(inds_tv, SpconvOps.generate_subm_conv_inds(inds_tv,
hashdata_tv, hashdata.hashdata_k_tv,
hashdata.hashdata_v_tv,
pair_tv, pair_tv,
out_inds_tv, out_inds_tv,
indice_num_per_loc_tv, indice_num_per_loc_tv,
...@@ -154,7 +181,7 @@ def get_indice_pairs(indices: torch.Tensor, ...@@ -154,7 +181,7 @@ def get_indice_pairs(indices: torch.Tensor,
if indices.is_cuda: if indices.is_cuda:
stream = get_current_stream() stream = get_current_stream()
indice_pairs_uniq = torch.empty((pair.numel() // 2 + 1, ), indice_pairs_uniq = torch.empty((pair.numel() // 2 + 1, ),
dtype=indices.dtype, dtype=indice_dtype,
device=indices.device) device=indices.device)
indice_pairs_uniq_tv = torch_tensor_to_tv(indice_pairs_uniq) indice_pairs_uniq_tv = torch_tensor_to_tv(indice_pairs_uniq)
...@@ -183,15 +210,19 @@ def get_indice_pairs(indices: torch.Tensor, ...@@ -183,15 +210,19 @@ def get_indice_pairs(indices: torch.Tensor,
out_inds = torch.empty((num_act_out, indices.shape[1]), out_inds = torch.empty((num_act_out, indices.shape[1]),
dtype=indices.dtype, dtype=indices.dtype,
device=indices.device) device=indices.device)
hashdata = torch.empty((out_inds.shape[0] * 2, ), # hashdata = torch.empty((out_inds.shape[0] * 2, ),
dtype=torch.int64, # dtype=torch.int64,
device=indices.device) # device=indices.device)
hashdata = _HashData(out_inds.shape[0], use_int64_hash_k, indices.device)
out_inds_tv = torch_tensor_to_tv(out_inds) out_inds_tv = torch_tensor_to_tv(out_inds)
hashdata_tv = torch_tensor_to_tv(hashdata, dtype=tv.custom64) # hashdata_tv = torch_tensor_to_tv(hashdata, dtype=tv.custom64)
SpconvOps.generate_conv_inds_stage2(inds_tv, SpconvOps.generate_conv_inds_stage2(inds_tv,
hashdata_tv, hashdata.hashdata_k_tv,
hashdata.hashdata_v_tv,
pair_tv, pair_tv,
uniq_res_tv, uniq_res_tv,
indice_pairs_uniq_tv,
out_inds_tv, out_inds_tv,
num_out_act=num_act_out, num_out_act=num_act_out,
batch_size=batch_size, batch_size=batch_size,
...@@ -267,6 +298,10 @@ def get_indice_pairs_implicit_gemm( ...@@ -267,6 +298,10 @@ def get_indice_pairs_implicit_gemm(
kv: int = functools.reduce(lambda x, y: x * y, ksize, 1) kv: int = functools.reduce(lambda x, y: x * y, ksize, 1)
# TODO in future we will support up to 128 kernel volume. # TODO in future we will support up to 128 kernel volume.
assert kv <= 32, "currently only support kernel volume <= 32 to use implicit gemm" assert kv <= 32, "currently only support kernel volume <= 32 to use implicit gemm"
spatial_volume = functools.reduce(lambda x, y: x * y, spatial_shape, 1)
use_int64_hash_k = spatial_volume >= INT32_MAX or DEBUG_INT64_HASH_K
indice_dtype = torch.int64 if use_int64_hash_k else indices.dtype
if not subm: if not subm:
if transpose: if transpose:
out_shape = get_deconv_output_size(spatial_shape, ksize, stride, out_shape = get_deconv_output_size(spatial_shape, ksize, stride,
...@@ -316,19 +351,22 @@ def get_indice_pairs_implicit_gemm( ...@@ -316,19 +351,22 @@ def get_indice_pairs_implicit_gemm(
if subm: if subm:
out_inds = indices out_inds = indices
hashdata = torch.empty((out_inds.shape[0] * 2, ), # hashdata = torch.empty((out_inds.shape[0] * 2, ),
dtype=torch.int64, # dtype=torch.int64,
device=indices.device) # device=indices.device)
hashdata = _HashData(out_inds.shape[0], use_int64_hash_k, indices.device)
pair_mask = torch.empty((mask_split_count, indices.shape[0]), pair_mask = torch.empty((mask_split_count, indices.shape[0]),
dtype=torch.int32, dtype=torch.int32,
device=indices.device) device=indices.device)
out_inds_tv = torch_tensor_to_tv(out_inds) out_inds_tv = torch_tensor_to_tv(out_inds)
hashdata_tv = torch_tensor_to_tv(hashdata, dtype=tv.custom64) # hashdata_tv = torch_tensor_to_tv(hashdata, dtype=tv.custom64)
pair_mask_tv = torch_tensor_to_tv(pair_mask, dtype=tv.uint32) pair_mask_tv = torch_tensor_to_tv(pair_mask, dtype=tv.uint32)
with timer.record("gen_subm_inds", stream): with timer.record("gen_subm_inds", stream):
SpconvOps.generate_subm_conv_inds(inds_tv, SpconvOps.generate_subm_conv_inds(inds_tv,
hashdata_tv, hashdata.hashdata_k_tv,
hashdata.hashdata_v_tv,
pair_tv, pair_tv,
out_inds_tv, out_inds_tv,
indice_num_per_loc_tv, indice_num_per_loc_tv,
...@@ -380,7 +418,7 @@ def get_indice_pairs_implicit_gemm( ...@@ -380,7 +418,7 @@ def get_indice_pairs_implicit_gemm(
pair_bwd = pair pair_bwd = pair
pair_bwd_tv = pair_tv pair_bwd_tv = pair_tv
indice_pairs_uniq = torch.empty((pair.numel() + 1, ), indice_pairs_uniq = torch.empty((pair.numel() + 1, ),
dtype=indices.dtype, dtype=indice_dtype,
device=indices.device) device=indices.device)
indice_pairs_uniq_tv = torch_tensor_to_tv(indice_pairs_uniq) indice_pairs_uniq_tv = torch_tensor_to_tv(indice_pairs_uniq)
with timer.record("gen_conv_inds_stage1", stream): with timer.record("gen_conv_inds_stage1", stream):
...@@ -433,12 +471,13 @@ def get_indice_pairs_implicit_gemm( ...@@ -433,12 +471,13 @@ def get_indice_pairs_implicit_gemm(
device=indices.device) device=indices.device)
pair_mask_bwd_tv = torch_tensor_to_tv(pair_mask_bwd, pair_mask_bwd_tv = torch_tensor_to_tv(pair_mask_bwd,
dtype=tv.uint32) dtype=tv.uint32)
hashdata = _HashData(out_inds.shape[0], use_int64_hash_k, indices.device)
hashdata = torch.empty((out_inds.shape[0] * 2, ), # hashdata = torch.empty((out_inds.shape[0] * 2, ),
dtype=torch.int64, # dtype=torch.int64,
device=indices.device) # device=indices.device)
out_inds_tv = torch_tensor_to_tv(out_inds) out_inds_tv = torch_tensor_to_tv(out_inds)
hashdata_tv = torch_tensor_to_tv(hashdata, dtype=tv.custom64) # hashdata_tv = torch_tensor_to_tv(hashdata, dtype=tv.custom64)
if DEBUG: if DEBUG:
CONV.stream_synchronize(stream) CONV.stream_synchronize(stream)
...@@ -446,10 +485,12 @@ def get_indice_pairs_implicit_gemm( ...@@ -446,10 +485,12 @@ def get_indice_pairs_implicit_gemm(
t = time.time() t = time.time()
with timer.record("gen_conv_inds_stage2", stream): with timer.record("gen_conv_inds_stage2", stream):
SpconvOps.generate_conv_inds_mask_stage2(inds_tv, SpconvOps.generate_conv_inds_mask_stage2(inds_tv,
hashdata_tv, hashdata.hashdata_k_tv,
hashdata.hashdata_v_tv,
pair_fwd_tv, pair_fwd_tv,
pair_bwd_tv, pair_bwd_tv,
uniq_res_tv, uniq_res_tv,
indice_pairs_uniq_tv,
out_inds_tv, out_inds_tv,
pair_mask_fwd_tv, pair_mask_fwd_tv,
pair_mask_bwd_tv, pair_mask_bwd_tv,
...@@ -1138,6 +1179,7 @@ def implicit_gemm(features: torch.Tensor, ...@@ -1138,6 +1179,7 @@ def implicit_gemm(features: torch.Tensor,
# CONV.stream_synchronize(stream) # CONV.stream_synchronize(stream)
# t = time.time() # t = time.time()
print(tune_res.algo_desp)
with timer.record("implicit_gemm", stream): with timer.record("implicit_gemm", stream):
for j in range(num_split): for j in range(num_split):
beta = 0 if j == 0 else 1 beta = 0 if j == 0 else 1
......
import spconv
from spconv.pytorch.cppcore import TorchAllocator
print(1)
from spconv.core_cc.csrc.sparse.all import SpconvOps
import torch
print(2)
if __name__ == "__main__":
    # Smoke test: pass a torch backed allocator bound to the first CUDA
    # device to the extension's allocator test entry point.
    cuda_device = torch.device("cuda:0")
    alloc = TorchAllocator(cuda_device)
    SpconvOps.test_allocator(alloc)
...@@ -352,32 +352,36 @@ def scatter_nd(indices, updates, shape): ...@@ -352,32 +352,36 @@ def scatter_nd(indices, updates, shape):
class TestSpConv(TestCase): class TestSpConv(TestCase):
def testSpConv3d(self): def testSpConv3d(self):
np.random.seed(484) np.random.seed(71)
torch.manual_seed(48848) torch.manual_seed(705)
devices = ["cuda:0"] devices = ["cuda:0"]
shapes = [[19, 18, 17]] shapes = [[4, 4, 4]]
batchsizes = [1, 2] batchsizes = [1, 2]
in_channels = [32] in_channels = [4]
out_channels = [32, 48, 64] out_channels = [32, 48, 64]
ksizes = [2, 3] ksizes = [2, 3]
strides = [1, 2, 3] strides = [1, 2, 3]
paddings = [0, 1, 2] paddings = [0, 1, 2]
dilations = [1, 2, 3] dilations = [1, 2, 3]
ksizes = [3]
strides = [1]
paddings = [0]
dilations = [1]
algos = [ algos = [
ConvAlgo.Native, ConvAlgo.MaskImplicitGemm, ConvAlgo.MaskImplicitGemm,
ConvAlgo.MaskSplitImplicitGemm # ConvAlgo.MaskSplitImplicitGemm
] ]
algos = [ConvAlgo.MaskSplitImplicitGemm] # algos = [ConvAlgo.MaskSplitImplicitGemm]
for dev, shape, bs, IC, OC, k, s, p, d, al in params_grid( for dev, shape, bs, IC, OC, k, s, p, d, al in params_grid(
devices, shapes, batchsizes, in_channels, out_channels, ksizes, devices, shapes, batchsizes, in_channels, out_channels, ksizes,
strides, paddings, dilations, algos): strides, paddings, dilations, algos):
if all([s > 1, d > 1]): if all([s > 1, d > 1]):
continue # don't support this. continue # don't support this.
print(k, s, p, d)
device = torch.device(dev) device = torch.device(dev)
num_points = [1000] * bs num_points = [10] * bs
dtype = torch.float32 dtype = torch.float32
net = SparseConv3dTestTorch(1, net = SparseConv3dTestTorch(1,
3, 3,
...@@ -398,6 +402,9 @@ class TestSpConv(TestCase): ...@@ -398,6 +402,9 @@ class TestSpConv(TestCase):
np.float32) np.float32)
indices = np.ascontiguousarray( indices = np.ascontiguousarray(
sparse_dict["indices"][:, [3, 0, 1, 2]]).astype(np.int32) sparse_dict["indices"][:, [3, 0, 1, 2]]).astype(np.int32)
# print(k, s, p, d, features.mean(), indices.mean())
# if k == 2 and s == 2 and p == 0 and d == 1:
# breakpoint()
features_dense = sparse_dict["features_dense"].astype(np.float32) features_dense = sparse_dict["features_dense"].astype(np.float32)
indices_t = torch.from_numpy(indices).int().to(device) indices_t = torch.from_numpy(indices).int().to(device)
features_t = torch.from_numpy(features).to(device).to(dtype) features_t = torch.from_numpy(features).to(device).to(dtype)
...@@ -829,4 +836,4 @@ if __name__ == '__main__': ...@@ -829,4 +836,4 @@ if __name__ == '__main__':
# main(algo=spconv.ConvAlgo.SparseConvNet, dtype=torch.float32) # main(algo=spconv.ConvAlgo.SparseConvNet, dtype=torch.float32)
# TestCase().assertAllClose(out_my, out_ref) # TestCase().assertAllClose(out_my, out_ref)
# unittest.main() # unittest.main()
TestSpConv().testSpMaxPool3d() TestSpConv().testSpConv3d()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment