Commit 21bb00ae authored by Yan Yan's avatar Yan Yan
Browse files

still working on c++ only

parent 899008fa
This diff is collapsed.
...@@ -30,6 +30,7 @@ from spconv.pytorch.core import IndiceData, ImplicitGemmIndiceData, expand_nd ...@@ -30,6 +30,7 @@ from spconv.pytorch.core import IndiceData, ImplicitGemmIndiceData, expand_nd
from spconv.pytorch.modules import SparseModule from spconv.pytorch.modules import SparseModule
from spconv.cppconstants import CPU_ONLY_BUILD from spconv.cppconstants import CPU_ONLY_BUILD
from spconv.utils import nullcontext from spconv.utils import nullcontext
from .conv import _MAX_NUM_VOXELS_DURING_TRAINING
class SparseMaxPool(SparseModule): class SparseMaxPool(SparseModule):
...@@ -42,6 +43,7 @@ class SparseMaxPool(SparseModule): ...@@ -42,6 +43,7 @@ class SparseMaxPool(SparseModule):
indice_key: Optional[str] = None, indice_key: Optional[str] = None,
subm: bool = False, subm: bool = False,
algo: Optional[ConvAlgo] = None, algo: Optional[ConvAlgo] = None,
record_voxel_count: bool = False,
name=None): name=None):
super(SparseMaxPool, self).__init__(name=name) super(SparseMaxPool, self).__init__(name=name)
self.ndim = ndim self.ndim = ndim
...@@ -52,6 +54,12 @@ class SparseMaxPool(SparseModule): ...@@ -52,6 +54,12 @@ class SparseMaxPool(SparseModule):
self.stride = expand_nd(ndim, stride) self.stride = expand_nd(ndim, stride)
self.padding = expand_nd(ndim, padding) self.padding = expand_nd(ndim, padding)
self.subm = subm self.subm = subm
if record_voxel_count and not self.subm:
# we record maximum voxel num in both inference and training if
# record_voxel_count flag setting.
self.register_buffer(_MAX_NUM_VOXELS_DURING_TRAINING,
torch.zeros(1, dtype=torch.int32))
self.record_voxel_count = record_voxel_count
self.dilation = expand_nd(ndim, dilation) self.dilation = expand_nd(ndim, dilation)
self.indice_key = indice_key self.indice_key = indice_key
kv = int(np.prod(kernel_size)) kv = int(np.prod(kernel_size))
...@@ -220,6 +228,136 @@ class SparseMaxPool(SparseModule): ...@@ -220,6 +228,136 @@ class SparseMaxPool(SparseModule):
features.shape[0]) features.shape[0])
out_tensor.benchmark_record[self.name]["num_out_points"].append( out_tensor.benchmark_record[self.name]["num_out_points"].append(
out_features.shape[0]) out_features.shape[0])
if not self.subm and self.record_voxel_count:
if hasattr(self, _MAX_NUM_VOXELS_DURING_TRAINING):
ops.maximum_value_int_(
getattr(self, _MAX_NUM_VOXELS_DURING_TRAINING),
outids.shape[0])
out_tensor = out_tensor.replace_feature(out_features)
out_tensor.indices = outids
out_tensor.indice_dict = indice_dict
out_tensor.spatial_shape = out_spatial_shape
return out_tensor
class SparseAvgPool(SparseModule):
def __init__(self,
             ndim,
             kernel_size: Union[int, List[int], Tuple[int, ...]] = 3,
             stride: Optional[Union[int, List[int], Tuple[int, ...]]] = 1,
             padding: Union[int, List[int], Tuple[int, ...]] = 0,
             dilation: Union[int, List[int], Tuple[int, ...]] = 1,
             indice_key: Optional[str] = None,
             subm: bool = False,
             algo: Optional[ConvAlgo] = None,
             record_voxel_count: bool = False,
             name=None):
    """Configure a sparse average-pooling layer.

    Args:
        ndim: number of spatial dimensions.
        kernel_size: pooling window, an int or one value per dimension;
            expanded with ``expand_nd``.
        stride: pooling stride. ``None`` means "use a copy of the expanded
            kernel size".
        padding: padding, an int or one value per dimension.
        dilation: dilation, an int or one value per dimension.
        indice_key: optional key stored on the layer for indice caching.
        subm: stored on the layer as ``self.subm``.
        algo: accepted for signature parity with the other pooling
            layers; the layer always uses ``ConvAlgo.MaskImplicitGemm``.
        record_voxel_count: when True and ``subm`` is False, register an
            int32 buffer that tracks the maximum number of output voxels.
        name: forwarded to ``SparseModule.__init__``.

    Raises:
        AssertionError: if the kernel volume exceeds 32.
    """
    super(SparseAvgPool, self).__init__(name=name)
    self.ndim = ndim
    self.kernel_size = expand_nd(ndim, kernel_size)
    if stride is None:
        self.stride = self.kernel_size.copy()
    else:
        self.stride = expand_nd(ndim, stride)
    self.padding = expand_nd(ndim, padding)
    self.subm = subm
    if record_voxel_count and not self.subm:
        # The maximum voxel count is recorded in both inference and
        # training whenever the record_voxel_count flag is set.
        self.register_buffer(_MAX_NUM_VOXELS_DURING_TRAINING,
                             torch.zeros(1, dtype=torch.int32))
    self.record_voxel_count = record_voxel_count
    self.dilation = expand_nd(ndim, dilation)
    self.indice_key = indice_key
    # Use the expanded per-dimension kernel size: the raw argument may be a
    # bare int, in which case np.prod(kernel_size) would be that int rather
    # than the real kernel volume and the limit below would not be enforced.
    kv = int(np.prod(self.kernel_size))
    assert kv <= 32, "avg pool only support implicit-gemm style indice gen with kv <= 32 limit"
    self.algo = ConvAlgo.MaskImplicitGemm
def extra_repr(self):
    """Return the layer summary shown in the module's repr.

    Kernel size and stride are always listed. Padding and dilation are
    listed only when they differ from their neutral values (all zeros and
    all ones respectively), and the algorithm only when it is set.
    """
    s = ('kernel_size={kernel_size}' ', stride={stride}')
    # padding/dilation are sequences that may not be tuples, so compare
    # them as lists; comparing a list with a tuple is always unequal and
    # would make the neutral values show up in every repr.
    if list(self.padding) != [0] * len(self.padding):
        s += ', padding={padding}'
    if list(self.dilation) != [1] * len(self.dilation):
        s += ', dilation={dilation}'
    if self.algo is not None:
        s += f', algo={self.algo}'
    return s.format(**self.__dict__)
def forward(self, input):
assert isinstance(input, spconv.SparseConvTensor)
features = input.features
device = features.device
indices = input.indices
spatial_shape = input.spatial_shape
batch_size = input.batch_size
if not self.subm:
out_spatial_shape = ops.get_conv_output_size(
spatial_shape, self.kernel_size, self.stride, self.padding,
self.dilation)
else:
out_spatial_shape = spatial_shape
out_tensor = input.shadow_copy()
out_padding = [0] * self.ndim
indice_dict = input.indice_dict.copy()
profile_ctx = nullcontext()
if input._timer is not None and self._sparse_unique_name:
profile_ctx = input._timer.namespace(self._sparse_unique_name)
with profile_ctx:
with input._timer.namespace("gen_pairs"):
res = ops.get_indice_pairs_implicit_gemm(
indices,
batch_size,
spatial_shape,
self.algo,
ksize=self.kernel_size,
stride=self.stride,
padding=self.padding,
dilation=self.dilation,
out_padding=out_padding,
subm=self.subm,
is_train=(not self.subm) or self.training,
alloc=input.thrust_allocator,
timer=input._timer)
outids = res[0]
num_inds_per_loc = res[1]
pair_fwd = res[2]
pair_bwd = res[3]
pair_mask_fwd_splits = res[4]
pair_mask_bwd_splits = res[5]
mask_argsort_fwd_splits = res[6]
mask_argsort_bwd_splits = res[7]
masks = res[8]
if self.indice_key is not None:
indice_data = ImplicitGemmIndiceData(
outids,
indices,
pair_fwd,
pair_bwd,
pair_mask_fwd_splits=pair_mask_fwd_splits,
pair_mask_bwd_splits=pair_mask_bwd_splits,
mask_argsort_fwd_splits=mask_argsort_fwd_splits,
mask_argsort_bwd_splits=mask_argsort_bwd_splits,
masks=masks,
is_subm=self.subm,
spatial_shape=spatial_shape,
out_spatial_shape=out_spatial_shape,
algo=self.algo,
ksize=self.kernel_size,
stride=self.stride,
padding=self.padding,
dilation=self.dilation)
msg = f"your indice key {self.indice_key} already exists in this sparse tensor."
assert self.indice_key not in indice_dict, msg
indice_dict[self.indice_key] = indice_data
out_features = Fsp.indice_avgpool_implicit_gemm(
features, pair_fwd, pair_bwd, outids.shape[0], self.training)
if not self.subm and self.record_voxel_count:
if hasattr(self, _MAX_NUM_VOXELS_DURING_TRAINING):
ops.maximum_value_int_(
getattr(self, _MAX_NUM_VOXELS_DURING_TRAINING),
outids.shape[0])
out_tensor = out_tensor.replace_feature(out_features) out_tensor = out_tensor.replace_feature(out_features)
out_tensor.indices = outids out_tensor.indices = outids
out_tensor.indice_dict = indice_dict out_tensor.indice_dict = indice_dict
...@@ -235,15 +373,18 @@ class SparseMaxPool1d(SparseMaxPool): ...@@ -235,15 +373,18 @@ class SparseMaxPool1d(SparseMaxPool):
dilation=1, dilation=1,
indice_key=None, indice_key=None,
algo: Optional[ConvAlgo] = None, algo: Optional[ConvAlgo] = None,
record_voxel_count: bool = False,
name=None): name=None):
super(SparseMaxPool1d, self).__init__(1, super(SparseMaxPool1d,
kernel_size, self).__init__(1,
stride, kernel_size,
padding, stride,
dilation, padding,
indice_key=indice_key, dilation,
algo=algo, indice_key=indice_key,
name=name) algo=algo,
record_voxel_count=record_voxel_count,
name=name)
class SparseMaxPool2d(SparseMaxPool): class SparseMaxPool2d(SparseMaxPool):
...@@ -254,15 +395,18 @@ class SparseMaxPool2d(SparseMaxPool): ...@@ -254,15 +395,18 @@ class SparseMaxPool2d(SparseMaxPool):
dilation=1, dilation=1,
indice_key=None, indice_key=None,
algo: Optional[ConvAlgo] = None, algo: Optional[ConvAlgo] = None,
record_voxel_count: bool = False,
name=None): name=None):
super(SparseMaxPool2d, self).__init__(2, super(SparseMaxPool2d,
kernel_size, self).__init__(2,
stride, kernel_size,
padding, stride,
dilation, padding,
indice_key=indice_key, dilation,
algo=algo, indice_key=indice_key,
name=name) algo=algo,
record_voxel_count=record_voxel_count,
name=name)
class SparseMaxPool3d(SparseMaxPool): class SparseMaxPool3d(SparseMaxPool):
...@@ -273,15 +417,18 @@ class SparseMaxPool3d(SparseMaxPool): ...@@ -273,15 +417,18 @@ class SparseMaxPool3d(SparseMaxPool):
dilation=1, dilation=1,
indice_key=None, indice_key=None,
algo: Optional[ConvAlgo] = None, algo: Optional[ConvAlgo] = None,
record_voxel_count: bool = False,
name=None): name=None):
super(SparseMaxPool3d, self).__init__(3, super(SparseMaxPool3d,
kernel_size, self).__init__(3,
stride, kernel_size,
padding, stride,
dilation, padding,
indice_key=indice_key, dilation,
algo=algo, indice_key=indice_key,
name=name) algo=algo,
record_voxel_count=record_voxel_count,
name=name)
class SparseMaxPool4d(SparseMaxPool): class SparseMaxPool4d(SparseMaxPool):
...@@ -292,12 +439,87 @@ class SparseMaxPool4d(SparseMaxPool): ...@@ -292,12 +439,87 @@ class SparseMaxPool4d(SparseMaxPool):
dilation=1, dilation=1,
indice_key=None, indice_key=None,
algo: Optional[ConvAlgo] = None, algo: Optional[ConvAlgo] = None,
record_voxel_count: bool = False,
name=None):
super(SparseMaxPool4d,
self).__init__(4,
kernel_size,
stride,
padding,
dilation,
indice_key=indice_key,
algo=algo,
record_voxel_count=record_voxel_count,
name=name)
class SparseAvgPool1d(SparseAvgPool):
    """One-dimensional sparse average pooling.

    Average pooling that uses the real point count instead of the kernel
    size. This class only pins ``ndim`` to 1; every other argument is
    handed to ``SparseAvgPool`` unchanged.
    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 dilation=1,
                 indice_key=None,
                 algo: Optional[ConvAlgo] = None,
                 record_voxel_count: bool = False,
                 name=None):
        # stride=None is passed through as-is; the base class then falls
        # back to the kernel size.
        super().__init__(ndim=1,
                         kernel_size=kernel_size,
                         stride=stride,
                         padding=padding,
                         dilation=dilation,
                         indice_key=indice_key,
                         algo=algo,
                         record_voxel_count=record_voxel_count,
                         name=name)
class SparseAvgPool2d(SparseAvgPool):
    """Two-dimensional sparse average pooling.

    Average pooling that uses the real point count instead of the kernel
    size. This class only pins ``ndim`` to 2; every other argument is
    handed to ``SparseAvgPool`` unchanged.
    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 dilation=1,
                 indice_key=None,
                 algo: Optional[ConvAlgo] = None,
                 record_voxel_count: bool = False,
                 name=None):
        # stride=None is passed through as-is; the base class then falls
        # back to the kernel size.
        super().__init__(ndim=2,
                         kernel_size=kernel_size,
                         stride=stride,
                         padding=padding,
                         dilation=dilation,
                         indice_key=indice_key,
                         algo=algo,
                         record_voxel_count=record_voxel_count,
                         name=name)
class SparseAvgPool3d(SparseAvgPool):
    """Three-dimensional sparse average pooling.

    Average pooling that uses the real point count instead of the kernel
    size. This class only pins ``ndim`` to 3; every other argument is
    handed to ``SparseAvgPool`` unchanged.
    """

    def __init__(self,
                 kernel_size,
                 stride=None,
                 padding=0,
                 dilation=1,
                 indice_key=None,
                 algo: Optional[ConvAlgo] = None,
                 record_voxel_count: bool = False,
                 name=None):
        # stride=None is passed through as-is; the base class then falls
        # back to the kernel size.
        super().__init__(ndim=3,
                         kernel_size=kernel_size,
                         stride=stride,
                         padding=padding,
                         dilation=dilation,
                         indice_key=indice_key,
                         algo=algo,
                         record_voxel_count=record_voxel_count,
                         name=name)
# Builds one test executable per src/test_*.cpp file and registers each of
# them with CTest.
set(CATCH_HEADER ${PROJECT_SOURCE_DIR}/third_party/catch2)

# src/catch_main.cpp is compiled once as an object library and linked into
# every test executable below.
add_library(catch_main OBJECT src/catch_main.cpp)
# target_compile_features(catch_main PUBLIC cxx_std_2a)
set_target_properties(catch_main PROPERTIES CXX_STANDARD 14)
target_include_directories(catch_main PRIVATE ${CATCH_HEADER})

file(GLOB test_sources "src/test_*.cpp")
foreach(test_source ${test_sources})
    # Derive the target name from the file name: test_foo.cpp -> test-foo.
    get_filename_component(test_stem ${test_source} NAME_WE)
    string(REGEX REPLACE "test_([^$]+)" "test-\\1" testcase ${test_stem})

    add_executable(${testcase} ${test_source} $<TARGET_OBJECTS:catch_main>)
    set_target_properties(${testcase} PROPERTIES CXX_STANDARD 14)
    # set_target_properties(${testcase} PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
    # set_property(TARGET ${testcase} PROPERTY CUDA_STANDARD 14)
    target_compile_definitions(${testcase} PRIVATE
        CATCH_CONFIG_FAST_COMPILE
    )
    target_include_directories(${testcase} PRIVATE
        ${CATCH_HEADER} ${ALL_INCLUDE}
    )
    target_link_libraries(${testcase} ${ALL_LIBS} pybind11::embed -Wl,--no-as-needed spconv)

    # Tests run from the top-level source directory.
    add_test(NAME "${testcase}"
        COMMAND ${testcase}
        WORKING_DIRECTORY ${CMAKE_SOURCE_DIR})
endforeach()
\ No newline at end of file
...@@ -113,7 +113,7 @@ class Net(nn.Module): ...@@ -113,7 +113,7 @@ class Net(nn.Module):
# nn.BatchNorm1d(32), # nn.BatchNorm1d(32),
# nn.ReLU(), # nn.ReLU(),
# spconv.SparseConv3d(64, 64, 2, 2, bias=False, indice_key="m0"), # spconv.SparseConv3d(64, 64, 2, 2, bias=False, indice_key="m0"),
spconv.SparseMaxPool3d(2, 2, algo=pool_algo), spconv.SparseMaxPool3d(2, 2, algo=pool_algo, record_voxel_count=True),
spconv.SubMConv3d(64, spconv.SubMConv3d(64,
96, 96,
3, 3,
...@@ -332,7 +332,7 @@ def main(): ...@@ -332,7 +332,7 @@ def main():
voxels_th = torch.from_numpy(voxels).to(device).to(dtype) voxels_th = torch.from_numpy(voxels).to(device).to(dtype)
coors_th = torch.from_numpy(coors).to(device).int() coors_th = torch.from_numpy(coors).to(device).int()
voxels_th.requires_grad = True voxels_th.requires_grad = True
algo = spconv.ConvAlgo.MaskImplicitGemm algo = spconv.ConvAlgo.Native
# 3080 Laptop # 3080 Laptop
# MaskImpGemm: 11.2ms # MaskImpGemm: 11.2ms
# MaskSplitImpGemm: 12.2ms # MaskSplitImpGemm: 12.2ms
...@@ -385,21 +385,25 @@ def main(): ...@@ -385,21 +385,25 @@ def main():
torch.cuda.synchronize() torch.cuda.synchronize()
# sort_bench() # sort_bench()
times.append(time.time() - t) times.append(time.time() - t)
# state = net.state_dict()
# state.pop("net.2.max_num_voxels_during_training")
# net.load_state_dict(state)
# breakpoint()
print("spconv time", np.mean(times[10:])) print("spconv time", np.mean(times[10:]))
times = [] # times = []
for i in range(10): # for i in range(10):
out = net(voxels_th, coors_th, 1) # out = net(voxels_th, coors_th, 1)
print("------------") # print("------------")
torch.cuda.synchronize() # torch.cuda.synchronize()
t = time.time() # t = time.time()
out.features.backward(dout_t) # out.features.backward(dout_t)
torch.cuda.synchronize() # torch.cuda.synchronize()
times.append(time.time() - t) # times.append(time.time() - t)
# # print((net.grid == -1).float().sum(), net.grid.numel()) # # # print((net.grid == -1).float().sum(), net.grid.numel())
# # print("spconv time", time.time() - t) # # # print("spconv time", time.time() - t)
print("spconv bw time", np.mean(times[5:])) # print("spconv bw time", np.mean(times[5:]))
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -248,7 +248,7 @@ def test_spconv3d(): ...@@ -248,7 +248,7 @@ def test_spconv3d():
ConvAlgo.Native, ConvAlgo.MaskImplicitGemm, ConvAlgo.Native, ConvAlgo.MaskImplicitGemm,
ConvAlgo.MaskSplitImplicitGemm ConvAlgo.MaskSplitImplicitGemm
] ]
algos = [ConvAlgo.Native] algos = [ConvAlgo.Native, ConvAlgo.MaskImplicitGemm, ConvAlgo.MaskSplitImplicitGemm]
for dev, shape, bs, IC, OC, k, s, p, d, al in params_grid( for dev, shape, bs, IC, OC, k, s, p, d, al in params_grid(
devices, shapes, batchsizes, in_channels, out_channels, ksizes, devices, shapes, batchsizes, in_channels, out_channels, ksizes,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment