Commit 9d1e33d6 authored by yan.yan's avatar yan.yan
Browse files

temp commit

parent 0da847e2
...@@ -2,5 +2,13 @@ from .basic import bench_basic ...@@ -2,5 +2,13 @@ from .basic import bench_basic
import fire import fire
def bench_me_basic(dtype_str: str):
    """Forward to ``spconv.benchmark.me.bench_me_basic``.

    The implementation module is imported at call time rather than at module
    import, so loading this entry-point module does not pull it in.

    Args:
        dtype_str: dtype shortcut string, passed through unchanged.

    Returns:
        Whatever the forwarded benchmark function returns.
    """
    # Aliased so the imported callable does not shadow this wrapper's own name.
    from spconv.benchmark.me import bench_me_basic as _run_me_basic

    return _run_me_basic(dtype_str)
def bench_torchsparse_basic(dtype_str: str):
    """Forward to ``spconv.benchmark.thsp.bench_torchsparse_basic``.

    The implementation module is imported at call time rather than at module
    import, so loading this entry-point module does not pull it in.

    Args:
        dtype_str: dtype shortcut string, passed through unchanged.

    Returns:
        Whatever the forwarded benchmark function returns.
    """
    # Aliased so the imported callable does not shadow this wrapper's own name.
    from spconv.benchmark.thsp import bench_torchsparse_basic as _run_thsp_basic

    return _run_thsp_basic(dtype_str)
if __name__ == "__main__": if __name__ == "__main__":
fire.Fire() fire.Fire()
...@@ -135,10 +135,6 @@ class Net(nn.Module): ...@@ -135,10 +135,6 @@ class Net(nn.Module):
# spconv.SparseInverseConv3d(128, 64, 2, indice_key="m4", bias=False, algo=algo), # spconv.SparseInverseConv3d(128, 64, 2, indice_key="m4", bias=False, algo=algo),
) )
max_batch_size = 1 max_batch_size = 1
# grid (dense map) is used for indice generation. use pre-allocated grid can run faster.
self.grid = torch.full([max_batch_size, *shape], -1,
dtype=torch.int32).cuda()
# self.grid = None
self.shape = shape self.shape = shape
def forward(self, features, coors, batch_size, enable_timer: bool = False): def forward(self, features, coors, batch_size, enable_timer: bool = False):
...@@ -146,7 +142,6 @@ class Net(nn.Module): ...@@ -146,7 +142,6 @@ class Net(nn.Module):
coors, coors,
self.shape, self.shape,
batch_size, batch_size,
self.grid,
enable_timer=enable_timer) enable_timer=enable_timer)
return self.net(x) return self.net(x)
...@@ -176,24 +171,24 @@ def bench_basic(dtype_str: str): ...@@ -176,24 +171,24 @@ def bench_basic(dtype_str: str):
dout_t = torch.from_numpy(dout).to(device).to(torch_dtype) dout_t = torch.from_numpy(dout).to(device).to(torch_dtype)
times = [] times = []
with torch.no_grad(): with torch.no_grad():
for i in range(20): for i in range(100):
torch.cuda.synchronize() torch.cuda.synchronize()
t = time.time() t = time.time()
out_nograd = net(voxels_th, coors_th, 1, False) out_nograd = net(voxels_th, coors_th, 1, False)
timer = out_nograd._timer timer = out_nograd._timer
torch.cuda.synchronize() torch.cuda.synchronize()
times.append(time.time() - t) times.append(time.time() - t)
print(f"basic[{dtype_str}|{algo}|forward]", np.mean(times[10:])) print(f"basic[{dtype_str}|{algo}|forward]", np.mean(times[50:]))
times = [] times = []
for i in range(10): for i in range(50):
out = net(voxels_th, coors_th, 1) out = net(voxels_th, coors_th, 1)
torch.cuda.synchronize() torch.cuda.synchronize()
t = time.time() t = time.time()
out.features.backward(dout_t) out.features.backward(dout_t)
torch.cuda.synchronize() torch.cuda.synchronize()
times.append(time.time() - t) times.append(time.time() - t)
print(f"basic[{dtype_str}|{algo}|backward]", np.mean(times[5:])) print(f"basic[{dtype_str}|{algo}|backward]", np.mean(times[25:]))
if __name__ == "__main__": if __name__ == "__main__":
bench_basic("f16") bench_basic("f16")
\ No newline at end of file
...@@ -5,7 +5,8 @@ from io import BytesIO ...@@ -5,7 +5,8 @@ from io import BytesIO
import numpy as np import numpy as np
from spconv.constants import PACKAGE_ROOT from spconv.constants import PACKAGE_ROOT
RAW_PC_PATH = "https://raw.githubusercontent.com/traveller59/spconv/v2.1.10/test/data/test_spconv.pkl" RAW_TEST_DATA_PATH = "https://raw.githubusercontent.com/traveller59/spconv/v2.1.10/test/data/test_spconv.pkl"
RAW_PC_PATH = "https://raw.githubusercontent.com/traveller59/spconv/v2.1.10/test/data/benchmark-pc.npz"
def get_voxel_data(): def get_voxel_data():
editable_test_data_path = PACKAGE_ROOT.parent / "test/data/test_spconv.pkl" editable_test_data_path = PACKAGE_ROOT.parent / "test/data/test_spconv.pkl"
...@@ -13,7 +14,7 @@ def get_voxel_data(): ...@@ -13,7 +14,7 @@ def get_voxel_data():
with editable_test_data_path.open("rb") as f: with editable_test_data_path.open("rb") as f:
return pickle.load(f) return pickle.load(f)
ff = BytesIO() ff = BytesIO()
with requests.get(RAW_PC_PATH, stream=True) as req: with requests.get(RAW_TEST_DATA_PATH, stream=True) as req:
req.raise_for_status() req.raise_for_status()
for chunk in req.iter_content(chunk_size=8192): for chunk in req.iter_content(chunk_size=8192):
ff.write(chunk) ff.write(chunk)
......
"""Benchmark MinkowskiEngine
"""
from spconv.benchmark.core import get_voxel_data
import time
from pathlib import Path
import numpy as np
import torch
from torch import nn
from spconv.core import ConvAlgo
from cumm import dtypes
from spconv.test_utils import params_grid
# cumm dtype -> torch dtype for the precisions this benchmark accepts.
# bench_me_basic raises NotImplementedError for any dtype that is not a key here.
_DTYPE_TO_TORCH_DTYPE = {
dtypes.float32: torch.float32,
dtypes.float16: torch.float16,
}
def bench_me_basic(dtype_str: str):
    """Resolve the benchmark dtype for the MinkowskiEngine benchmark.

    The body visible here only validates ``dtype_str`` and looks up the
    matching torch dtype; it does not run anything yet and returns ``None``.

    Args:
        dtype_str: dtype shortcut understood by
            ``cumm.dtypes.get_dtype_by_shortcut``.

    Raises:
        NotImplementedError: if the resolved dtype has no entry in
            ``_DTYPE_TO_TORCH_DTYPE``.
    """
    dtype = dtypes.get_dtype_by_shortcut(dtype_str)
    # The table's values are torch dtypes (never None), so None means "no entry".
    torch_dtype = _DTYPE_TO_TORCH_DTYPE.get(dtype)
    if torch_dtype is None:
        raise NotImplementedError("only support bench f32 and f16 for now")
"""Benchmark torchsparse
"""
from spconv.benchmark.core import get_voxel_data
import time
from pathlib import Path
import numpy as np
import torch
from torch import nn
from spconv.core import ConvAlgo
from cumm import dtypes
from spconv.test_utils import params_grid
# cumm dtype -> torch dtype for the precisions this benchmark accepts.
# bench_torchsparse_basic raises NotImplementedError for any dtype that is not a key here.
_DTYPE_TO_TORCH_DTYPE = {
dtypes.float32: torch.float32,
dtypes.float16: torch.float16,
}
def bench_torchsparse_basic(dtype_str: str):
    """Resolve the benchmark dtype for the torchsparse benchmark.

    The body visible here only validates ``dtype_str`` and looks up the
    matching torch dtype; it does not run anything yet and returns ``None``.

    Args:
        dtype_str: dtype shortcut understood by
            ``cumm.dtypes.get_dtype_by_shortcut``.

    Raises:
        NotImplementedError: if the resolved dtype has no entry in
            ``_DTYPE_TO_TORCH_DTYPE``.
    """
    dtype = dtypes.get_dtype_by_shortcut(dtype_str)
    # The table's values are torch dtypes (never None), so None means "no entry".
    torch_dtype = _DTYPE_TO_TORCH_DTYPE.get(dtype)
    if torch_dtype is None:
        raise NotImplementedError("only support bench f32 and f16 for now")
...@@ -37,3 +37,15 @@ if SAVED_WEIGHT_LAYOUT != "": ...@@ -37,3 +37,15 @@ if SAVED_WEIGHT_LAYOUT != "":
assert SAVED_WEIGHT_LAYOUT in ["KRSC", "RSKC", "RSCK"], "please set SAVED_WEIGHT_LAYOUT to KRSC, RSKC or RSCK" assert SAVED_WEIGHT_LAYOUT in ["KRSC", "RSKC", "RSCK"], "please set SAVED_WEIGHT_LAYOUT to KRSC, RSKC or RSCK"
ALL_WEIGHT_IS_KRSC = True ALL_WEIGHT_IS_KRSC = True
# Read from the environment; empty string when the variable is not set.
# NOTE(review): presumably a location for debug dumps -- confirm at the use site.
SPCONV_DEBUG_SAVE_PATH = os.getenv("SPCONV_DEBUG_SAVE_PATH", "")

# BOOST_ROOT is optional. It ends up as None when the variable is unset OR set
# to an empty string (e.g. `export BOOST_ROOT=`). Without the empty-string
# check, Path("") resolves to the current directory, so the first assertion
# would pass and the second would fail with a misleading message.
_BOOST_ROOT = os.getenv("BOOST_ROOT")
if not _BOOST_ROOT:
    BOOST_ROOT = None
else:
    BOOST_ROOT = Path(_BOOST_ROOT)
    # When a root is supplied, fail at import time if it is unusable: the
    # directory must exist and must contain boost/geometry.
    assert BOOST_ROOT.exists(), "you provide BOOST_ROOT, but it not exists"
    assert (BOOST_ROOT / "boost" / "geometry").exists(), "you provide BOOST_ROOT, but BOOST_ROOT/boost/geometry not exists"
...@@ -5,6 +5,20 @@ class ThrustCustomAllocatorV2: ...@@ -5,6 +5,20 @@ class ThrustCustomAllocatorV2:
alloc_func: Callable[int, int] alloc_func: Callable[int, int]
class SpconvOps: class SpconvOps:
@staticmethod @staticmethod
def cumm_version() -> str:
    """
    get cumm version when build spconv.
    """
    # Typing stub only: the body is intentionally empty.
    # NOTE(review): the docstring text matches the pccm-side definition of this
    # function, so this stub is presumably generated -- confirm before
    # hand-editing it.
    ...
@staticmethod
def pccm_version() -> str:
    """
    get pccm version when build spconv.
    """
    # Typing stub only: the body is intentionally empty.
    # NOTE(review): the docstring text matches the pccm-side definition of this
    # function, so this stub is presumably generated -- confirm before
    # hand-editing it.
    ...
@staticmethod
def generate_conv_inds_stage1(indices: Tensor, indice_pairs: Tensor, indice_pairs_uniq: Tensor, indice_num_per_loc: Tensor, batch_size: int, output_dims: List[int], input_dims: List[int], ksize: List[int], stride: List[int], padding: List[int], dilation: List[int], transposed: bool = False, stream_int: int = 0) -> None: def generate_conv_inds_stage1(indices: Tensor, indice_pairs: Tensor, indice_pairs_uniq: Tensor, indice_num_per_loc: Tensor, batch_size: int, output_dims: List[int], input_dims: List[int], ksize: List[int], stride: List[int], padding: List[int], dilation: List[int], transposed: bool = False, stream_int: int = 0) -> None:
""" """
Args: Args:
......
...@@ -18,3 +18,8 @@ if hasattr(_ext, "cumm"): ...@@ -18,3 +18,8 @@ if hasattr(_ext, "cumm"):
CPU_ONLY_BUILD = False CPU_ONLY_BUILD = False
else: else:
CPU_ONLY_BUILD = True CPU_ONLY_BUILD = True
from spconv.core_cc.csrc.sparse.all import SpconvOps
# Versions of cumm and pccm as reported by the compiled SpconvOps extension.
# These are queried once at import time; per the SpconvOps docstrings they are
# the versions in effect when spconv was built, which may differ from the
# cumm/pccm packages installed in the current environment.
BUILD_CUMM_VERSION = SpconvOps.cumm_version()
BUILD_PCCM_VERSION = SpconvOps.pccm_version()
...@@ -12,12 +12,14 @@ ...@@ -12,12 +12,14 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import cumm
from cumm.common import TensorView, TensorViewCPU, TensorViewKernel, ThrustLib from cumm.common import TensorView, TensorViewCPU, TensorViewKernel, ThrustLib
from cumm.conv.bases import ConvOpType, NHWC from cumm.conv.bases import ConvOpType, NHWC
from cumm.conv.params import ConvProblem from cumm.conv.params import ConvProblem
from cumm import dtypes from cumm import dtypes
from cumm.constants import CUMM_CPU_ONLY_BUILD from cumm.constants import CUMM_CPU_ONLY_BUILD
import pccm import pccm
from pccm.__version__ import __version__ as pccm_version
from ccimport import compat from ccimport import compat
from .pointops import Point2Voxel, Point2VoxelCPU from .pointops import Point2Voxel, Point2VoxelCPU
from .indices import SparseConvIndicesKernel, CudaCommonKernel, SparseConvIndicesCPU from .indices import SparseConvIndicesKernel, CudaCommonKernel, SparseConvIndicesCPU
...@@ -99,6 +101,28 @@ class SpconvOps(pccm.Class): ...@@ -99,6 +101,28 @@ class SpconvOps(pccm.Class):
indices, indices,
f"SpconvIndices{ndim}D") f"SpconvIndices{ndim}D")
@pccm.pybind.mark
@pccm.static_function
def cumm_version(self):
    """get cumm version when build spconv.
    """
    # The f-string is interpolated when this Python method runs, so the
    # emitted C++ body returns a fixed string literal holding the value of
    # cumm.__version__ at that moment.
    version_fn = pccm.FunctionCode()
    version_fn.raw(f"""
return \"{cumm.__version__}\";
""")
    # Declare the C++ return type of the generated function.
    return version_fn.ret("std::string")
@pccm.pybind.mark
@pccm.static_function
def pccm_version(self):
    """get pccm version when build spconv.
    """
    # The f-string is interpolated when this Python method runs, so the
    # emitted C++ body returns a fixed string literal holding the value of
    # the imported pccm_version at that moment.
    version_fn = pccm.FunctionCode()
    version_fn.raw(f"""
return \"{pccm_version}\";
""")
    # Declare the C++ return type of the generated function.
    return version_fn.ret("std::string")
@pccm.pybind.mark @pccm.pybind.mark
@pccm.cuda.static_function @pccm.cuda.static_function
def generate_conv_inds_stage1(self): def generate_conv_inds_stage1(self):
......
...@@ -16,6 +16,7 @@ import torch ...@@ -16,6 +16,7 @@ import torch
from torch import nn from torch import nn
from torch.autograd import Function from torch.autograd import Function
from typing import Optional, TypeVar from typing import Optional, TypeVar
from spconv.pytorch.core import SparseConvTensor
from spconv.tools import CUDAKernelTimer from spconv.tools import CUDAKernelTimer
from spconv.pytorch import ops from spconv.pytorch import ops
from spconv.pytorch.constants import PYTORCH_VERSION from spconv.pytorch.constants import PYTORCH_VERSION
...@@ -287,3 +288,18 @@ indice_inverse_conv = SparseInverseConvFunction.apply ...@@ -287,3 +288,18 @@ indice_inverse_conv = SparseInverseConvFunction.apply
indice_subm_conv = SubMConvFunction.apply indice_subm_conv = SubMConvFunction.apply
indice_maxpool = SparseMaxPoolFunction.apply indice_maxpool = SparseMaxPoolFunction.apply
indice_maxpool_implicit_gemm = SparseMaxPoolImplicitGemmFunction.apply indice_maxpool_implicit_gemm = SparseMaxPoolImplicitGemmFunction.apply
def sparse_add(a: SparseConvTensor, b: SparseConvTensor):
    """Add two sparse conv tensors by going through ``torch.sparse``.

    Both inputs are converted to hybrid COO tensors (sparse over the index
    columns, dense over the remaining feature dims), summed, and the result
    is wrapped in a new ``SparseConvTensor``. Coordinates present in both
    inputs have their features summed; coordinates present in only one input
    are carried over.

    Args:
        a: first operand.
        b: second operand.

    Returns:
        A ``SparseConvTensor`` whose spatial shape is the element-wise maximum
        of the two input spatial shapes and whose batch size is the larger of
        the two batch sizes.
    """
    res_shape = [max(sa, sb) for sa, sb in zip(a.spatial_shape, b.spatial_shape)]
    batch_size = max(a.batch_size, b.batch_size)

    # Give both COO tensors the same explicit size. When the size is omitted,
    # torch infers it from the largest index present, so two inputs covering
    # different extents would get different sizes and the add would fail.
    # NOTE(review): assumes index columns are (batch, *spatial), i.e. one
    # column more than len(spatial_shape) -- confirm against SparseConvTensor.
    dense_size = [batch_size, *res_shape, *a.features.shape[1:]]
    a_th = torch.sparse_coo_tensor(a.indices.T, a.features, size=dense_size)
    b_th = torch.sparse_coo_tensor(b.indices.T, b.features, size=dense_size)

    # The raw sum may hold the same coordinate twice (once per operand).
    # coalesce() merges duplicates by summing their values, and is required
    # before indices()/values() can be read.
    c_th = (a_th + b_th).coalesce()

    # torch.sparse stores indices as int64; convert back to the dtype of the
    # input indices so the result matches what the inputs carried.
    c_th_inds = c_th.indices().T.contiguous().to(a.indices.dtype)
    c_th_feats = c_th.values().contiguous()
    return SparseConvTensor(c_th_feats, c_th_inds, res_shape, batch_size)
...@@ -77,7 +77,7 @@ class SparseConvTester: ...@@ -77,7 +77,7 @@ class SparseConvTester:
self.kv: int = np.prod(self.ksize) self.kv: int = np.prod(self.ksize)
self.num_split = 1 if algo == ConvAlgo.MaskImplicitGemm else 2 self.num_split = 1 if algo == ConvAlgo.MaskImplicitGemm else 2
sparse_dict = generate_sparse_data(shape, [1500] * bs, C) sparse_dict = generate_sparse_data(shape, [N] * bs, C)
voxels_np = np.ascontiguousarray(sparse_dict["features"]).astype( voxels_np = np.ascontiguousarray(sparse_dict["features"]).astype(
np.float32) np.float32)
...@@ -241,7 +241,13 @@ def _test_impgemm_conv_cuda(subm: bool): ...@@ -241,7 +241,13 @@ def _test_impgemm_conv_cuda(subm: bool):
for shape, bs, C, K, k, s, p, d, algo, dtype in tqdm.tqdm(params_grid( for shape, bs, C, K, k, s, p, d, algo, dtype in tqdm.tqdm(params_grid(
shapes, batchsizes, in_channels, out_channels, ksizes, shapes, batchsizes, in_channels, out_channels, ksizes,
strides, paddings, dilations, algos, dtypes)): strides, paddings, dilations, algos, dtypes)):
tester = SparseConvTester(algo, subm, shape, bs, dtype, 1500, K, C, k, s, p, d) shape_prod = np.prod(shape)
num_batch = np.random.randint(int(0.2 * shape_prod), int(0.7 * shape_prod))
C = np.random.randint(int(0.3 * C), int(0.7 * C))
K = np.random.randint(int(0.3 * K), int(0.7 * K))
# print(num_batch)
tester = SparseConvTester(algo, subm, shape, bs, dtype, num_batch, K, C, k, s, p, d)
atol, rtol = dtype_to_tol[dtype] atol, rtol = dtype_to_tol[dtype]
mask_width_to_mask_out_fwd: Dict[int, torch.Tensor] = {} mask_width_to_mask_out_fwd: Dict[int, torch.Tensor] = {}
mask_width_to_mask_out_bwd: Dict[int, torch.Tensor] = {} mask_width_to_mask_out_bwd: Dict[int, torch.Tensor] = {}
...@@ -364,7 +370,9 @@ def _test_impgemm_conv_cuda(subm: bool): ...@@ -364,7 +370,9 @@ def _test_impgemm_conv_cuda(subm: bool):
test_case.assertAllClose(out_ref, out_my, atol=atol, rtol=rtol) test_case.assertAllClose(out_ref, out_my, atol=atol, rtol=rtol)
else: else:
error_norm = np.linalg.norm(out_ref.reshape(-1) - out_my.reshape(-1)) error_norm = np.linalg.norm(out_ref.reshape(-1) - out_my.reshape(-1))
assert error_norm < 5 if (error_norm > 5):
print(f"{desp}, Error={error_norm}")
assert error_norm < 10
# print(desp, ) # print(desp, )
else: else:
din_my = inp_tv.cpu().numpy() din_my = inp_tv.cpu().numpy()
...@@ -441,7 +449,9 @@ def _test_impgemm_conv_cuda(subm: bool): ...@@ -441,7 +449,9 @@ def _test_impgemm_conv_cuda(subm: bool):
else: else:
error_norm = np.linalg.norm(dw_ref.reshape(-1) - dw_my.reshape(-1)) error_norm = np.linalg.norm(dw_ref.reshape(-1) - dw_my.reshape(-1))
# print(desp, error_norm) # print(desp, error_norm)
assert error_norm < 5 if (error_norm > 5):
print(f"{desp}, Error={error_norm}")
assert error_norm < 10
def _test_native_conv_cuda(subm: bool): def _test_native_conv_cuda(subm: bool):
ndim = 3 ndim = 3
...@@ -653,6 +663,7 @@ def _test_native_conv_cuda(subm: bool): ...@@ -653,6 +663,7 @@ def _test_native_conv_cuda(subm: bool):
def test_all_algo_unit(): def test_all_algo_unit():
# for i in range(5):
_test_impgemm_conv_cuda(True) _test_impgemm_conv_cuda(True)
_test_impgemm_conv_cuda(False) _test_impgemm_conv_cuda(False)
_test_native_conv_cuda(True) _test_native_conv_cuda(True)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment