Commit 9d1e33d6 authored by yan.yan's avatar yan.yan
Browse files

temp commit

parent 0da847e2
......@@ -2,5 +2,13 @@ from .basic import bench_basic
import fire
def bench_me_basic(dtype_str: str):
from spconv.benchmark.me import bench_me_basic
return bench_me_basic(dtype_str)
def bench_torchsparse_basic(dtype_str: str):
from spconv.benchmark.thsp import bench_torchsparse_basic
return bench_torchsparse_basic(dtype_str)
if __name__ == "__main__":
fire.Fire()
......@@ -135,10 +135,6 @@ class Net(nn.Module):
# spconv.SparseInverseConv3d(128, 64, 2, indice_key="m4", bias=False, algo=algo),
)
max_batch_size = 1
# grid (dense map) is used for indice generation. use pre-allocated grid can run faster.
self.grid = torch.full([max_batch_size, *shape], -1,
dtype=torch.int32).cuda()
# self.grid = None
self.shape = shape
def forward(self, features, coors, batch_size, enable_timer: bool = False):
......@@ -146,7 +142,6 @@ class Net(nn.Module):
coors,
self.shape,
batch_size,
self.grid,
enable_timer=enable_timer)
return self.net(x)
......@@ -176,24 +171,24 @@ def bench_basic(dtype_str: str):
dout_t = torch.from_numpy(dout).to(device).to(torch_dtype)
times = []
with torch.no_grad():
for i in range(20):
for i in range(100):
torch.cuda.synchronize()
t = time.time()
out_nograd = net(voxels_th, coors_th, 1, False)
timer = out_nograd._timer
torch.cuda.synchronize()
times.append(time.time() - t)
print(f"basic[{dtype_str}|{algo}|forward]", np.mean(times[10:]))
print(f"basic[{dtype_str}|{algo}|forward]", np.mean(times[50:]))
times = []
for i in range(10):
for i in range(50):
out = net(voxels_th, coors_th, 1)
torch.cuda.synchronize()
t = time.time()
out.features.backward(dout_t)
torch.cuda.synchronize()
times.append(time.time() - t)
print(f"basic[{dtype_str}|{algo}|backward]", np.mean(times[5:]))
print(f"basic[{dtype_str}|{algo}|backward]", np.mean(times[25:]))
if __name__ == "__main__":
bench_basic("f16")
\ No newline at end of file
......@@ -5,7 +5,8 @@ from io import BytesIO
import numpy as np
from spconv.constants import PACKAGE_ROOT
RAW_PC_PATH = "https://raw.githubusercontent.com/traveller59/spconv/v2.1.10/test/data/test_spconv.pkl"
RAW_TEST_DATA_PATH = "https://raw.githubusercontent.com/traveller59/spconv/v2.1.10/test/data/test_spconv.pkl"
RAW_PC_PATH = "https://raw.githubusercontent.com/traveller59/spconv/v2.1.10/test/data/benchmark-pc.npz"
def get_voxel_data():
editable_test_data_path = PACKAGE_ROOT.parent / "test/data/test_spconv.pkl"
......@@ -13,7 +14,7 @@ def get_voxel_data():
with editable_test_data_path.open("rb") as f:
return pickle.load(f)
ff = BytesIO()
with requests.get(RAW_PC_PATH, stream=True) as req:
with requests.get(RAW_TEST_DATA_PATH, stream=True) as req:
req.raise_for_status()
for chunk in req.iter_content(chunk_size=8192):
ff.write(chunk)
......
"""Benchmark MinkowskiEngine
"""
from spconv.benchmark.core import get_voxel_data
import time
from pathlib import Path
import numpy as np
import torch
from torch import nn
from spconv.core import ConvAlgo
from cumm import dtypes
from spconv.test_utils import params_grid
_DTYPE_TO_TORCH_DTYPE = {
dtypes.float32: torch.float32,
dtypes.float16: torch.float16,
}
def bench_me_basic(dtype_str: str):
dtype = dtypes.get_dtype_by_shortcut(dtype_str)
if dtype not in _DTYPE_TO_TORCH_DTYPE:
raise NotImplementedError("only support bench f32 and f16 for now")
torch_dtype = _DTYPE_TO_TORCH_DTYPE[dtype]
"""Benchmark torchsparse
"""
from spconv.benchmark.core import get_voxel_data
import time
from pathlib import Path
import numpy as np
import torch
from torch import nn
from spconv.core import ConvAlgo
from cumm import dtypes
from spconv.test_utils import params_grid
_DTYPE_TO_TORCH_DTYPE = {
dtypes.float32: torch.float32,
dtypes.float16: torch.float16,
}
def bench_torchsparse_basic(dtype_str: str):
dtype = dtypes.get_dtype_by_shortcut(dtype_str)
if dtype not in _DTYPE_TO_TORCH_DTYPE:
raise NotImplementedError("only support bench f32 and f16 for now")
torch_dtype = _DTYPE_TO_TORCH_DTYPE[dtype]
......@@ -36,4 +36,16 @@ SAVED_WEIGHT_LAYOUT = os.getenv("SPCONV_SAVED_WEIGHT_LAYOUT", "")
if SAVED_WEIGHT_LAYOUT != "":
assert SAVED_WEIGHT_LAYOUT in ["KRSC", "RSKC", "RSCK"], "please set SAVED_WEIGHT_LAYOUT to KRSC, RSKC or RSCK"
ALL_WEIGHT_IS_KRSC = True
\ No newline at end of file
ALL_WEIGHT_IS_KRSC = True
SPCONV_DEBUG_SAVE_PATH = os.getenv("SPCONV_DEBUG_SAVE_PATH", "")
_BOOST_ROOT = os.getenv("BOOST_ROOT", None)
if _BOOST_ROOT is None:
BOOST_ROOT = None
else:
BOOST_ROOT = Path(_BOOST_ROOT)
assert BOOST_ROOT.exists(), "you provide BOOST_ROOT, but it not exists"
assert (BOOST_ROOT / "boost" / "geometry").exists(), "you provide BOOST_ROOT, but BOOST_ROOT/boost/geometry not exists"
......@@ -5,6 +5,20 @@ class ThrustCustomAllocatorV2:
alloc_func: Callable[int, int]
class SpconvOps:
@staticmethod
def cumm_version() -> str:
"""
get cumm version when build spconv.
"""
...
@staticmethod
def pccm_version() -> str:
"""
get pccm version when build spconv.
"""
...
@staticmethod
def generate_conv_inds_stage1(indices: Tensor, indice_pairs: Tensor, indice_pairs_uniq: Tensor, indice_num_per_loc: Tensor, batch_size: int, output_dims: List[int], input_dims: List[int], ksize: List[int], stride: List[int], padding: List[int], dilation: List[int], transposed: bool = False, stream_int: int = 0) -> None:
"""
Args:
......
......@@ -18,3 +18,8 @@ if hasattr(_ext, "cumm"):
CPU_ONLY_BUILD = False
else:
CPU_ONLY_BUILD = True
from spconv.core_cc.csrc.sparse.all import SpconvOps
BUILD_CUMM_VERSION = SpconvOps.cumm_version()
BUILD_PCCM_VERSION = SpconvOps.pccm_version()
......@@ -12,12 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import cumm
from cumm.common import TensorView, TensorViewCPU, TensorViewKernel, ThrustLib
from cumm.conv.bases import ConvOpType, NHWC
from cumm.conv.params import ConvProblem
from cumm import dtypes
from cumm.constants import CUMM_CPU_ONLY_BUILD
import pccm
from pccm.__version__ import __version__ as pccm_version
from ccimport import compat
from .pointops import Point2Voxel, Point2VoxelCPU
from .indices import SparseConvIndicesKernel, CudaCommonKernel, SparseConvIndicesCPU
......@@ -98,6 +100,28 @@ class SpconvOps(pccm.Class):
self.add_impl_only_param_class(cuda_funcs, f"ops{ndim}d",
indices,
f"SpconvIndices{ndim}D")
@pccm.pybind.mark
@pccm.static_function
def cumm_version(self):
"""get cumm version when build spconv.
"""
code = pccm.FunctionCode()
code.raw(f"""
return \"{cumm.__version__}\";
""")
return code.ret("std::string")
@pccm.pybind.mark
@pccm.static_function
def pccm_version(self):
"""get pccm version when build spconv.
"""
code = pccm.FunctionCode()
code.raw(f"""
return \"{pccm_version}\";
""")
return code.ret("std::string")
@pccm.pybind.mark
@pccm.cuda.static_function
......
......@@ -16,6 +16,7 @@ import torch
from torch import nn
from torch.autograd import Function
from typing import Optional, TypeVar
from spconv.pytorch.core import SparseConvTensor
from spconv.tools import CUDAKernelTimer
from spconv.pytorch import ops
from spconv.pytorch.constants import PYTORCH_VERSION
......@@ -287,3 +288,18 @@ indice_inverse_conv = SparseInverseConvFunction.apply
indice_subm_conv = SubMConvFunction.apply
indice_maxpool = SparseMaxPoolFunction.apply
indice_maxpool_implicit_gemm = SparseMaxPoolImplicitGemmFunction.apply
def sparse_add(a: SparseConvTensor, b: SparseConvTensor):
a_th = torch.sparse_coo_tensor(a.indices.T, a.features)
b_th = torch.sparse_coo_tensor(b.indices.T, b.features)
a_shape = a.spatial_shape
b_shape = b.spatial_shape
res_shape = []
for sa, sb in zip(a_shape, b_shape):
res_shape.append(max(sa, sb))
c_th = a_th + b_th
c_th_inds = c_th.indices().T.contiguous()
assert c_th.is_contiguous()
return SparseConvTensor(c_th.values(), c_th_inds, res_shape, max(a.batch_size, b.batch_size))
......@@ -77,7 +77,7 @@ class SparseConvTester:
self.kv: int = np.prod(self.ksize)
self.num_split = 1 if algo == ConvAlgo.MaskImplicitGemm else 2
sparse_dict = generate_sparse_data(shape, [1500] * bs, C)
sparse_dict = generate_sparse_data(shape, [N] * bs, C)
voxels_np = np.ascontiguousarray(sparse_dict["features"]).astype(
np.float32)
......@@ -241,7 +241,13 @@ def _test_impgemm_conv_cuda(subm: bool):
for shape, bs, C, K, k, s, p, d, algo, dtype in tqdm.tqdm(params_grid(
shapes, batchsizes, in_channels, out_channels, ksizes,
strides, paddings, dilations, algos, dtypes)):
tester = SparseConvTester(algo, subm, shape, bs, dtype, 1500, K, C, k, s, p, d)
shape_prod = np.prod(shape)
num_batch = np.random.randint(int(0.2 * shape_prod), int(0.7 * shape_prod))
C = np.random.randint(int(0.3 * C), int(0.7 * C))
K = np.random.randint(int(0.3 * K), int(0.7 * K))
# print(num_batch)
tester = SparseConvTester(algo, subm, shape, bs, dtype, num_batch, K, C, k, s, p, d)
atol, rtol = dtype_to_tol[dtype]
mask_width_to_mask_out_fwd: Dict[int, torch.Tensor] = {}
mask_width_to_mask_out_bwd: Dict[int, torch.Tensor] = {}
......@@ -364,7 +370,9 @@ def _test_impgemm_conv_cuda(subm: bool):
test_case.assertAllClose(out_ref, out_my, atol=atol, rtol=rtol)
else:
error_norm = np.linalg.norm(out_ref.reshape(-1) - out_my.reshape(-1))
assert error_norm < 5
if (error_norm > 5):
print(f"{desp}, Error={error_norm}")
assert error_norm < 10
# print(desp, )
else:
din_my = inp_tv.cpu().numpy()
......@@ -441,7 +449,9 @@ def _test_impgemm_conv_cuda(subm: bool):
else:
error_norm = np.linalg.norm(dw_ref.reshape(-1) - dw_my.reshape(-1))
# print(desp, error_norm)
assert error_norm < 5
if (error_norm > 5):
print(f"{desp}, Error={error_norm}")
assert error_norm < 10
def _test_native_conv_cuda(subm: bool):
ndim = 3
......@@ -653,6 +663,7 @@ def _test_native_conv_cuda(subm: bool):
def test_all_algo_unit():
# for i in range(5):
_test_impgemm_conv_cuda(True)
_test_impgemm_conv_cuda(False)
_test_native_conv_cuda(True)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment