Commit eae6a3bd authored by yan.yan
Browse files

v2.1

parent fa995a4f
from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
from pccm.stubs import EnumValue, EnumClassValue
from ...cumm.gemm.main import GemmAlgoDesp
from cumm.tensorview import Tensor
class ConvAlgoDesp(GemmAlgoDesp):
    """Stub for ``ConvAlgoDesp``, declared as a subclass of ``GemmAlgoDesp``.

    Only attribute types and method signatures are declared; every method
    body is a placeholder, so no behavior can be read from this block.
    """

    ndim: int
    op_type: int
    iter_algo: int
    # The i / w / o suffixes presumably stand for input, weight and output
    # -- TODO confirm against the implementation.
    layout_i: int
    layout_w: int
    layout_o: int
    interleave_i: int
    interleave_w: int
    interleave_o: int
    mask_sparse: bool
    increment_k_first: bool

    def __init__(self, ndim: int, op_type: int) -> None:
        """Construct from ``ndim`` and ``op_type``.

        Args:
            ndim: int; presumably the number of spatial dimensions
                -- TODO confirm.
            op_type: int operation-type code; valid values are not visible
                in this stub.
        """

    def __repr__(self) -> str:
        ...

    @staticmethod
    def conv_iwo_012_to_abc(op_type: int) -> List[int]:
        """Return a list of ints for ``op_type``.

        NOTE(review): the name suggests a mapping from conv operand order
        (input, weight, output) to GEMM operands (a, b, c) -- confirm.

        Args:
            op_type: int operation-type code.
        """

    @staticmethod
    def gemm_abc_012_to_iwo(op_type: int) -> List[int]:
        """Return a list of ints for ``op_type``.

        NOTE(review): the name suggests the inverse of
        ``conv_iwo_012_to_abc`` -- confirm.

        Args:
            op_type: int operation-type code.
        """

    @property
    def dtype_input(self) -> int:
        ...

    @property
    def dtype_weight(self) -> int:
        ...

    @property
    def dtype_output(self) -> int:
        ...

    def supported(
        self,
        m: int,
        n: int,
        k: int,
        C: int,
        K: int,
        mask_width: int,
    ) -> bool:
        """Return a bool for the given sizes.

        NOTE(review): presumably reports whether this algorithm can handle
        the given problem -- confirm.

        Args:
            m: int; presumably a GEMM dimension -- TODO confirm.
            n: int; presumably a GEMM dimension -- TODO confirm.
            k: int; presumably a GEMM dimension -- TODO confirm.
            C: int; presumably the input channel count -- TODO confirm.
            K: int; presumably the output channel count -- TODO confirm.
            mask_width: int; meaning not visible in this stub.
        """

    def query_conv_workspace_size(
        self,
        m: int,
        n: int,
        k: int,
        split_k_slices: int,
        kv: int,
    ) -> int:
        """Return an int for the given sizes.

        NOTE(review): presumably the required workspace size; the unit
        (bytes or elements) is not visible here -- confirm.

        Args:
            m: int; presumably a GEMM dimension -- TODO confirm.
            n: int; presumably a GEMM dimension -- TODO confirm.
            k: int; presumably a GEMM dimension -- TODO confirm.
            split_k_slices: int; meaning not visible in this stub.
            kv: int; presumably the kernel volume -- TODO confirm.
        """

    def supported_ldx_conv(self, ldi: int, ldw: int, ldo: int) -> bool:
        """Return a bool for the three ``ld*`` values.

        Args:
            ldi: int; presumably the leading dimension of the input
                -- TODO confirm.
            ldw: int; presumably the leading dimension of the weight
                -- TODO confirm.
            ldo: int; presumably the leading dimension of the output
                -- TODO confirm.
        """
class ConvParams:
    """Stub for ``ConvParams``: a bundle of typed attributes.

    Only the attribute types, a few ``Tensor()`` defaults and the constructor
    signature are declared here.
    """

    # Declared as Any in this stub; presumably a ConvAlgoDesp -- TODO confirm.
    conv_algo_desp: Any

    input: Tensor
    weight: Tensor
    output: Tensor

    split_k_slices: int
    padding: List[int]
    stride: List[int]
    dilation: List[int]
    alpha: float
    beta: float

    mask_width: int
    mask_filter: int
    reverse_mask: bool
    verbose: bool

    # These five tensors default to a freshly constructed Tensor().
    workspace: Tensor = Tensor()
    mask: Tensor = Tensor()
    mask_argsort: Tensor = Tensor()
    indices: Tensor = Tensor()
    mask_output: Tensor = Tensor()

    stream: int

    def __init__(self, ndim: int, op_type: int) -> None:
        """Construct from ``ndim`` and ``op_type``.

        Args:
            ndim: int; presumably the number of spatial dimensions
                -- TODO confirm.
            op_type: int operation-type code; valid values are not visible
                in this stub.
        """
class ConvMainUnitTest:
    """Stub for ``ConvMainUnitTest``: three static entry points, no state."""

    @staticmethod
    def extract_mnk(
        op_type: int,
        N: int,
        C: int,
        K: int,
        kernel_volume: int,
        in_prod: int,
        out_prod: int,
        mask_sparse: bool,
    ) -> List[int]:
        """Return a list of ints computed from the convolution sizes.

        NOTE(review): the name suggests the result is ``[m, n, k]`` of the
        equivalent GEMM -- confirm against the implementation.

        Args:
            op_type: int operation-type code.
            N: int; meaning not visible in this stub.
            C: int; presumably the input channel count -- TODO confirm.
            K: int; presumably the output channel count -- TODO confirm.
            kernel_volume: int; presumably the product of the kernel sizes
                -- TODO confirm.
            in_prod: int; meaning not visible in this stub.
            out_prod: int; meaning not visible in this stub.
            mask_sparse: bool flag; meaning not visible in this stub.
        """

    @staticmethod
    def implicit_gemm2(params: ConvParams) -> None:
        """Take a ``ConvParams`` and return nothing.

        Args:
            params: the parameter bundle; which fields are read or written
                is not visible in this stub.
        """

    @staticmethod
    def get_all_conv_algo_desp() -> List[ConvAlgoDesp]:
        ...
from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
from pccm.stubs import EnumValue, EnumClassValue
from cumm.tensorview import Tensor
class ScatterAll:
    """Stub for ``ScatterAll``; only signatures are declared here."""

    def __init__(self) -> None:
        ...

    @staticmethod
    def get_all_scatter_params() -> List[Tuple[int, int, int, int]]:
        ...

    def supported_scatter(
        self,
        tile_m: int,
        tile_k_bytes: int,
        bytes_per_access: int,
        num_threads: int,
        channel_size: int,
        dtype: int,
    ) -> bool:
        """Return a bool for the given kernel parameters.

        Args:
            tile_m: int; meaning not visible in this stub.
            tile_k_bytes: int; meaning not visible in this stub.
            bytes_per_access: int; meaning not visible in this stub.
            num_threads: int; meaning not visible in this stub.
            channel_size: int; meaning not visible in this stub.
            dtype: int dtype code; valid values are not visible here.
        """

    @staticmethod
    def stream_synchronize(stream: int = 0) -> None:
        """Take a stream handle and return nothing.

        Args:
            stream: int, defaults to 0; presumably a raw stream handle
                -- TODO confirm.
        """

    def scatter(
        self,
        output: Tensor,
        input: Tensor,
        indices: Tensor,
        tile_m: int,
        tile_k_bytes: int,
        bytes_per_access: int,
        num_threads: int,
        stream: int = 0,
    ) -> None:
        """Scatter entry point with explicit kernel parameters.

        Args:
            output: destination tensor (shape/dtype not visible here).
            input: source tensor (shape/dtype not visible here).
            indices: index tensor (shape/dtype not visible here).
            tile_m: int; meaning not visible in this stub.
            tile_k_bytes: int; meaning not visible in this stub.
            bytes_per_access: int; meaning not visible in this stub.
            num_threads: int; meaning not visible in this stub.
            stream: int, defaults to 0.
        """

    def scatter2(
        self,
        output: Tensor,
        input: Tensor,
        indices: Tensor,
        size: int,
        stream: int = 0,
    ) -> None:
        """Scatter entry point taking a single ``size`` value.

        Args:
            output: destination tensor (shape/dtype not visible here).
            input: source tensor (shape/dtype not visible here).
            indices: index tensor (shape/dtype not visible here).
            size: int; meaning not visible in this stub.
            stream: int, defaults to 0.
        """
class GatherAll:
    """Stub for ``GatherAll``; only signatures are declared here."""

    def __init__(self) -> None:
        ...

    @staticmethod
    def get_all_gather_params() -> List[Tuple[int, int, int, int]]:
        ...

    @staticmethod
    def supported(bytes_per_access: int, channel_size: int, dtype: int) -> bool:
        """Return a bool for the given access parameters.

        Args:
            bytes_per_access: int; meaning not visible in this stub.
            channel_size: int; meaning not visible in this stub.
            dtype: int dtype code; valid values are not visible here.
        """

    @staticmethod
    def stream_synchronize(stream: int = 0) -> None:
        """Take a stream handle and return nothing.

        Args:
            stream: int, defaults to 0; presumably a raw stream handle
                -- TODO confirm.
        """

    def gather(
        self,
        output: Tensor,
        input: Tensor,
        indices: Tensor,
        tile_m: int,
        tile_k_bytes: int,
        bytes_per_access: int,
        num_threads: int,
        stream: int = 0,
    ) -> None:
        """Gather entry point with explicit kernel parameters.

        Args:
            output: destination tensor (shape/dtype not visible here).
            input: source tensor (shape/dtype not visible here).
            indices: index tensor (shape/dtype not visible here).
            tile_m: int; meaning not visible in this stub.
            tile_k_bytes: int; meaning not visible in this stub.
            bytes_per_access: int; meaning not visible in this stub.
            num_threads: int; meaning not visible in this stub.
            stream: int, defaults to 0.
        """

    def gather2(
        self,
        output: Tensor,
        input: Tensor,
        indices: Tensor,
        size: int,
        stream: int = 0,
    ) -> None:
        """Gather entry point taking a single ``size`` value.

        Args:
            output: destination tensor (shape/dtype not visible here).
            input: source tensor (shape/dtype not visible here).
            indices: index tensor (shape/dtype not visible here).
            size: int; meaning not visible in this stub.
            stream: int, defaults to 0.
        """
......@@ -18,6 +18,7 @@ class GemmAlgoDesp:
element_per_access_a: int
element_per_access_b: int
element_per_access_c: int
access_per_vector: int
def __init__(self) -> None: ...
def __repr__(self) -> str: ...
@property
......
This diff is collapsed.
#!/home/yy/library/anaconda3/bin/python
# Developer benchmark script: sets up the import path, then pulls in the
# spconv pieces used by main() below.
import sys
from pathlib import Path
import ctypes
# _cudart = ctypes.CDLL('libcudart.so')
# Print, then append, the directory four levels above this file to sys.path.
# This has to happen before the spconv imports below; presumably it makes a
# source checkout of spconv importable -- TODO confirm.
print(str(Path(__file__).parent.parent.parent.parent))
sys.path.append(str(Path(__file__).parent.parent.parent.parent))
from spconv import tensorview as tv
from spconv.sparse import build
import numpy as np
from pathlib import Path
from spconv.spconv_ops_cc.sparse.all.ops import Point2Voxel
from spconv.spconv_ops_cc.sparse.all import SpconvOps
import time
def main(data_path="/home/yy/OneDrive/dev/spconv/test/data/benchmark-pc.npz",
         num_timing_runs=6):
    """Time ``Point2Voxel.point_to_voxel_hash`` and set up conv-index buffers.

    Steps:
      1. Load the ``"pc"`` array from the ``.npz`` file as float32.
      2. Build a ``Point2Voxel`` and move the points to the GPU.
      3. Call ``point_to_voxel_hash`` ``num_timing_runs`` times, printing the
         elapsed time of each call.
      4. Prefix the voxel indices with a zero column and allocate the
         buffers used by the (currently disabled) index-generation
         experiments kept as comments at the end.

    Args:
        data_path: path of the ``.npz`` file holding a ``"pc"`` array.
            Defaults to the original hard-coded developer path.
        num_timing_runs: how many timed calls to make (originally 6).
    """
    # Close the NpzFile deterministically; np.load(...)[key] alone leaves the
    # underlying file handle open.
    with np.load(data_path) as npz:
        data = npz["pc"].astype(np.float32)
    print(data.shape, data.dtype)

    # NOTE(review): the positional arguments are presumably voxel size,
    # coordinate range, feature count, max voxels and max points per voxel
    # -- confirm against Point2Voxel.
    p2v = Point2Voxel([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 3, 150000, 1)
    gs = p2v.grid_size  # zyx
    print(gs)

    data_tv = tv.from_numpy(data).cuda()
    for _ in range(num_timing_runs):
        # perf_counter is the monotonic, high-resolution clock intended for
        # measuring short durations.
        # NOTE(review): if point_to_voxel_hash returns before the GPU work
        # finishes, this only measures launch time -- confirm.
        start = time.perf_counter()
        voxels, indices, num_per_voxel = p2v.point_to_voxel_hash(data_tv)
        print(time.perf_counter() - start)
    voxels, indices, num_per_voxel = p2v.point_to_voxel_hash(data_tv)
    print(voxels.shape, gs)

    indices_np = indices.cpu().numpy()
    # Disabled uniqueness check of the flattened voxel coordinates:
    # indices_offset = indices_np[:, 0] * gs[1] * gs[2] + indices_np[:, 1] * gs[2] + indices_np[:, 2]
    # uq = np.unique(indices_offset)
    # print(uq.shape, indices_offset.shape, gs)
    # return

    ksize = [3] * 3
    kv = int(np.prod(ksize))

    # Column 0 stays zero (presumably the batch index -- TODO confirm);
    # columns 1.. hold the voxel indices.
    indices_with_bs = np.zeros((indices_np.shape[0], 4), dtype=np.int32)
    indices_with_bs[:, 1:] = indices_np
    print(indices_with_bs.mean(), indices_with_bs.max(), indices_with_bs.min())
    indices = tv.from_numpy(indices_with_bs).cuda()

    # Buffers consumed only by the disabled experiments below. They are still
    # allocated so the script's allocation behavior is unchanged.
    out_indices = tv.zeros([indices.dim(0) * kv, 4], tv.int32, 0)
    indice_num_per_loc = tv.zeros([kv], tv.int32, 0)
    points = voxels.view([-1, 3])
    hashdata = tv.zeros([points.dim(0) * kv * 2], tv.custom64, 0)
    hashdata_subm = tv.zeros([points.dim(0) * 2], tv.custom64, 0)
    indice_pairs = tv.full([2, kv, indices.dim(0)], -1, tv.int32, 0)
    indice_pairs_uniq = tv.zeros([indice_pairs.size // 2 + 1], tv.int32, 0)

    # Disabled experiments, kept for reference:
    # for i in range(10):
    #     indice_pairs.fill_int_(-1)
    #     np.random.shuffle(indices_with_bs)
    #     indices = tv.from_numpy(indices_with_bs).cuda()
    #     indice_num_per_loc.zero_()
    #     out_act = SpconvOps.generate_conv_inds(indices, hashdata, indice_pairs,
    #         indice_pairs_uniq, out_indices, indice_num_per_loc,
    #         1, gs, gs, [3, 3, 3], [1, 1, 1], [1, 1, 1], [1, 1, 1])
    # indice_num_per_loc.zero_()
    # out_act = SpconvOps.generate_subm_conv_inds(indices, hashdata_subm, indice_pairs,
    #     out_indices, indice_num_per_loc,
    #     1, gs, ksize, [1, 1, 1])
    # indice_num_per_loc_cpu = indice_num_per_loc.cpu().numpy()
    # indice_pairs_cpu = indice_pairs.cpu().numpy()
    # indice_pairs_cpu_flat = indice_pairs_cpu.reshape(-1)
    # uq, count = np.unique(indice_pairs_cpu_flat, return_counts=True)
    # print(out_act, indice_pairs_cpu.shape, indice_pairs_cpu.mean(), indice_num_per_loc_cpu.tolist())
    # print(indice_pairs_cpu[:, 13, :2])
    # print(uq, count)


if __name__ == "__main__":
    main()
\ No newline at end of file
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pccm
from cumm.common import TensorView
from typing import List
class GatherCPU(pccm.Class):
    """pccm class emitting CPU row-gather and row-scatter-add C++ functions.

    Both generated functions take three ``tv::Tensor`` arguments
    (``out``, ``in``, ``inds``), dispatch on ``out.dtype()`` over ``float``
    and ``double`` only, and read ``inds`` through ``data_ptr<const int>()``.

    NOTE(review): the generated code does no bounds or shape checking (the
    ``tv::check_shape`` calls are commented out); callers must pass valid
    row indices.
    """

    def __init__(self):
        super().__init__()
        self.add_dependency(TensorView)

    @pccm.static_function
    def gather(self):
        """Emit ``gather(out, in, inds)``.

        For each ``i`` in ``[0, inds.dim(0))`` the generated code copies
        ``in.dim(1)`` elements from row ``inds[i]`` of ``in`` into row ``i``
        of ``out`` using ``std::memcpy``.

        Returns:
            The ``pccm.FunctionCode`` describing the function.
        """
        code = pccm.FunctionCode()
        code.arg("out", "tv::Tensor")
        code.arg("in", "tv::Tensor")
        code.arg("inds", "tv::Tensor")
        # Plain (non-f) string: nothing is interpolated, so the C++ braces
        # need no doubling.
        # `auto channel` / `decltype(nhot)` keep whatever width dim() returns
        # so row offsets are not computed in a narrowed `int`
        # (presumably dim() is 64-bit -- TODO confirm).
        code.raw("""
        // tv::check_shape(inds, {out.dim(0)});
        auto nhot = inds.dim(0);
        auto channel = in.dim(1);
        tv::dispatch<float, double>(out.dtype(), [&](auto I){
            using T = TV_DECLTYPE(I);
            auto indices_data = inds.data_ptr<const int>();
            T *buffer_data = out.data_ptr<T>();
            const T *features_data = in.data_ptr<const T>();
            for (decltype(nhot) i = 0; i < nhot; ++i) {
                std::memcpy(buffer_data + i * channel,
                            features_data + indices_data[i] * channel,
                            sizeof(T) * channel);
            }
        });
        """)
        return code

    @pccm.static_function
    def scatter_add(self):
        """Emit ``scatter_add(out, in, inds)``.

        For each ``i`` in ``[0, inds.dim(0))`` the generated code adds row
        ``i`` of ``in`` element-wise into row ``inds[i]`` of ``out``
        (``in.dim(1)`` elements per row).

        Returns:
            The ``pccm.FunctionCode`` describing the function.
        """
        code = pccm.FunctionCode()
        code.arg("out", "tv::Tensor")
        code.arg("in", "tv::Tensor")
        code.arg("inds", "tv::Tensor")
        # The row pointers are declared inside the loop; the original
        # initialised them with extra data_ptr() calls and then immediately
        # overwrote them.
        code.raw("""
        // tv::check_shape(inds, {in.dim(0)});
        auto nhot = inds.dim(0);
        auto channel = in.dim(1);
        tv::dispatch<float, double>(out.dtype(), [&](auto I){
            using T = TV_DECLTYPE(I);
            auto indices_data = inds.data_ptr<const int>();
            const T *buffer_data = in.data_ptr<const T>();
            T *features_data = out.data_ptr<T>();
            for (decltype(nhot) i = 0; i < nhot; ++i) {
                const T *src = buffer_data + i * channel;
                T *dst = features_data + indices_data[i] * channel;
                for (decltype(channel) j = 0; j < channel; ++j) {
                    dst[j] += src[j];
                }
            }
        });
        """)
        return code
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -18,15 +18,17 @@ from torch.autograd import Function
import spconv.pytorch as spconv
#from torch.nn import Module
from spconv.pytorch.modules import SparseModule
from spconv.pytorch.core import SparseConvTensor
from typing import List
class JoinTable(SparseModule): # Module):
def forward(self, input):
def forward(self, input: List[SparseConvTensor]):
output = spconv.SparseConvTensor(
torch.cat([i.features for i in input], 1), input[1].indices,
input[1].spatial_shape, input[0].batch_size)
output.indice_dict = input[1].indice_dict
output.grid = input[1].grid
torch.cat([i.features for i in input], 1), input[0].indices,
input[0].spatial_shape, input[0].batch_size, input[0].grid, input[0].voxel_num,
input[0].indice_dict)
output.benchmark_record = input[1].benchmark_record
output.thrust_allocator = input[1].thrust_allocator
return output
def input_spatial_size(self, out_size):
......@@ -34,14 +36,13 @@ class JoinTable(SparseModule): # Module):
class AddTable(SparseModule): # Module):
def forward(self, input):
output = spconv.SparseConvTensor(sum([i.features for i in input]),
input[1].indices,
input[1].spatial_shape,
input[1].batch_size)
output.indice_dict = input[1].indice_dict
output.grid = input[1].grid
def forward(self, input: List[SparseConvTensor]):
output = spconv.SparseConvTensor(
sum([i.features for i in input]), input[0].indices,
input[0].spatial_shape, input[0].batch_size, input[0].grid, input[0].voxel_num,
input[0].indice_dict)
output.benchmark_record = input[1].benchmark_record
output.thrust_allocator = input[1].thrust_allocator
return output
def input_spatial_size(self, out_size):
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment