Commit e8bc31ec authored by EvernightAurora's avatar EvernightAurora
Browse files

apply low nk params

parent 2b195e43
...@@ -487,6 +487,56 @@ IMPLGEMM_VOLTA_PARAMS = [ ...@@ -487,6 +487,56 @@ IMPLGEMM_VOLTA_PARAMS = [
] ]
IMPLGEMM_TURING_PARAMS = [ IMPLGEMM_TURING_PARAMS = [
*gen_conv_params(ConvFwdAndBwdInput, (32, 16, 16), (16, 16, 16),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"],
NHWC,
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 8)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=0),
*gen_conv_params(ConvFwdAndBwdInput, (32, 16, 16), (16, 16, 16),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"],
NHWC,
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 8)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1),
*gen_conv_params(ConvFwdAndBwdInput, (64, 32, 16), (32, 16, 16),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"],
NHWC,
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 8)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=0),
*gen_conv_params(ConvFwdAndBwdInput, (64, 32, 16), (32, 16, 16),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2, ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"],
NHWC,
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 8)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1),
*gen_conv_params(ConvFwdAndBwdInput, (32, 64, 32), (32, 32, 16), *gen_conv_params(ConvFwdAndBwdInput, (32, 64, 32), (32, 32, 16),
NDIM_DONT_CARE, NDIM_DONT_CARE,
ConvIterAlgo.Optimized, ConvIterAlgo.Optimized,
...@@ -658,6 +708,35 @@ IMPLGEMM_TURING_PARAMS = [ ...@@ -658,6 +708,35 @@ IMPLGEMM_TURING_PARAMS = [
mask_sparse=True, mask_sparse=True,
increment_k_first=True, increment_k_first=True,
access_per_vector=1), access_per_vector=1),
*gen_conv_params(ConvBwdWeight, (64, 16, 32), (32, 16, 32),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2,
"f16,f16,f16,f32,f32",
NHWC,
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 8)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=0),
*gen_conv_params(ConvBwdWeight, (64, 16, 32), (32, 16, 32),
NDIM_DONT_CARE,
ConvIterAlgo.Optimized,
2,
"f16,f16,f16,f32,f32",
NHWC,
NHWC,
NHWC,
GemmAlgo.Turing,
TensorOp((16, 8, 8)),
mask_sparse=True,
increment_k_first=True,
access_per_vector=1),
# *gen_conv_params(ConvBwdWeight, (32, 64, 32), (32, 32, 16), NDIM_DONT_CARE, ConvIterAlgo.Optimized, 2, "f16,f16,f16,f32,f32", # *gen_conv_params(ConvBwdWeight, (32, 64, 32), (32, 32, 16), NDIM_DONT_CARE, ConvIterAlgo.Optimized, 2, "f16,f16,f16,f32,f32",
# NHWC, NHWC, NHWC, GemmAlgo.Turing, TensorOp((16, 8, 8)), mask_sparse=True, increment_k_first=True, access_per_vector=1), # NHWC, NHWC, NHWC, GemmAlgo.Turing, TensorOp((16, 8, 8)), mask_sparse=True, increment_k_first=True, access_per_vector=1),
......
...@@ -290,6 +290,49 @@ class Net2(nn.Module): ...@@ -290,6 +290,49 @@ class Net2(nn.Module):
return self.net(x) return self.net(x)
class NetSm(nn.Module):
    """Small sparse network made of four chained ``SubMConv3d`` layers.

    The channel widths go 3 -> 8 -> 16 -> 32 -> 64. Every layer uses kernel
    size 3, has no bias, and is given the same ``indice_key`` ("c0") and the
    same ``algo``.

    Args:
        shape: Spatial shape of the sparse tensor; also used to size the
            pre-allocated dense grid.
        algo: Convolution algorithm selector forwarded unchanged to each
            ``SubMConv3d`` layer.
    """

    def __init__(self, shape, algo):
        super().__init__()
        # Consecutive pairs of this tuple are the (in, out) channels per layer.
        channels = (3, 8, 16, 32, 64)
        layers = [
            spconv.SubMConv3d(in_ch,
                              out_ch,
                              3,
                              bias=False,
                              indice_key="c0",
                              algo=algo)
            for in_ch, out_ch in zip(channels[:-1], channels[1:])
        ]
        self.net = spconv.SparseSequential(*layers)

        max_batch_size = 1
        # The grid (dense map) is used for indice generation; using a
        # pre-allocated grid can run faster.
        dense_grid = torch.full([max_batch_size, *shape], -1,
                                dtype=torch.int32)
        self.grid = dense_grid.cuda()
        # self.grid = None
        self.shape = shape

    def forward(self, features, coors, batch_size, enable_timer: bool = False):
        """Wrap the inputs in a ``SparseConvTensor`` and run the layer stack.

        Args:
            features: Feature tensor passed to ``SparseConvTensor``.
            coors: Coordinate/index tensor passed to ``SparseConvTensor``.
            batch_size: Batch size passed to ``SparseConvTensor``.
            enable_timer: Forwarded to ``SparseConvTensor`` as ``enable_timer``.

        Returns:
            The output of ``self.net`` applied to the constructed sparse tensor.
        """
        sparse_input = spconv.SparseConvTensor(features,
                                               coors,
                                               self.shape,
                                               batch_size,
                                               self.grid,
                                               enable_timer=enable_timer)
        return self.net(sparse_input)
import numpy as np import numpy as np
from cumm import tensorview as tv from cumm import tensorview as tv
from spconv.core_cc.csrc.sparse.all import SpconvOps from spconv.core_cc.csrc.sparse.all import SpconvOps
...@@ -358,7 +401,7 @@ def main(): ...@@ -358,7 +401,7 @@ def main():
# MaskImpGemm: 51.0ms # MaskImpGemm: 51.0ms
# MaskSplitImpGemm: 41.1ms # MaskSplitImpGemm: 41.1ms
# algo = None # algo = None
net = Net(spatial_shape, algo).to(device).eval().to(dtype)# .train() net = NetSm(spatial_shape, algo).to(device).eval().to(dtype)# .train()
# net.load_state_dict(net.state_dict()) # net.load_state_dict(net.state_dict())
spconv.assign_name_for_sparse_modules(net) spconv.assign_name_for_sparse_modules(net)
print(coors_th.shape) print(coors_th.shape)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment