Commit a09b086a authored by traveller59's avatar traveller59
Browse files

1. fix #11: torch master doesn't support bool as a JIT operator argument.

2. fix SparseInverseConv problem
3. fix unittest problem
parent 8e7c4346
...@@ -91,7 +91,7 @@ class ExampleNet(nn.Module): ...@@ -91,7 +91,7 @@ class ExampleNet(nn.Module):
super().__init__() super().__init__()
self.net = spconv.SparseSequential( self.net = spconv.SparseSequential(
spconv.SparseConv3d(32, 64, 3, 2, indice_key="cp0"), spconv.SparseConv3d(32, 64, 3, 2, indice_key="cp0"),
spconv.SparseInverseConv3d(64, 32, indice_key="cp0"), spconv.SparseInverseConv3d(64, 32, 3, indice_key="cp0"), # need provide kernel size to create weight
) )
self.shape = shape self.shape = shape
......
...@@ -30,8 +30,10 @@ getIndicePair(torch::Tensor indices, int64_t batchSize, ...@@ -30,8 +30,10 @@ getIndicePair(torch::Tensor indices, int64_t batchSize,
std::vector<int64_t> outSpatialShape, std::vector<int64_t> spatialShape, std::vector<int64_t> outSpatialShape, std::vector<int64_t> spatialShape,
std::vector<int64_t> kernelSize, std::vector<int64_t> stride, std::vector<int64_t> kernelSize, std::vector<int64_t> stride,
std::vector<int64_t> padding, std::vector<int64_t> dilation, std::vector<int64_t> padding, std::vector<int64_t> dilation,
std::vector<int64_t> outPadding, bool subM, bool transpose) { std::vector<int64_t> outPadding, int64_t _subM, int64_t _transpose) {
// auto timer = spconv::CudaContextTimer<>(); // auto timer = spconv::CudaContextTimer<>();
bool subM = _subM != 0;
bool transpose = _transpose != 0;
auto numAct = indices.size(0); auto numAct = indices.size(0);
auto coorDim = indices.size(1) - 1; // batchIdx + xyz auto coorDim = indices.size(1) - 1; // batchIdx + xyz
TV_ASSERT_RT_ERR(NDim == coorDim, "error"); TV_ASSERT_RT_ERR(NDim == coorDim, "error");
...@@ -140,8 +142,10 @@ getIndicePairPreGrid(torch::Tensor indices, torch::Tensor gridOut, int64_t batch ...@@ -140,8 +142,10 @@ getIndicePairPreGrid(torch::Tensor indices, torch::Tensor gridOut, int64_t batch
std::vector<int64_t> outSpatialShape, std::vector<int64_t> spatialShape, std::vector<int64_t> outSpatialShape, std::vector<int64_t> spatialShape,
std::vector<int64_t> kernelSize, std::vector<int64_t> stride, std::vector<int64_t> kernelSize, std::vector<int64_t> stride,
std::vector<int64_t> padding, std::vector<int64_t> dilation, std::vector<int64_t> padding, std::vector<int64_t> dilation,
std::vector<int64_t> outPadding, bool subM, bool transpose) { std::vector<int64_t> outPadding, int64_t _subM, int64_t _transpose) {
// auto timer = spconv::CudaContextTimer<>(); // auto timer = spconv::CudaContextTimer<>();
bool subM = _subM != 0;
bool transpose = _transpose != 0;
auto numAct = indices.size(0); auto numAct = indices.size(0);
auto coorDim = indices.size(1) - 1; // batchIdx + xyz auto coorDim = indices.size(1) - 1; // batchIdx + xyz
TV_ASSERT_RT_ERR(NDim == coorDim, "error"); TV_ASSERT_RT_ERR(NDim == coorDim, "error");
...@@ -249,7 +253,9 @@ getIndicePairPreGrid(torch::Tensor indices, torch::Tensor gridOut, int64_t batch ...@@ -249,7 +253,9 @@ getIndicePairPreGrid(torch::Tensor indices, torch::Tensor gridOut, int64_t batch
template <typename T> template <typename T>
torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters, torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
torch::Tensor indicePairs, torch::Tensor indiceNum, torch::Tensor indicePairs, torch::Tensor indiceNum,
int64_t numActOut, bool inverse, bool subm) { int64_t numActOut, int64_t _inverse, int64_t _subM) {
bool subM = _subM != 0;
bool inverse = _inverse != 0;
auto device = features.device().type(); auto device = features.device().type();
auto ndim = filters.dim() - 2; auto ndim = filters.dim() - 2;
auto kernelVolume = indicePairs.size(0); auto kernelVolume = indicePairs.size(0);
...@@ -261,7 +267,7 @@ torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters, ...@@ -261,7 +267,7 @@ torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
int indicePairMaxOffset = indicePairMaxSizeIter - indicePairNumCpu.data<int>(); int indicePairMaxOffset = indicePairMaxSizeIter - indicePairNumCpu.data<int>();
int indicePairMaxSize = *indicePairMaxSizeIter; int indicePairMaxSize = *indicePairMaxSizeIter;
/*if (subm){ /*if (_subM){
std::vector<int> indicePairNumVec(indicePairNumCpu.data<int>(), indicePairNumCpu.data<int>() + kernelVolume); std::vector<int> indicePairNumVec(indicePairNumCpu.data<int>(), indicePairNumCpu.data<int>() + kernelVolume);
indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset); indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset);
...@@ -280,7 +286,7 @@ torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters, ...@@ -280,7 +286,7 @@ torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
torch::Tensor outputBuffer = torch::Tensor outputBuffer =
torch::zeros({indicePairMaxSize, numOutPlanes}, options); torch::zeros({indicePairMaxSize, numOutPlanes}, options);
filters = filters.view({-1, numInPlanes, numOutPlanes}); filters = filters.view({-1, numInPlanes, numOutPlanes});
if (subm) { // the center index of subm conv don't need gather and scatter if (subM) { // the center index of subm conv don't need gather and scatter
// add. // add.
torch::mm_out(output, features, filters[indicePairMaxOffset]); torch::mm_out(output, features, filters[indicePairMaxOffset]);
} }
...@@ -289,7 +295,7 @@ torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters, ...@@ -289,7 +295,7 @@ torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
double totalSAddTime = 0; double totalSAddTime = 0;
for (int i = 0; i < kernelVolume; ++i) { for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data<int>()[i]; auto nHot = indicePairNumCpu.data<int>()[i];
if (nHot <= 0 || (subm && i == indicePairMaxOffset)) { if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
continue; continue;
} }
// auto timer = spconv::CudaContextTimer<>(); // auto timer = spconv::CudaContextTimer<>();
...@@ -346,7 +352,10 @@ template <typename T> ...@@ -346,7 +352,10 @@ template <typename T>
std::vector<torch::Tensor> std::vector<torch::Tensor>
indiceConvBackward(torch::Tensor features, torch::Tensor filters, indiceConvBackward(torch::Tensor features, torch::Tensor filters,
torch::Tensor outGrad, torch::Tensor indicePairs, torch::Tensor indiceNum, torch::Tensor outGrad, torch::Tensor indicePairs, torch::Tensor indiceNum,
bool inverse, bool subm) { int64_t _inverse, int64_t _subM) {
bool subM = _subM != 0;
bool inverse = _inverse != 0;
auto device = features.device().type(); auto device = features.device().type();
auto ndim = filters.dim() - 2; auto ndim = filters.dim() - 2;
auto kernelVolume = indicePairs.size(0); auto kernelVolume = indicePairs.size(0);
...@@ -368,14 +377,14 @@ indiceConvBackward(torch::Tensor features, torch::Tensor filters, ...@@ -368,14 +377,14 @@ indiceConvBackward(torch::Tensor features, torch::Tensor filters,
filters = filters.view({-1, numInPlanes, numOutPlanes}); filters = filters.view({-1, numInPlanes, numOutPlanes});
filtersGrad = filtersGrad.view({-1, numInPlanes, numOutPlanes}); filtersGrad = filtersGrad.view({-1, numInPlanes, numOutPlanes});
if (subm) { if (subM) {
auto filterGradSub = filtersGrad[indicePairMaxOffset]; auto filterGradSub = filtersGrad[indicePairMaxOffset];
torch::mm_out(filterGradSub, features.t(), outGrad); torch::mm_out(filterGradSub, features.t(), outGrad);
torch::mm_out(inputGrad, outGrad, filters[indicePairMaxOffset].t()); torch::mm_out(inputGrad, outGrad, filters[indicePairMaxOffset].t());
} }
for (int i = 0; i < kernelVolume; ++i) { for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data<int>()[i]; auto nHot = indicePairNumCpu.data<int>()[i];
if (nHot <= 0 || (subm && i == indicePairMaxOffset)) { if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
continue; continue;
} }
if (device == torch::kCPU) { if (device == torch::kCPU) {
...@@ -426,7 +435,10 @@ indiceConvBackward(torch::Tensor features, torch::Tensor filters, ...@@ -426,7 +435,10 @@ indiceConvBackward(torch::Tensor features, torch::Tensor filters,
template <typename T> template <typename T>
torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor filters, torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor filters,
torch::Tensor indicePairs, torch::Tensor indiceNum, torch::Tensor indicePairs, torch::Tensor indiceNum,
int64_t numActOut, bool inverse, bool subm) { int64_t numActOut, int64_t _inverse, int64_t _subM) {
bool subM = _subM != 0;
bool inverse = _inverse != 0;
auto device = features.device().type(); auto device = features.device().type();
auto ndim = filters.dim() - 2; auto ndim = filters.dim() - 2;
auto kernelVolume = indicePairs.size(0); auto kernelVolume = indicePairs.size(0);
...@@ -443,7 +455,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil ...@@ -443,7 +455,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
indicePairNumCpu.data<int>() + kernelVolume); indicePairNumCpu.data<int>() + kernelVolume);
indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset); indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset);
int subRuleMaxSize = *std::max_element(indicePairNumVec.begin(), indicePairNumVec.end()); int subRuleMaxSize = *std::max_element(indicePairNumVec.begin(), indicePairNumVec.end());
if (subm) { if (subM) {
indicePairMaxSize = subRuleMaxSize; indicePairMaxSize = subRuleMaxSize;
} }
auto timer = spconv::CudaContextTimer<>(); auto timer = spconv::CudaContextTimer<>();
...@@ -459,7 +471,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil ...@@ -459,7 +471,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
torch::zeros({kernelVolume, indicePairMaxSize, numOutPlanes}, options); torch::zeros({kernelVolume, indicePairMaxSize, numOutPlanes}, options);
filters = filters.view({-1, numInPlanes, numOutPlanes}); filters = filters.view({-1, numInPlanes, numOutPlanes});
std::cout << "create time " << timer.report()/1000.0 << std::endl; std::cout << "create time " << timer.report()/1000.0 << std::endl;
if (subm) { // the center index of subm conv don't need gather and scatter if (subM) { // the center index of subm conv don't need gather and scatter
// add. // add.
torch::mm_out(output, features, filters[indicePairMaxOffset]); torch::mm_out(output, features, filters[indicePairMaxOffset]);
} }
...@@ -469,7 +481,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil ...@@ -469,7 +481,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
// auto timer = spconv::CudaContextTimer<>(); // auto timer = spconv::CudaContextTimer<>();
for (int i = 0; i < kernelVolume; ++i) { for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data<int>()[i]; auto nHot = indicePairNumCpu.data<int>()[i];
if (nHot <= 0 || (subm && i == indicePairMaxOffset)) { if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
continue; continue;
} }
// //
...@@ -499,7 +511,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil ...@@ -499,7 +511,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
// totalGatherTime += timer.report() / 1000.0; // totalGatherTime += timer.report() / 1000.0;
for (int i = 0; i < kernelVolume; ++i) { for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data<int>()[i]; auto nHot = indicePairNumCpu.data<int>()[i];
if (nHot <= 0 || (subm && i == indicePairMaxOffset)) { if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
continue; continue;
} }
auto outputBufferBlob = torch::from_blob(outputBuffer[i].data<T>(), auto outputBufferBlob = torch::from_blob(outputBuffer[i].data<T>(),
...@@ -513,7 +525,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil ...@@ -513,7 +525,7 @@ torch::Tensor indiceConvDevelopDontUse(torch::Tensor features, torch::Tensor fil
// totalGEMMTime += timer.report() / 1000.0; // totalGEMMTime += timer.report() / 1000.0;
for (int i = 0; i < kernelVolume; ++i) { for (int i = 0; i < kernelVolume; ++i) {
auto nHot = indicePairNumCpu.data<int>()[i]; auto nHot = indicePairNumCpu.data<int>()[i];
if (nHot <= 0 || (subm && i == indicePairMaxOffset)) { if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
continue; continue;
} }
auto outputBufferBlob = torch::from_blob(outputBuffer[i].data<T>(), auto outputBufferBlob = torch::from_blob(outputBuffer[i].data<T>(),
......
...@@ -141,6 +141,7 @@ class SparseConvolution(SparseModule): ...@@ -141,6 +141,7 @@ class SparseConvolution(SparseModule):
if self.inverse: if self.inverse:
assert datas is not None and self.indice_key is not None assert datas is not None and self.indice_key is not None
_, outids, indice_pairs, indice_pair_num, out_spatial_shape = datas _, outids, indice_pairs, indice_pair_num, out_spatial_shape = datas
assert indice_pairs.shape[0] == np.prod(self.kernel_size), "inverse conv must have same kernel size as its couple conv"
else: else:
if self.indice_key is not None and datas is not None: if self.indice_key is not None and datas is not None:
outids, _, indice_pairs, indice_pair_num, _ = datas outids, _, indice_pairs, indice_pair_num, _ = datas
...@@ -274,12 +275,14 @@ class SparseInverseConv2d(SparseConvolution): ...@@ -274,12 +275,14 @@ class SparseInverseConv2d(SparseConvolution):
def __init__(self, def __init__(self,
in_channels, in_channels,
out_channels, out_channels,
kernel_size,
indice_key, indice_key,
bias=True): bias=True):
super(SparseInverseConv2d, self).__init__( super(SparseInverseConv2d, self).__init__(
2, 2,
in_channels, in_channels,
out_channels, out_channels,
kernel_size,
bias=bias, bias=bias,
inverse=True, inverse=True,
indice_key=indice_key) indice_key=indice_key)
...@@ -289,12 +292,14 @@ class SparseInverseConv3d(SparseConvolution): ...@@ -289,12 +292,14 @@ class SparseInverseConv3d(SparseConvolution):
def __init__(self, def __init__(self,
in_channels, in_channels,
out_channels, out_channels,
kernel_size,
indice_key, indice_key,
bias=True): bias=True):
super(SparseInverseConv3d, self).__init__( super(SparseInverseConv3d, self).__init__(
3, 3,
in_channels, in_channels,
out_channels, out_channels,
kernel_size,
bias=bias, bias=bias,
inverse=True, inverse=True,
indice_key=indice_key) indice_key=indice_key)
......
...@@ -86,7 +86,7 @@ def get_indice_pairs(indices, ...@@ -86,7 +86,7 @@ def get_indice_pairs(indices,
else: else:
raise NotImplementedError raise NotImplementedError
return get_indice_pairs_func(indices, batch_size, out_shape, spatial_shape, ksize, return get_indice_pairs_func(indices, batch_size, out_shape, spatial_shape, ksize,
stride, padding, dilation, out_padding, subm, transpose) stride, padding, dilation, out_padding, int(subm), int(transpose))
else: else:
if ndim == 2: if ndim == 2:
get_indice_pairs_func = torch.ops.spconv.get_indice_pairs_grid_2d get_indice_pairs_func = torch.ops.spconv.get_indice_pairs_grid_2d
...@@ -95,7 +95,7 @@ def get_indice_pairs(indices, ...@@ -95,7 +95,7 @@ def get_indice_pairs(indices,
else: else:
raise NotImplementedError raise NotImplementedError
return get_indice_pairs_func(indices, grid, batch_size, out_shape, spatial_shape, ksize, return get_indice_pairs_func(indices, grid, batch_size, out_shape, spatial_shape, ksize,
stride, padding, dilation, out_padding, subm, transpose) stride, padding, dilation, out_padding, int(subm), int(transpose))
...@@ -109,11 +109,11 @@ def indice_conv(features, ...@@ -109,11 +109,11 @@ def indice_conv(features,
if filters.dtype == torch.float32: if filters.dtype == torch.float32:
return torch.ops.spconv.indice_conv_fp32(features, filters, indice_pairs, return torch.ops.spconv.indice_conv_fp32(features, filters, indice_pairs,
indice_pair_num, num_activate_out, indice_pair_num, num_activate_out,
inverse, subm) int(inverse), int(subm))
elif filters.dtype == torch.half: elif filters.dtype == torch.half:
return torch.ops.spconv.indice_conv_half(features, filters, indice_pairs, return torch.ops.spconv.indice_conv_half(features, filters, indice_pairs,
indice_pair_num, num_activate_out, indice_pair_num, num_activate_out,
inverse, subm) int(inverse), int(subm))
else: else:
raise NotImplementedError raise NotImplementedError
...@@ -127,10 +127,10 @@ def indice_conv_backward(features, ...@@ -127,10 +127,10 @@ def indice_conv_backward(features,
subm=False): subm=False):
if filters.dtype == torch.float32: if filters.dtype == torch.float32:
return torch.ops.spconv.indice_conv_backward_fp32( return torch.ops.spconv.indice_conv_backward_fp32(
features, filters, out_bp, indice_pairs, indice_pair_num, inverse, subm) features, filters, out_bp, indice_pairs, indice_pair_num, int(inverse), int(subm))
elif filters.dtype == torch.half: elif filters.dtype == torch.half:
return torch.ops.spconv.indice_conv_backward_half( return torch.ops.spconv.indice_conv_backward_half(
features, filters, out_bp, indice_pairs, indice_pair_num, inverse, subm) features, filters, out_bp, indice_pairs, indice_pair_num, int(inverse), int(subm))
else: else:
raise NotImplementedError raise NotImplementedError
......
...@@ -20,7 +20,7 @@ import numpy as np ...@@ -20,7 +20,7 @@ import numpy as np
import time import time
from spconv.test_utils import params_grid, generate_sparse_data, TestCase from spconv.test_utils import params_grid, generate_sparse_data, TestCase
import unittest import unittest
# import sparseconvnet as scn import sparseconvnet as scn
class SparseConv3dTestTorch(nn.Module): class SparseConv3dTestTorch(nn.Module):
def __init__(self, num_layers, ndim, shape, in_channels, out_channels, kernel_size, def __init__(self, num_layers, ndim, shape, in_channels, out_channels, kernel_size,
...@@ -278,11 +278,10 @@ class SparseCoupleDeConvTest(nn.Module): ...@@ -278,11 +278,10 @@ class SparseCoupleDeConvTest(nn.Module):
stride, stride,
indice_key="cp0", indice_key="cp0",
bias=False), bias=False),
spconv.SparseConvCoupleTranspose3d( spconv.SparseInverseConv3d(
out_channels, out_channels,
in_channels, in_channels,
kernel_size, kernel_size,
stride,
indice_key="cp0", indice_key="cp0",
bias=False), bias=False),
...@@ -361,7 +360,7 @@ class TestSpConv(TestCase): ...@@ -361,7 +360,7 @@ class TestSpConv(TestCase):
net_ref.net[0].weight.data[:] = filters_t.permute(4, 3, 0, 1, 2).contiguous() net_ref.net[0].weight.data[:] = filters_t.permute(4, 3, 0, 1, 2).contiguous()
net.net[0].weight.data[:] = filters_t net.net[0].weight.data[:] = filters_t
out_ref = net_ref(features_dense_t) out_ref = net_ref(features_dense_t)
out = net(features_t, indices_t, bs).dense().permute(0, 4, 1, 2, 3).contiguous() out = net(features_t, indices_t, bs).dense()
dout = np.random.uniform(-0.2, 0.2, out_ref.shape).astype(features.dtype) dout = np.random.uniform(-0.2, 0.2, out_ref.shape).astype(features.dtype)
dout_t = torch.from_numpy(dout).to(device) dout_t = torch.from_numpy(dout).to(device)
out.backward(dout_t) out.backward(dout_t)
...@@ -420,7 +419,7 @@ class TestSpConv(TestCase): ...@@ -420,7 +419,7 @@ class TestSpConv(TestCase):
net_ref.net[0].weight.data[:] = filters_t.permute(3, 4, 0, 1, 2).contiguous() net_ref.net[0].weight.data[:] = filters_t.permute(3, 4, 0, 1, 2).contiguous()
net.net[0].weight.data[:] = filters_t net.net[0].weight.data[:] = filters_t
out_ref = net_ref(features_dense_t) out_ref = net_ref(features_dense_t)
out = net(features_t, indices_t, bs).dense().permute(0, 4, 1, 2, 3).contiguous() out = net(features_t, indices_t, bs).dense()
dout = np.random.uniform(-0.2, 0.2, out_ref.shape).astype(features.dtype) dout = np.random.uniform(-0.2, 0.2, out_ref.shape).astype(features.dtype)
dout_t = torch.from_numpy(dout).to(device) dout_t = torch.from_numpy(dout).to(device)
out.backward(dout_t) out.backward(dout_t)
...@@ -500,7 +499,7 @@ class TestSpConv(TestCase): ...@@ -500,7 +499,7 @@ class TestSpConv(TestCase):
def testSpMaxPool3d(self): def testSpMaxPool3d(self):
np.random.seed(484) np.random.seed(485)
devices = ["cuda:0", "cpu:0"] devices = ["cuda:0", "cpu:0"]
shapes = [[19, 18, 17]] shapes = [[19, 18, 17]]
batchsizes = [1, 2] batchsizes = [1, 2]
...@@ -538,7 +537,7 @@ class TestSpConv(TestCase): ...@@ -538,7 +537,7 @@ class TestSpConv(TestCase):
out = net(features_t, indices_t, bs) out = net(features_t, indices_t, bs)
outids = out.indices outids = out.indices
outfeatures = out.features outfeatures = out.features
out_dense = out.dense() out_dense = out.dense(channels_first=False)
out = out_dense.permute(0, 4, 1, 2, 3).contiguous() out = out_dense.permute(0, 4, 1, 2, 3).contiguous()
dout_sparse = np.random.uniform(-0.2, 0.2, outfeatures.shape).astype(features.dtype) dout_sparse = np.random.uniform(-0.2, 0.2, outfeatures.shape).astype(features.dtype)
...@@ -605,10 +604,10 @@ def main(): ...@@ -605,10 +604,10 @@ def main():
out = net(features_t, indices_t, bs) out = net(features_t, indices_t, bs)
torch.cuda.synchronize() torch.cuda.synchronize()
times.append(time.time() - t) times.append(time.time() - t)
print((net.grid == -1).float().sum(), net.grid.numel()) # print((net.grid == -1).float().sum(), net.grid.numel())
# print("spconv time", time.time() - t) # print("spconv time", time.time() - t)
print("spconv time", np.mean(times[2:])) print("spconv time", np.mean(times[2:]))
out = net(features_t, indices_t, bs).dense().permute(0, 4, 1, 2, 3).contiguous() out = net(features_t, indices_t, bs).dense()
print(np.linalg.norm(out.detach().cpu().numpy() - out_ref.detach().cpu().numpy())) print(np.linalg.norm(out.detach().cpu().numpy() - out_ref.detach().cpu().numpy()))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment