Commit 20557e83 authored by traveller59's avatar traveller59
Browse files

1. add cuda kernel error check

2. update install command
parent a09b086a
......@@ -16,7 +16,9 @@ This project only support CUDA 9.0+. If you are using cuda 8.0, please update it
2. Download cmake >= 3.13.2, then add cmake executables to PATH.
3. Ensure you have install pytorch 1.0 in your environment, run ```python setup.py install```.
3. Ensure you have install pytorch 1.0 in your environment, run ```python setup.py bdist_wheel``` (don't use ```python setup.py install```).
4. Run ```cd ./dist```, use pip to install generated whl file.
## Compare with SparseConvNet
......@@ -40,7 +42,7 @@ indices = # your indices/coordinates with shape [N, ndim + 1], batch index must
spatial_shape = # spatial shape of your sparse tensor.
batch_size = # batch size of your sparse tensor.
x = spconv.SparseConvTensor(features, indices, spatial_shape, batch_size)
x_dense_NHWC = x.dense() # convert sparse tensor to dense NCHW tensor.
x_dense_NCHW = x.dense() # convert sparse tensor to dense NCHW tensor.
print(x.sparity) # helper function to check sparity.
```
......
......@@ -56,6 +56,7 @@ struct CreateConvIndicePairFunctorP1<tv::GPU, Index, IndexGrid, NDim> {
d.stream()>>>(indicesIn, indicesOut, gridsOut, indicePairs,
indiceNum, indicePairUnique, kernelSize, stride,
padding, dilation, outSpatialShape);
TV_CHECK_CUDA_ERR();
// std::cout << "p1 gene time " << timer.report() / 1000.0 << std::endl;
return 1;
}
......@@ -81,14 +82,17 @@ struct CreateConvIndicePairFunctorP2<tv::GPU, Index, IndexGrid, NDim> {
<<<tv::launch::getBlocks(numAct), tv::launch::CUDA_NUM_THREADS, 0,
d.stream()>>>(indicesOut, gridsOut, numAct, indicePairs,
indicePairUnique, outSpatialShape, batchSize);
TV_CHECK_CUDA_ERR();
assignIndicePairsKernel<Index, IndexGrid, NDim>
<<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
d.stream()>>>(indicesOut, gridsOut, numActIn, indicePairs,
indicePairUnique, outSpatialShape);
TV_CHECK_CUDA_ERR();
if (resetGrid) {
resetGridKernel<Index, IndexGrid, NDim>
<<<tv::launch::getBlocks(numAct), tv::launch::CUDA_NUM_THREADS, 0,
d.stream()>>>(indicePairUnique.data(), gridsOut, numAct);
TV_CHECK_CUDA_ERR();
}
return numAct;
}
......@@ -113,15 +117,18 @@ struct CreateSubMIndicePairFunctor<tv::GPU, Index, IndexGrid, NDim> {
prepareSubMGridKernel<Index, IndexGrid, NDim>
<<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
d.stream()>>>(indicesIn, gridsOut, outSpatialShape);
TV_CHECK_CUDA_ERR();
getSubMIndicePairsKernel<Index, IndexGrid, NDim>
<<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
d.stream()>>>(indicesIn, gridsOut, indicePairs, indiceNum,
kernelSize, stride, padding, dilation, outSpatialShape);
TV_CHECK_CUDA_ERR();
// std::cout << "subm gene time " << timer.report() / 1000.0 << std::endl;
if (resetGrid) {
resetGridSubMKernel<Index, IndexGrid, NDim>
<<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
d.stream()>>>(indicesIn.data(), gridsOut, outSpatialShape, numActIn);
TV_CHECK_CUDA_ERR();
}
return numActIn;
}
......
......@@ -343,6 +343,7 @@ struct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {
indices.subview(0).data() + numHotBlock,
indices.subview(1).data() + numHotBlock,
size - numHotBlock, numPlanes);
TV_CHECK_CUDA_ERR();
}
notFound = false;
}
......@@ -360,6 +361,7 @@ struct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {
outFeatures.data(), inFeatures.data(),
indices.subview(0).data(), indices.subview(1).data(),
numHotBlock, numPlanes);
TV_CHECK_CUDA_ERR();
}
if (size > numHotBlock) {
......@@ -370,6 +372,7 @@ struct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {
indices.subview(0).data() + numHotBlock,
indices.subview(1).data() + numHotBlock, size - numHotBlock,
numPlanes);
TV_CHECK_CUDA_ERR();
}
}
}
......@@ -416,6 +419,7 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
indices.subview(0).data() + numHotBlock,
indices.subview(1).data() + numHotBlock,
size - numHotBlock, numPlanes);
TV_CHECK_CUDA_ERR();
}
notFound = false;
}
......@@ -433,6 +437,7 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
outFeatures.data(), inFeatures.data(), dout.data(), din.data(),
indices.subview(0).data(), indices.subview(1).data(),
numHotBlock, numPlanes);
TV_CHECK_CUDA_ERR();
}
if (size > numHotBlock) {
......@@ -443,6 +448,7 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
indices.subview(0).data() + numHotBlock,
indices.subview(1).data() + numHotBlock, size - numHotBlock,
numPlanes);
TV_CHECK_CUDA_ERR();
}
}
}
......
......@@ -62,6 +62,7 @@ struct SparseGatherFunctor<tv::GPU, T, Index> {
d.stream()>>>(buffer.data() + nHotBlock * numPlanes,
features.data(), indices.data() + nHotBlock,
size - nHotBlock, numPlanes / vecloadFactor);
TV_CHECK_CUDA_ERR();
}
notFound = false;
}
......@@ -76,6 +77,7 @@ struct SparseGatherFunctor<tv::GPU, T, Index> {
tv::launch::DivUp(numPlanes, NumTLP)),
dim3(NumTLP / NumILP, NumTLP), 0, d.stream()>>>(
buffer.data(), features.data(), indices.data(), size, numPlanes);
TV_CHECK_CUDA_ERR();
}
}
};
......@@ -108,6 +110,7 @@ struct SparseScatterAddFunctor<tv::GPU, T, Index> {
d.stream()>>>(outFeatures.data(), buffer.data(),
indices.data(), nHotBlock,
numPlanes / vecloadFactor);
TV_CHECK_CUDA_ERR();
}
if (size - nHotBlock > 0) {
scatterAddGenericKernel<T, Index, int(NumTLP), NumILP>
......@@ -115,6 +118,7 @@ struct SparseScatterAddFunctor<tv::GPU, T, Index> {
0, d.stream()>>>(
outFeatures.data(), buffer.data() + nHotBlock * numPlanes,
indices.data() + nHotBlock, size - nHotBlock, numPlanes);
TV_CHECK_CUDA_ERR();
}
notFound = false;
}
......@@ -129,6 +133,7 @@ struct SparseScatterAddFunctor<tv::GPU, T, Index> {
dim3(NumTLP / NumILP, NumTLP), 0, d.stream()>>>(
outFeatures.data(), buffer.data(), indices.data(), size,
numPlanes);
TV_CHECK_CUDA_ERR();
}
}
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment