1. add cuda kernel error check

2. update install command

1. add cuda kernel error check
2. update install command
20557e83 · traveller59 · a09b086a · 20557e83 · 20557e83 · 20557e83
Commit 20557e83 authored Jan 26, 2019 by traveller59
Showing with 22 additions and 2 deletions

README.md README.md +4 -2

src/spconv/indice.cu src/spconv/indice.cu +7 -0

src/spconv/maxpool.cu src/spconv/maxpool.cu +6 -0

src/spconv/reordering.cu src/spconv/reordering.cu +5 -0

No files found.
--- a/README.md
+++ b/README.md
@@ -16,7 +16,9 @@ This project only support CUDA 9.0+. If you are using cuda 8.0, please update it
 2. Download cmake >= 3.13.2, then add cmake executables to PATH.
-3. Ensure you have install pytorch 1.0 in your environment, run ```python setup.py install```.
+3. Ensure you have install pytorch 1.0 in your environment, run ```python setup.py bdist_wheel``` (don't use ```python setup.py install```).
+4. Run ```cd ./dist```, use pip to install generated whl file.
 ## Compare with SparseConvNet
@@ -40,7 +42,7 @@ indices = # your indices/coordinates with shape [N, ndim + 1], batch index must
 spatial_shape = # spatial shape of your sparse tensor.
 batch_size = # batch size of your sparse tensor.
 x = spconv.SparseConvTensor(features, indices, spatial_shape, batch_size)
-x_dense_NHWC = x.dense() # convert sparse tensor to dense NCHW tensor.
+x_dense_NCHW = x.dense() # convert sparse tensor to dense NCHW tensor.
 print(x.sparity) # helper function to check sparity. 
 ```

--- a/src/spconv/indice.cu
+++ b/src/spconv/indice.cu
@@ -56,6 +56,7 @@ struct CreateConvIndicePairFunctorP1<tv::GPU, Index, IndexGrid, NDim> {
             d.stream()>>>(indicesIn, indicesOut, gridsOut, indicePairs,
                           indiceNum, indicePairUnique, kernelSize, stride,
                           padding, dilation, outSpatialShape);
+    TV_CHECK_CUDA_ERR();
    // std::cout << "p1 gene time " << timer.report() / 1000.0 << std::endl;
    return 1;
  }
@@ -81,14 +82,17 @@ struct CreateConvIndicePairFunctorP2<tv::GPU, Index, IndexGrid, NDim> {
        <<<tv::launch::getBlocks(numAct), tv::launch::CUDA_NUM_THREADS, 0,
           d.stream()>>>(indicesOut, gridsOut, numAct, indicePairs,
                         indicePairUnique, outSpatialShape, batchSize);
+    TV_CHECK_CUDA_ERR();
    assignIndicePairsKernel<Index, IndexGrid, NDim>
        <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
           d.stream()>>>(indicesOut, gridsOut, numActIn, indicePairs,
                         indicePairUnique, outSpatialShape);
+    TV_CHECK_CUDA_ERR();
    if (resetGrid) {
      resetGridKernel<Index, IndexGrid, NDim>
          <<<tv::launch::getBlocks(numAct), tv::launch::CUDA_NUM_THREADS, 0,
             d.stream()>>>(indicePairUnique.data(), gridsOut, numAct);
+      TV_CHECK_CUDA_ERR();
    }
    return numAct;
  }
@@ -113,15 +117,18 @@ struct CreateSubMIndicePairFunctor<tv::GPU, Index, IndexGrid, NDim> {
    prepareSubMGridKernel<Index, IndexGrid, NDim>
        <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
           d.stream()>>>(indicesIn, gridsOut, outSpatialShape);
+    TV_CHECK_CUDA_ERR();
    getSubMIndicePairsKernel<Index, IndexGrid, NDim>
        <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
           d.stream()>>>(indicesIn, gridsOut, indicePairs, indiceNum,
                         kernelSize, stride, padding, dilation, outSpatialShape);
+    TV_CHECK_CUDA_ERR();
    // std::cout << "subm gene time " << timer.report() / 1000.0 << std::endl;
    if (resetGrid) {
      resetGridSubMKernel<Index, IndexGrid, NDim>
          <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
             d.stream()>>>(indicesIn.data(), gridsOut, outSpatialShape, numActIn);
+      TV_CHECK_CUDA_ERR();
    }
    return numActIn;
  }

--- a/src/spconv/maxpool.cu
+++ b/src/spconv/maxpool.cu
@@ -343,6 +343,7 @@ struct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {
                                    indices.subview(0).data() + numHotBlock,
                                    indices.subview(1).data() + numHotBlock,
                                    size - numHotBlock, numPlanes);
+            TV_CHECK_CUDA_ERR();
          }
          notFound = false;
        }
@@ -360,6 +361,7 @@ struct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {
                outFeatures.data(), inFeatures.data(),
                indices.subview(0).data(), indices.subview(1).data(),
                numHotBlock, numPlanes);
+        TV_CHECK_CUDA_ERR();
      }
      if (size > numHotBlock) {
@@ -370,6 +372,7 @@ struct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {
                indices.subview(0).data() + numHotBlock,
                indices.subview(1).data() + numHotBlock, size - numHotBlock,
                numPlanes);
+        TV_CHECK_CUDA_ERR();
      }
    }
  }
@@ -416,6 +419,7 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
                                    indices.subview(0).data() + numHotBlock,
                                    indices.subview(1).data() + numHotBlock,
                                    size - numHotBlock, numPlanes);
+            TV_CHECK_CUDA_ERR();
          }
          notFound = false;
        }
@@ -433,6 +437,7 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
                outFeatures.data(), inFeatures.data(), dout.data(), din.data(),
                indices.subview(0).data(), indices.subview(1).data(),
                numHotBlock, numPlanes);
+        TV_CHECK_CUDA_ERR();
      }
      if (size > numHotBlock) {
@@ -443,6 +448,7 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
                indices.subview(0).data() + numHotBlock,
                indices.subview(1).data() + numHotBlock, size - numHotBlock,
                numPlanes);
+        TV_CHECK_CUDA_ERR();
      }
    }
  }

--- a/src/spconv/reordering.cu
+++ b/src/spconv/reordering.cu
@@ -62,6 +62,7 @@ struct SparseGatherFunctor<tv::GPU, T, Index> {
                   d.stream()>>>(buffer.data() + nHotBlock * numPlanes,
                                 features.data(), indices.data() + nHotBlock,
                                 size - nHotBlock, numPlanes / vecloadFactor);
+            TV_CHECK_CUDA_ERR();
          }
          notFound = false;
        }
@@ -76,6 +77,7 @@ struct SparseGatherFunctor<tv::GPU, T, Index> {
                  tv::launch::DivUp(numPlanes, NumTLP)),
             dim3(NumTLP / NumILP, NumTLP), 0, d.stream()>>>(
              buffer.data(), features.data(), indices.data(), size, numPlanes);
+      TV_CHECK_CUDA_ERR();
    }
  }
 };
@@ -108,6 +110,7 @@ struct SparseScatterAddFunctor<tv::GPU, T, Index> {
                   d.stream()>>>(outFeatures.data(), buffer.data(),
                                 indices.data(), nHotBlock,
                                 numPlanes / vecloadFactor);
+            TV_CHECK_CUDA_ERR();
          }
          if (size - nHotBlock > 0) {
            scatterAddGenericKernel<T, Index, int(NumTLP), NumILP>
@@ -115,6 +118,7 @@ struct SparseScatterAddFunctor<tv::GPU, T, Index> {
                   0, d.stream()>>>(
                    outFeatures.data(), buffer.data() + nHotBlock * numPlanes,
                    indices.data() + nHotBlock, size - nHotBlock, numPlanes);
+            TV_CHECK_CUDA_ERR();
          }
          notFound = false;
        }
@@ -129,6 +133,7 @@ struct SparseScatterAddFunctor<tv::GPU, T, Index> {
             dim3(NumTLP / NumILP, NumTLP), 0, d.stream()>>>(
              outFeatures.data(), buffer.data(), indices.data(), size,
              numPlanes);
+      TV_CHECK_CUDA_ERR();
    }
  }
 };