// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE(review): in this copy of the file the text between angle brackets
// appears to have been lost: the #include targets, the template parameter and
// argument lists, and the kernel launch configurations (shown as `<<>>`) are
// all empty. The code below is kept token-for-token as found; restore the
// missing pieces from the upstream file before building.
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

namespace spconv {
namespace functor {

// GPU functor: first phase of building convolution indice pairs.
//
// operator() reads the number of input rows from indicesIn.dim(0). If that is
// zero it returns 0 without launching anything. Otherwise it launches
// prepareDeConvIndicePairsKernel when `transpose` is true, or
// prepareIndicePairsKernel when it is false, forwarding the same argument list
// to either kernel, checks for a CUDA error and returns 1.
//
// NOTE(review): `d` and `batchSize` are not referenced in the visible text;
// they may be used inside the stripped launch configuration -- confirm.
template struct CreateConvIndicePairFunctorP1 {
  Index operator()(const tv::GPU &d, tv::TensorView indicesIn,
                   tv::TensorView indicesOut, tv::TensorView gridsOut,
                   tv::TensorView indicePairs, tv::TensorView indiceNum,
                   tv::TensorView indicePairUnique,
                   const tv::SimpleVector kernelSize,
                   const tv::SimpleVector stride,
                   const tv::SimpleVector padding,
                   const tv::SimpleVector dilation,
                   const tv::SimpleVector outSpatialShape, bool transpose) {
    Index batchSize = gridsOut.dim(0);
    auto numActIn = indicesIn.dim(0);
    // Nothing to do for an empty input.
    if (numActIn == 0)
      return 0;
    // auto timer = spconv::CudaContextTimer<>();
    // Both kernels take identical arguments; only the kernel chosen differs.
    if (transpose)
      prepareDeConvIndicePairsKernel <<>>(indicesIn, indicesOut, gridsOut,
                                          indicePairs, indiceNum,
                                          indicePairUnique, kernelSize, stride,
                                          padding, dilation, outSpatialShape);
    else
      prepareIndicePairsKernel <<>>(indicesIn, indicesOut, gridsOut,
                                    indicePairs, indiceNum, indicePairUnique,
                                    kernelSize, stride, padding, dilation,
                                    outSpatialShape);
    TV_CHECK_CUDA_ERR();
    // std::cout << "p1 gene time " << timer.report() / 1000.0 << std::endl;
    return 1;
  }
};

// GPU functor: second phase of building convolution indice pairs.
//
// operator() returns 0 immediately when indicesIn.dim(0) is zero. Otherwise it
// computes numAct as indicePairUnique.dim(0) - 1, launches
// assignGridAndIndiceOutKernel followed by assignIndicePairsKernel (checking
// for a CUDA error after each), optionally launches resetGridKernel when
// `resetGrid` is true, and returns numAct.
//
// NOTE(review): numAct is presumably the number of active output locations,
// with the "- 1" dropping a sentinel entry in indicePairUnique -- confirm
// against the caller that fills indicePairUnique.
// NOTE(review): `indiceNum`, `transpose` and `kernelVolume` are not referenced
// in the visible text; `d` may be used inside the stripped launch
// configuration -- confirm.
template struct CreateConvIndicePairFunctorP2 {
  Index operator()(const tv::GPU &d, tv::TensorView indicesIn,
                   tv::TensorView indicesOut, tv::TensorView gridsOut,
                   tv::TensorView indicePairs, tv::TensorView indiceNum,
                   tv::TensorView indicePairUnique,
                   const tv::SimpleVector outSpatialShape, bool transpose,
                   bool resetGrid) {
    Index batchSize = gridsOut.dim(0);
    auto kernelVolume = indicePairs.dim(0);
    auto numActIn = indicesIn.dim(0);
    // Nothing to do for an empty input.
    if (numActIn == 0)
      return 0;
    Index numAct = indicePairUnique.dim(0) - 1;
    assignGridAndIndiceOutKernel <<>>(indicesOut, gridsOut, numAct,
                                      indicePairs, indicePairUnique,
                                      outSpatialShape, batchSize);
    TV_CHECK_CUDA_ERR();
    assignIndicePairsKernel <<>>(indicesOut, gridsOut, numActIn, indicePairs,
                                 indicePairUnique, outSpatialShape);
    TV_CHECK_CUDA_ERR();
    // Optional extra pass over gridsOut, driven by indicePairUnique.
    if (resetGrid) {
      resetGridKernel <<>>(indicePairUnique.data(), gridsOut, numAct);
      TV_CHECK_CUDA_ERR();
    }
    return numAct;
  }
};

// GPU functor: builds indice pairs for the "SubM" variant.
//
// operator() returns 0 immediately when indicesIn.dim(0) is zero. Otherwise it
// launches prepareSubMGridKernel and then getSubMIndicePairsKernel (checking
// for a CUDA error after each), optionally launches resetGridSubMKernel when
// `resetGrid` is true, and returns the number of input rows (numActIn).
//
// NOTE(review): `transpose` is not referenced in the visible text; `d` may be
// used inside the stripped launch configuration -- confirm.
template struct CreateSubMIndicePairFunctor {
  Index operator()(const tv::GPU &d, tv::TensorView indicesIn,
                   tv::TensorView gridsOut, tv::TensorView indicePairs,
                   tv::TensorView indiceNum, const tv::SimpleVector kernelSize,
                   const tv::SimpleVector stride,
                   const tv::SimpleVector padding,
                   const tv::SimpleVector dilation,
                   const tv::SimpleVector outSpatialShape, bool transpose,
                   bool resetGrid) {
    auto numActIn = indicesIn.dim(0);
    // Nothing to do for an empty input.
    if (numActIn == 0)
      return 0;
    // auto timer = spconv::CudaContextTimer<>();
    prepareSubMGridKernel <<>>(indicesIn, gridsOut, outSpatialShape);
    TV_CHECK_CUDA_ERR();
    getSubMIndicePairsKernel <<>>(indicesIn, gridsOut, indicePairs, indiceNum,
                                  kernelSize, stride, padding, dilation,
                                  outSpatialShape);
    TV_CHECK_CUDA_ERR();
    // std::cout << "subm gene time " << timer.report() / 1000.0 << std::endl;
    // Optional extra pass over gridsOut, driven by the input indices.
    if (resetGrid) {
      resetGridSubMKernel <<>>(indicesIn.data(), gridsOut, outSpatialShape,
                               numActIn);
      TV_CHECK_CUDA_ERR();
    }
    return numActIn;
  }
};
} // namespace functor

// Explicit instantiations of the functors for one (Index, NDIM) combination.
// NOTE(review): the template argument lists after each functor name are
// missing in this view, and CreateConvIndicePairFunctor is not defined in this
// file -- presumably it comes from one of the (stripped) headers; confirm.
#define DECLARE_GPU_SPECS_INDEX_NDIM(Index, NDIM)                              \
  template struct functor::CreateConvIndicePairFunctor;                        \
  template struct functor::CreateConvIndicePairFunctorP1;                      \
  template struct functor::CreateConvIndicePairFunctorP2;                      \
  template struct functor::CreateSubMIndicePairFunctor;

// Instantiates the functors above for NDIM = 1, 2, 3 and 4 with the given
// Index type.
#define DECLARE_GPU_INDEX(Index)                                               \
  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 1);                                      \
  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 2);                                      \
  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 3);                                      \
  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 4);

// Only `int` indices are instantiated here.
DECLARE_GPU_INDEX(int);

// The helper macros are local to this file; drop them once used.
#undef DECLARE_GPU_INDEX
#undef DECLARE_GPU_SPECS_INDEX_NDIM
} // namespace spconv