Unverified commit c1de4c9b, authored by Wenhao Wu and committed by GitHub

[Feature] Add spconv ops from mmdet3d (#1581)



* add ops (spconv) of mmdet3d

* fix typo

* refactor code

* resolve comments in #1452

* fix compile error

* fix bugs

* fix bug

* transform from 'types.h' to 'extension.h'

* fix bug

* transform from 'types.h' to 'extension.h' in parrots

* add extension.h in pybind.cpp

* add unittest

* Recover code

* (1) Remove prettyprint.h
(2) Switch `T` to `scalar_t`
(3) Remove useless lines
(4) Refine example in docstring of sparse_modules.py

* (1) rename from `cu.h` to `cuh`
(2) remove useless files
(3) move cpu files to `pytorch/cpu`

* reorganize files

* Add docstring for sparse_functional.py

* use dispatcher

* remove template

* use dispatch in cuda ops

* resolve Segmentation fault

* remove useless files

* fix lint

* fix lint

* fix lint

* fix unittest in test_build_layers.py

* add tensorview into include_dirs when compiling

* recover all deleted files

* fix lint and comments

* recover setup.py

* replace tv::GPU as tv::TorchGPU & support device guard

* fix lint
Co-authored-by: hdc <hudingchang.vendor@sensetime.com>
Co-authored-by: grimoire <yaoqian@sensetime.com>
parent 33c83b5a
...@@ -35,6 +35,7 @@ We implement common CUDA ops used in detection, segmentation, etc.
- SigmoidFocalLoss
- SoftmaxFocalLoss
- SoftNMS
- Sparse Convolution
- Synchronized BatchNorm
- Voxelization
- ThreeInterpolate
......
...@@ -34,6 +34,7 @@ MMCV provides the CUDA ops commonly used in tasks such as detection and segmentation
- SigmoidFocalLoss
- SoftmaxFocalLoss
- SoftNMS
- Sparse Convolution
- Synchronized BatchNorm
- Voxelization
- ThreeInterpolate
......
...@@ -53,6 +53,12 @@ from .roipoint_pool3d import RoIPointPool3d
from .rotated_feature_align import rotated_feature_align
from .saconv import SAConv2d
from .scatter_points import DynamicScatter, dynamic_scatter
from .sparse_conv import (SparseConv2d, SparseConv3d, SparseConvTranspose2d,
SparseConvTranspose3d, SparseInverseConv2d,
SparseInverseConv3d, SubMConv2d, SubMConv3d)
from .sparse_modules import SparseModule, SparseSequential
from .sparse_pool import SparseMaxPool2d, SparseMaxPool3d
from .sparse_structure import SparseConvTensor, scatter_nd
from .sync_bn import SyncBatchNorm
from .three_interpolate import three_interpolate
from .three_nn import three_nn
...@@ -84,6 +90,10 @@ __all__ = [
'furthest_point_sample_with_dist', 'PointsSampler', 'Correlation',
'boxes_iou_bev', 'nms_bev', 'nms_normal_bev', 'Voxelization',
'voxelization', 'dynamic_scatter', 'DynamicScatter', 'RoIAwarePool3d',
'SparseConv2d', 'SparseConv3d', 'SparseConvTranspose2d',
'SparseConvTranspose3d', 'SparseInverseConv2d', 'SparseInverseConv3d',
'SubMConv2d', 'SubMConv3d', 'SparseModule', 'SparseSequential',
'SparseMaxPool2d', 'SparseMaxPool3d', 'SparseConvTensor', 'scatter_nd',
'points_in_boxes_part', 'points_in_boxes_cpu', 'points_in_boxes_all',
'points_in_polygons', 'min_area_polygons', 'active_rotated_filter',
'convex_iou', 'convex_giou'
......
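For reference, a minimal usage sketch of the newly exported sparse-conv API. The channel sizes, spatial shape and random indices below are illustrative values, not taken from this PR; the constructor arguments follow the spconv convention used by sparse_structure.py and sparse_conv.py:

    import torch
    from mmcv.ops import SparseConv3d, SparseConvTensor

    # (N, 4) integer coordinates of the active voxels: batch_idx, z, y, x
    indices = torch.randint(0, 40, (100, 4), dtype=torch.int32)
    indices[:, 0] = 0                                   # single sample in the batch
    indices = torch.unique(indices, dim=0).cuda()
    features = torch.randn(indices.shape[0], 4).cuda()  # (N, C) features of those voxels

    x = SparseConvTensor(features, indices, [41, 400, 400], 1)  # spatial_shape, batch_size
    conv = SparseConv3d(4, 16, kernel_size=3, padding=1).cuda()
    out = conv(x)          # another SparseConvTensor (out.features / out.indices)
    dense = out.dense()    # optionally convert back to a dense (N, C, D, H, W) tensor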
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef INDICE_CU_H_
#define INDICE_CU_H_
#include <utils/spconv/spconv/geometry.h>
#include <utils/spconv/tensorview/tensorview.h>
#include <utils/spconv/tensorview/helper_kernel.cuh>
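// Phase 1 of GPU index-pair construction: every active input site enumerates
// the output positions it can reach (getValidOutPos) and atomically appends
// (input row, flattened output index) to indicePairs under the matching kernel
// offset; the flattened index is also written to indicePairUnique so duplicate
// outputs can be compacted later.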
template <typename Index, typename IndexGrid, unsigned NDim,
int KernelMaxVolume = 256>
__global__ void prepareIndicePairsKernel(
tv::TensorView<const Index> indicesIn, tv::TensorView<Index> indicesOut,
tv::TensorView<IndexGrid> gridsOut, tv::TensorView<Index> indicePairs,
tv::TensorView<Index> indiceNum, tv::TensorView<Index> indicePairUnique,
const tv::SimpleVector<Index, NDim> kernelSize,
const tv::SimpleVector<Index, NDim> stride,
const tv::SimpleVector<Index, NDim> padding,
const tv::SimpleVector<Index, NDim> dilation,
const tv::SimpleVector<Index, NDim> outSpatialShape) {
auto numActIn = indicesIn.dim(0);
Index spatialVolume = 1;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
spatialVolume *= outSpatialShape[i];
}
Index kernelVolume = 1;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
kernelVolume *= kernelSize[i];
}
Index numValidPoints = 0;
Index validPoints[KernelMaxVolume * (NDim + 1)];
Index *pointPtr = nullptr;
auto indicePairsDim2 = indicePairs.dim(2);
Index index;
for (int ix : tv::KernelLoopX<int>(numActIn)) {
numValidPoints = getValidOutPos<Index, NDim>(
indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(),
stride.data(), padding.data(), dilation.data(), outSpatialShape.data(),
validPoints);
for (Index i = 0; i < numValidPoints; ++i) {
pointPtr = validPoints + i * (NDim + 1);
auto offset = pointPtr[NDim];
auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1));
indicePairs(offset, 0, oldNum) = ix;
index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape.data()) +
spatialVolume * indicesIn(ix, 0);
indicePairs(offset, 1, oldNum) = index;
indicePairUnique[offset * indicePairsDim2 + oldNum] = index;
}
}
}
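// Transposed-convolution variant of prepareIndicePairsKernel; the only
// difference is that output positions are enumerated with
// getValidOutPosTranspose.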
template <typename Index, typename IndexGrid, unsigned NDim,
int KernelMaxVolume = 256>
__global__ void prepareDeConvIndicePairsKernel(
tv::TensorView<const Index> indicesIn, tv::TensorView<Index> indicesOut,
tv::TensorView<IndexGrid> gridsOut, tv::TensorView<Index> indicePairs,
tv::TensorView<Index> indiceNum, tv::TensorView<Index> indicePairUnique,
const tv::SimpleVector<Index, NDim> kernelSize,
const tv::SimpleVector<Index, NDim> stride,
const tv::SimpleVector<Index, NDim> padding,
const tv::SimpleVector<Index, NDim> dilation,
const tv::SimpleVector<Index, NDim> outSpatialShape) {
auto numActIn = indicesIn.dim(0);
Index spatialVolume = 1;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
spatialVolume *= outSpatialShape[i];
}
Index kernelVolume = 1;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
kernelVolume *= kernelSize[i];
}
Index numValidPoints = 0;
Index validPoints[KernelMaxVolume * (NDim + 1)];
Index *pointPtr = nullptr;
auto indicePairsDim2 = indicePairs.dim(2);
Index index;
for (int ix : tv::KernelLoopX<int>(numActIn)) {
numValidPoints = getValidOutPosTranspose<Index, NDim>(
indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(),
stride.data(), padding.data(), dilation.data(), outSpatialShape.data(),
validPoints);
for (Index i = 0; i < numValidPoints; ++i) {
pointPtr = validPoints + i * (NDim + 1);
auto offset = pointPtr[NDim];
auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1));
indicePairs(offset, 0, oldNum) = ix;
index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape.data()) +
spatialVolume * indicesIn(ix, 0);
indicePairs(offset, 1, oldNum) = index;
indicePairUnique[offset * indicePairsDim2 + oldNum] = index;
}
}
}
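// Phase 2: records the compacted row number of every unique output index in
// the dense grid and decodes the flat index back into (batch, spatial...)
// coordinates stored in indicesOut.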
template <typename Index, typename IndexGrid, unsigned NDim>
__global__ void assignGridAndIndiceOutKernel(
tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut,
int numAct, tv::TensorView<Index> indicePairs,
tv::TensorView<Index> indicePairUnique,
const tv::SimpleVector<Index, NDim> outSpatialShape, int batchSize) {
Index index;
auto indicesOutPtr = indicesOut.data();
for (int ix : tv::KernelLoopX<int>(numAct)) {
index = indicePairUnique[ix];
gridsOut[index] = ix;
index = tv::rowArrayIdxInv<Index, NDim>(
index, indicesOutPtr + ix * (NDim + 1) + 1, outSpatialShape.data());
indicesOut[ix * (NDim + 1)] = index % batchSize;
}
}
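// Rewrites the output slot of each indice pair from a flattened spatial index
// to the compacted output row looked up in gridsOut.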
template <typename Index, typename IndexGrid, unsigned NDim>
__global__ void assignIndicePairsKernel(
tv::TensorView<Index> indicesOut, tv::TensorView<IndexGrid> gridsOut,
int numActIn, tv::TensorView<Index> indicePairs,
tv::TensorView<Index> indicePairUnique,
const tv::SimpleVector<Index, NDim> outSpatialShape) {
Index index;
int kernelVolume = indicePairs.dim(0);
for (int ix : tv::KernelLoopX<int>(numActIn)) {
for (int i = 0; i < kernelVolume; ++i) {
index = indicePairs(i, 1, ix);
if (index > -1) {
indicePairs(i, 1, ix) = gridsOut[index];
}
}
}
}
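// Submanifold path: writes every active input row into the dense grid at its
// flattened spatial location so neighbours can be looked up later.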
template <typename Index, typename IndexGrid, unsigned NDim>
__global__ void prepareSubMGridKernel(
tv::TensorView<const Index> indicesIn, tv::TensorView<IndexGrid> gridsOut,
const tv::SimpleVector<Index, NDim> outSpatialShape) {
auto numActIn = indicesIn.dim(0);
Index spatialVolume = 1;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
spatialVolume *= outSpatialShape[i];
}
Index index = 0;
for (int ix : tv::KernelLoopX<int>(numActIn)) {
index = tv::rowArrayIdx<Index, NDim>(indicesIn.data() + ix * (NDim + 1) + 1,
outSpatialShape.data()) +
spatialVolume * indicesIn(ix, 0);
gridsOut[index] = ix;
}
}
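// Submanifold index pairs: a candidate output position is kept only when it
// coincides with an existing active site (gridsOut[index] > -1), so the output
// keeps the input's sparsity pattern.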
template <typename Index, typename IndexGrid, unsigned NDim,
int KernelMaxVolume = 256>
__global__ void getSubMIndicePairsKernel(
tv::TensorView<const Index> indicesIn, tv::TensorView<IndexGrid> gridsOut,
tv::TensorView<Index> indicePairs, tv::TensorView<Index> indiceNum,
const tv::SimpleVector<Index, NDim> kernelSize,
const tv::SimpleVector<Index, NDim> stride,
const tv::SimpleVector<Index, NDim> padding,
const tv::SimpleVector<Index, NDim> dilation,
const tv::SimpleVector<Index, NDim> outSpatialShape) {
auto numActIn = indicesIn.dim(0);
Index spatialVolume = 1;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
spatialVolume *= outSpatialShape[i];
}
Index numValidPoints = 0;
Index validPoints[KernelMaxVolume * (NDim + 1)];
Index *pointPtr = nullptr;
Index index = 0;
for (int ix : tv::KernelLoopX<int>(numActIn)) {
numValidPoints = getValidOutPos<Index, NDim>(
indicesIn.data() + ix * (NDim + 1) + 1, kernelSize.data(),
stride.data(), padding.data(), dilation.data(), outSpatialShape.data(),
validPoints);
for (int i = 0; i < numValidPoints; ++i) {
pointPtr = validPoints + i * (NDim + 1);
auto offset = pointPtr[NDim];
index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape.data()) +
spatialVolume * indicesIn(ix, 0);
if (gridsOut[index] > -1) {
auto oldNum = atomicAdd(indiceNum.data() + offset, Index(1));
indicePairs(offset, 1, oldNum) = gridsOut[index];
indicePairs(offset, 0, oldNum) = ix;
}
}
}
}
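// Clears the grid entries touched by the unique output indices back to -1 so
// the scratch grid can be reused.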
template <typename Index, typename IndexGrid, unsigned NDim>
__global__ void resetGridKernel(const Index *indicePairUnique,
tv::TensorView<IndexGrid> gridsOut,
int numAct) {
for (int ix : tv::KernelLoopX<int>(numAct)) {
gridsOut[indicePairUnique[ix]] = -1;
}
}
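// Same reset for the submanifold path; the flattened index is recomputed from
// each input coordinate.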
template <typename Index, typename IndexGrid, unsigned NDim>
__global__ void resetGridSubMKernel(
const Index *indices, tv::TensorView<IndexGrid> gridsOut,
const tv::SimpleVector<Index, NDim> outSpatialShape, int numAct) {
int outSpatialShapeReg[NDim];
for (int i = 0; i < NDim; ++i) {
outSpatialShapeReg[i] = outSpatialShape[i];
}
Index spatialVolume = 1;
auto indsPtr = indices;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
spatialVolume *= outSpatialShape[i];
}
Index index;
for (int ix : tv::KernelLoopX<int>(numAct)) {
indsPtr = indices + ix * (NDim + 1);
index = tv::rowArrayIdx<Index, NDim>(indsPtr + 1, outSpatialShapeReg);
gridsOut[index + spatialVolume * indsPtr[0]] = -1;
}
}
#endif
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef REORDERING_CU_H_
#define REORDERING_CU_H_
#include <utils/spconv/tensorview/helper_kernel.cuh>
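// Gather/scatter kernels used by the gather-GEMM-scatter formulation of sparse
// convolution. gatherGenericKernel copies the feature rows selected by
// `indices` into the contiguous `buffer`, handling NumILP rows per thread
// along x and the feature planes along y.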
template <typename scalar_t, typename Index, int NumTLP, int NumILP>
__global__ void gatherGenericKernel(scalar_t *buffer, const scalar_t *features,
const Index *indices, int size,
int numPlanes) {
int ILPStrideX[NumILP];
Index inds[NumILP];
#pragma unroll
for (int ilp = 0; ilp < NumILP; ilp++)
ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x;
for (int ix : tv::KernelLoopX<int, NumILP>(size)) {
#pragma unroll
for (int ilp = 0; ilp < NumILP; ilp++) {
if (ix + ILPStrideX[ilp] < size)
inds[ilp] = indices[ix + ILPStrideX[ilp]] * numPlanes;
}
for (int iy : tv::KernelLoopY<int>(numPlanes)) {
#pragma unroll
for (int ilp = 0; ilp < NumILP; ++ilp) {
if (ix + ILPStrideX[ilp] < size)
buffer[(ix + ILPStrideX[ilp]) * numPlanes + iy] =
features[inds[ilp] + iy];
}
}
}
}
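// Vectorized gather: same as gatherGenericKernel but moves VecType-wide
// chunks, so numPlanes is counted in VecType elements here.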
template <typename scalar_t, typename Index, int NumTLP, int NumILP,
typename VecType>
__global__ void gatherVecKernel(scalar_t *buffer, const scalar_t *features,
const Index *indices, int size, int numPlanes) {
int ILPStrideX[NumILP];
Index inds[NumILP];
#pragma unroll
for (int ilp = 0; ilp < NumILP; ilp++)
ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x;
for (int ix : tv::KernelLoopX<int, NumILP>(size)) {
#pragma unroll
for (int ilp = 0; ilp < NumILP; ilp++) {
if (ix + ILPStrideX[ilp] < size)
inds[ilp] = indices[ix + ILPStrideX[ilp]] * numPlanes;
}
for (int iy : tv::KernelLoopY<int>(numPlanes)) {
#pragma unroll
for (int ilp = 0; ilp < NumILP; ++ilp) {
if (ix + ILPStrideX[ilp] < size)
reinterpret_cast<VecType *>(
buffer)[(ix + ILPStrideX[ilp]) * numPlanes + iy] =
reinterpret_cast<const VecType *>(features)[inds[ilp] + iy];
}
}
}
}
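// Blocked vectorized gather: each block owns a NumTLP-wide slice of the planes
// (selected by blockIdx.x) and copies whole rows with VecType loads.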
template <typename scalar_t, typename Index, int NumTLP, int NumILP,
typename VecType = int4>
__global__ void gatherVecBlockKernel(scalar_t *buffer, const scalar_t *features,
const Index *indices, int size,
int numPlanes) {
int ILPStrideY[NumILP];
#pragma unroll
for (int ilp = 0; ilp < NumILP; ilp++)
ILPStrideY[ilp] = ilp * gridDim.y * blockDim.y;
features += blockIdx.x * NumTLP;
buffer += blockIdx.x * NumTLP;
for (int iy : tv::KernelLoopY<int, NumILP>(size)) {
#pragma unroll
for (int ilp = 0; ilp < NumILP; ++ilp) {
reinterpret_cast<VecType *>(
buffer)[(iy + ILPStrideY[ilp]) * numPlanes + threadIdx.x] =
reinterpret_cast<const VecType *>(
features)[indices[iy + ILPStrideY[ilp]] * numPlanes +
threadIdx.x];
}
}
}
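// Scatter step: accumulates rows of `buffer` back into the output feature rows
// selected by `indices`; overlapping outputs sum up.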
template <typename scalar_t, typename Index, int NumTLP, int NumILP>
__global__ void scatterAddGenericKernel(scalar_t *outFeatures,
const scalar_t *buffer,
const Index *indices, int size,
int numPlanes) {
int ILPStrideX[NumILP];
Index inds[NumILP];
#pragma unroll
for (int ilp = 0; ilp < NumILP; ilp++)
ILPStrideX[ilp] = ilp * gridDim.x * blockDim.x;
for (int ix : tv::KernelLoopX<int, NumILP>(size)) {
#pragma unroll
for (int ilp = 0; ilp < NumILP; ilp++) {
if (ix + ILPStrideX[ilp] < size)
inds[ilp] = indices[ix + ILPStrideX[ilp]] * numPlanes;
}
for (int iy : tv::KernelLoopY<int>(numPlanes)) {
#pragma unroll
for (int ilp = 0; ilp < NumILP; ++ilp) {
if (ix + ILPStrideX[ilp] < size) {
outFeatures[inds[ilp] + iy] +=
buffer[(ix + ILPStrideX[ilp]) * numPlanes + iy];
}
}
}
}
}
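// Blocked vectorized scatter-add: loads VecType chunks from the output and the
// buffer, adds them element-wise in registers and writes the sum back.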
template <typename scalar_t, typename Index, int NumTLP, int NumILP,
typename VecType = int4>
__global__ void scatterAddVecBlockKernel(scalar_t *outFeatures,
const scalar_t *buffer,
const Index *indices, int size,
int numPlanes) {
int ILPStrideY[NumILP];
constexpr int vecloadFactor = sizeof(VecType) / sizeof(scalar_t);
#pragma unroll
for (int ilp = 0; ilp < NumILP; ilp++)
ILPStrideY[ilp] = ilp * gridDim.y * blockDim.y;
outFeatures += blockIdx.x * NumTLP;
buffer += blockIdx.x * NumTLP;
scalar_t buf[vecloadFactor];
scalar_t buf2[vecloadFactor];
Index idx;
for (int iy : tv::KernelLoopY<int, NumILP>(size)) {
#pragma unroll
for (int ilp = 0; ilp < NumILP; ++ilp) {
idx = indices[iy + ILPStrideY[ilp]] * numPlanes + threadIdx.x;
reinterpret_cast<VecType *>(buf)[0] =
reinterpret_cast<VecType *>(outFeatures)[idx];
reinterpret_cast<VecType *>(buf2)[0] = reinterpret_cast<const VecType *>(
buffer)[(iy + ILPStrideY[ilp]) * numPlanes + threadIdx.x];
#pragma unroll
for (int i = 0; i < vecloadFactor; i++) {
buf[i] += buf2[i];
}
reinterpret_cast<VecType *>(outFeatures)[idx] =
reinterpret_cast<VecType *>(buf)[0];
}
}
}
#endif
#ifndef PYTORCH_CPP_HELPER
#define PYTORCH_CPP_HELPER
#include <torch/extension.h>
#include <vector>
......
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef PARAMS_GRID_H_
#define PARAMS_GRID_H_
#include <tuple>
#include <vector>
namespace detail {
template <class scalar_t>
int getTotalSize(std::vector<scalar_t> arg) {
return arg.size();
}
template <class scalar_t, class... TArgs>
int getTotalSize(std::vector<scalar_t> arg, std::vector<TArgs>... args) {
return arg.size() * getTotalSize(args...);
}
template <typename scalar_t>
int getSize(std::vector<scalar_t> arg) {
return arg.size();
}
template <int Idx, class TT, class scalar_t>
void assigner(TT &src, std::vector<int> counter, std::vector<scalar_t> &arg) {
std::get<Idx>(src) = arg[counter[Idx]];
}
template <int Idx, class TT, class scalar_t, class... TArgs>
void assigner(TT &src, std::vector<int> counter, std::vector<scalar_t> &arg,
std::vector<TArgs> &... args) {
std::get<Idx>(src) = arg[counter[Idx]];
assigner<Idx + 1>(src, counter, args...);
}
} // namespace detail
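// paramsGrid returns the Cartesian product of its argument vectors as a vector
// of tuples, e.g. paramsGrid(std::vector<int>{1, 2}, std::vector<float>{.5f})
// yields {(1, .5f), (2, .5f)}; handy for sweeping parameter combinations.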
template <class... TArgs>
std::vector<std::tuple<TArgs...>> paramsGrid(std::vector<TArgs>... args) {
int length = detail::getTotalSize(args...);
std::vector<int> sizes = {detail::getSize(args)...};
int size = sizes.size();
std::vector<std::tuple<TArgs...>> params(length);
std::vector<int> counter(size);
for (int i = 0; i < length; ++i) {
detail::assigner<0>(params[i], counter, args...);
counter[size - 1] += 1;
for (int c = size - 1; c >= 0; --c) {
if (counter[c] == sizes[c] && c > 0) {
counter[c - 1] += 1;
counter[c] = 0;
}
}
}
return params;
}
#endif
// Copyright Louis Delacroix 2010 - 2014.
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
//
// A pretty printing library for C++
//
// Usage:
// Include this header, and operator<< will "just work".
#ifndef H_PRETTY_PRINT
#define H_PRETTY_PRINT
#include <cstddef>
#include <iterator>
#include <memory>
#include <ostream>
#include <set>
#include <tuple>
#include <type_traits>
#include <unordered_set>
#include <utility>
#include <valarray>
namespace pretty_print {
namespace detail {
// SFINAE type trait to detect whether T::const_iterator exists.
struct sfinae_base {
using yes = char;
using no = yes[2];
};
template <typename T>
struct has_const_iterator : private sfinae_base {
private:
template <typename C>
static yes &test(typename C::const_iterator *);
template <typename C>
static no &test(...);
public:
static const bool value = sizeof(test<T>(nullptr)) == sizeof(yes);
using type = T;
};
template <typename T>
struct has_begin_end : private sfinae_base {
private:
template <typename C>
static yes &
f(typename std::enable_if<
std::is_same<decltype(static_cast<typename C::const_iterator (C::*)()
const>(&C::begin)),
typename C::const_iterator (C::*)() const>::value>::type *);
template <typename C>
static no &f(...);
template <typename C>
static yes &g(typename std::enable_if<
std::is_same<decltype(static_cast<typename C::const_iterator (
C::*)() const>(&C::end)),
typename C::const_iterator (C::*)() const>::value,
void>::type *);
template <typename C>
static no &g(...);
public:
static bool const beg_value = sizeof(f<T>(nullptr)) == sizeof(yes);
static bool const end_value = sizeof(g<T>(nullptr)) == sizeof(yes);
};
} // namespace detail
// Holds the delimiter values for a specific character type
template <typename TChar>
struct delimiters_values {
using char_type = TChar;
const char_type *prefix;
const char_type *delimiter;
const char_type *postfix;
};
// Defines the delimiter values for a specific container and character type
template <typename T, typename TChar>
struct delimiters {
using type = delimiters_values<TChar>;
static const type values;
};
// Functor to print containers. You can use this directly if you want
// to specify a non-default delimiters type. The printing logic can
// be customized by specializing the nested template.
template <typename T, typename TChar = char,
typename TCharTraits = ::std::char_traits<TChar>,
typename TDelimiters = delimiters<T, TChar>>
struct print_container_helper {
using delimiters_type = TDelimiters;
using ostream_type = std::basic_ostream<TChar, TCharTraits>;
template <typename U>
struct printer {
static void print_body(const U &c, ostream_type &stream) {
using std::begin;
using std::end;
auto it = begin(c);
const auto the_end = end(c);
if (it != the_end) {
for (;;) {
stream << *it;
if (++it == the_end) break;
if (delimiters_type::values.delimiter != NULL)
stream << delimiters_type::values.delimiter;
}
}
}
};
print_container_helper(const T &container) : container_(container) {}
inline void operator()(ostream_type &stream) const {
if (delimiters_type::values.prefix != NULL)
stream << delimiters_type::values.prefix;
printer<T>::print_body(container_, stream);
if (delimiters_type::values.postfix != NULL)
stream << delimiters_type::values.postfix;
}
private:
const T &container_;
};
// Specialization for pairs
template <typename T, typename TChar, typename TCharTraits,
typename TDelimiters>
template <typename T1, typename T2>
struct print_container_helper<T, TChar, TCharTraits,
TDelimiters>::printer<std::pair<T1, T2>> {
using ostream_type =
typename print_container_helper<T, TChar, TCharTraits,
TDelimiters>::ostream_type;
static void print_body(const std::pair<T1, T2> &c, ostream_type &stream) {
stream << c.first;
if (print_container_helper<T, TChar, TCharTraits,
TDelimiters>::delimiters_type::values
.delimiter != NULL)
stream << print_container_helper<T, TChar, TCharTraits,
TDelimiters>::delimiters_type::values
.delimiter;
stream << c.second;
}
};
// Specialization for tuples
template <typename T, typename TChar, typename TCharTraits,
typename TDelimiters>
template <typename... Args>
struct print_container_helper<T, TChar, TCharTraits,
TDelimiters>::printer<std::tuple<Args...>> {
using ostream_type =
typename print_container_helper<T, TChar, TCharTraits,
TDelimiters>::ostream_type;
using element_type = std::tuple<Args...>;
template <std::size_t I>
struct Int {};
static void print_body(const element_type &c, ostream_type &stream) {
tuple_print(c, stream, Int<0>());
}
static void tuple_print(const element_type &, ostream_type &,
Int<sizeof...(Args)>) {}
static void tuple_print(
const element_type &c, ostream_type &stream,
typename std::conditional<sizeof...(Args) != 0, Int<0>,
std::nullptr_t>::type) {
stream << std::get<0>(c);
tuple_print(c, stream, Int<1>());
}
template <std::size_t N>
static void tuple_print(const element_type &c, ostream_type &stream, Int<N>) {
if (print_container_helper<T, TChar, TCharTraits,
TDelimiters>::delimiters_type::values
.delimiter != NULL)
stream << print_container_helper<T, TChar, TCharTraits,
TDelimiters>::delimiters_type::values
.delimiter;
stream << std::get<N>(c);
tuple_print(c, stream, Int<N + 1>());
}
};
// Prints a print_container_helper to the specified stream.
template <typename T, typename TChar, typename TCharTraits,
typename TDelimiters>
inline std::basic_ostream<TChar, TCharTraits> &operator<<(
std::basic_ostream<TChar, TCharTraits> &stream,
const print_container_helper<T, TChar, TCharTraits, TDelimiters> &helper) {
helper(stream);
return stream;
}
// Basic is_container template; specialize to derive from std::true_type for all
// desired container types
template <typename T>
struct is_container
: public std::integral_constant<bool,
detail::has_const_iterator<T>::value &&
detail::has_begin_end<T>::beg_value &&
detail::has_begin_end<T>::end_value> {};
template <typename T, std::size_t N>
struct is_container<T[N]> : std::true_type {};
template <std::size_t N>
struct is_container<char[N]> : std::false_type {};
template <typename T>
struct is_container<std::valarray<T>> : std::true_type {};
template <typename T1, typename T2>
struct is_container<std::pair<T1, T2>> : std::true_type {};
template <typename... Args>
struct is_container<std::tuple<Args...>> : std::true_type {};
// Default delimiters
template <typename T>
struct delimiters<T, char> {
static const delimiters_values<char> values;
};
template <typename T>
const delimiters_values<char> delimiters<T, char>::values = {"[", ", ", "]"};
template <typename T>
struct delimiters<T, wchar_t> {
static const delimiters_values<wchar_t> values;
};
template <typename T>
const delimiters_values<wchar_t> delimiters<T, wchar_t>::values = {L"[", L", ",
L"]"};
// Delimiters for (multi)set and unordered_(multi)set
template <typename T, typename TComp, typename TAllocator>
struct delimiters<::std::set<T, TComp, TAllocator>, char> {
static const delimiters_values<char> values;
};
template <typename T, typename TComp, typename TAllocator>
const delimiters_values<char>
delimiters<::std::set<T, TComp, TAllocator>, char>::values = {"{", ", ",
"}"};
template <typename T, typename TComp, typename TAllocator>
struct delimiters<::std::set<T, TComp, TAllocator>, wchar_t> {
static const delimiters_values<wchar_t> values;
};
template <typename T, typename TComp, typename TAllocator>
const delimiters_values<wchar_t>
delimiters<::std::set<T, TComp, TAllocator>, wchar_t>::values = {
L"{", L", ", L"}"};
template <typename T, typename TComp, typename TAllocator>
struct delimiters<::std::multiset<T, TComp, TAllocator>, char> {
static const delimiters_values<char> values;
};
template <typename T, typename TComp, typename TAllocator>
const delimiters_values<char>
delimiters<::std::multiset<T, TComp, TAllocator>, char>::values = {
"{", ", ", "}"};
template <typename T, typename TComp, typename TAllocator>
struct delimiters<::std::multiset<T, TComp, TAllocator>, wchar_t> {
static const delimiters_values<wchar_t> values;
};
template <typename T, typename TComp, typename TAllocator>
const delimiters_values<wchar_t>
delimiters<::std::multiset<T, TComp, TAllocator>, wchar_t>::values = {
L"{", L", ", L"}"};
template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters<::std::unordered_set<T, THash, TEqual, TAllocator>, char> {
static const delimiters_values<char> values;
};
template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<char> delimiters<
::std::unordered_set<T, THash, TEqual, TAllocator>, char>::values = {
"{", ", ", "}"};
template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters<::std::unordered_set<T, THash, TEqual, TAllocator>, wchar_t> {
static const delimiters_values<wchar_t> values;
};
template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<wchar_t> delimiters<
::std::unordered_set<T, THash, TEqual, TAllocator>, wchar_t>::values = {
L"{", L", ", L"}"};
template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters<::std::unordered_multiset<T, THash, TEqual, TAllocator>,
char> {
static const delimiters_values<char> values;
};
template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<char> delimiters<
::std::unordered_multiset<T, THash, TEqual, TAllocator>, char>::values = {
"{", ", ", "}"};
template <typename T, typename THash, typename TEqual, typename TAllocator>
struct delimiters<::std::unordered_multiset<T, THash, TEqual, TAllocator>,
wchar_t> {
static const delimiters_values<wchar_t> values;
};
template <typename T, typename THash, typename TEqual, typename TAllocator>
const delimiters_values<wchar_t>
delimiters<::std::unordered_multiset<T, THash, TEqual, TAllocator>,
wchar_t>::values = {L"{", L", ", L"}"};
// Delimiters for pair and tuple
template <typename T1, typename T2>
struct delimiters<std::pair<T1, T2>, char> {
static const delimiters_values<char> values;
};
template <typename T1, typename T2>
const delimiters_values<char> delimiters<std::pair<T1, T2>, char>::values = {
"(", ", ", ")"};
template <typename T1, typename T2>
struct delimiters<::std::pair<T1, T2>, wchar_t> {
static const delimiters_values<wchar_t> values;
};
template <typename T1, typename T2>
const delimiters_values<wchar_t>
delimiters<::std::pair<T1, T2>, wchar_t>::values = {L"(", L", ", L")"};
template <typename... Args>
struct delimiters<std::tuple<Args...>, char> {
static const delimiters_values<char> values;
};
template <typename... Args>
const delimiters_values<char> delimiters<std::tuple<Args...>, char>::values = {
"(", ", ", ")"};
template <typename... Args>
struct delimiters<::std::tuple<Args...>, wchar_t> {
static const delimiters_values<wchar_t> values;
};
template <typename... Args>
const delimiters_values<wchar_t>
delimiters<::std::tuple<Args...>, wchar_t>::values = {L"(", L", ", L")"};
// Type-erasing helper class for easy use of custom delimiters.
// Requires TCharTraits = std::char_traits<TChar> and TChar = char or wchar_t,
// and MyDelims needs to be defined for TChar. Usage: "cout <<
// pretty_print::custom_delims<MyDelims>(x)".
struct custom_delims_base {
virtual ~custom_delims_base() {}
virtual std::ostream &stream(::std::ostream &) = 0;
virtual std::wostream &stream(::std::wostream &) = 0;
};
template <typename T, typename Delims>
struct custom_delims_wrapper : custom_delims_base {
custom_delims_wrapper(const T &t_) : t(t_) {}
std::ostream &stream(std::ostream &s) {
return s << print_container_helper<T, char, std::char_traits<char>, Delims>(
t);
}
std::wostream &stream(std::wostream &s) {
return s << print_container_helper<T, wchar_t, std::char_traits<wchar_t>,
Delims>(t);
}
private:
const T &t;
};
template <typename Delims>
struct custom_delims {
template <typename Container>
custom_delims(const Container &c)
: base(new custom_delims_wrapper<Container, Delims>(c)) {}
std::unique_ptr<custom_delims_base> base;
};
template <typename TChar, typename TCharTraits, typename Delims>
inline std::basic_ostream<TChar, TCharTraits> &operator<<(
std::basic_ostream<TChar, TCharTraits> &s, const custom_delims<Delims> &p) {
return p.base->stream(s);
}
// A wrapper for a C-style array given as pointer-plus-size.
// Usage: std::cout << pretty_print_array(arr, n) << std::endl;
template <typename T>
struct array_wrapper_n {
typedef const T *const_iterator;
typedef T value_type;
array_wrapper_n(const T *const a, size_t n) : _array(a), _n(n) {}
inline const_iterator begin() const { return _array; }
inline const_iterator end() const { return _array + _n; }
private:
const T *const _array;
size_t _n;
};
// A wrapper for hash-table based containers that offer local iterators to each
// bucket. Usage: std::cout << bucket_print(m, 4) << std::endl; (Prints bucket
// 5 of container m.)
template <typename T>
struct bucket_print_wrapper {
typedef typename T::const_local_iterator const_iterator;
typedef typename T::size_type size_type;
const_iterator begin() const { return m_map.cbegin(n); }
const_iterator end() const { return m_map.cend(n); }
bucket_print_wrapper(const T &m, size_type bucket) : m_map(m), n(bucket) {}
private:
const T &m_map;
const size_type n;
};
} // namespace pretty_print
// Global accessor functions for the convenience wrappers
template <typename T>
inline pretty_print::array_wrapper_n<T> pretty_print_array(const T *const a,
size_t n) {
return pretty_print::array_wrapper_n<T>(a, n);
}
template <typename T>
pretty_print::bucket_print_wrapper<T> bucket_print(const T &m,
typename T::size_type n) {
return pretty_print::bucket_print_wrapper<T>(m, n);
}
// Main magic entry point: An overload snuck into namespace std.
// Can we do better?
namespace std {
// Prints a container to the stream using default delimiters
template <typename T, typename TChar, typename TCharTraits>
inline typename enable_if<::pretty_print::is_container<T>::value,
basic_ostream<TChar, TCharTraits> &>::type
operator<<(basic_ostream<TChar, TCharTraits> &stream, const T &container) {
return stream
<< ::pretty_print::print_container_helper<T, TChar, TCharTraits>(
container);
}
} // namespace std
#endif // H_PRETTY_PRINT
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <pybind11/embed.h>
#include <pybind11/functional.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <spconv/tensorview/tensorview.h>
#include <algorithm>
#include <iostream>
namespace py = pybind11;
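// Conversion helpers between pybind11 arrays and C++ containers: the *2Vector
// functions copy the data into a std::vector, while the *2TensorView functions
// wrap the existing buffer in a tv::TensorView without copying.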
template <typename scalar_t, typename TPyObject>
std::vector<scalar_t> array2Vector(TPyObject arr) {
py::array arr_np = arr;
size_t size = arr.attr("size").template cast<size_t>();
py::array_t<scalar_t> arr_cc = arr_np;
std::vector<scalar_t> data(arr_cc.data(), arr_cc.data() + size);
return data;
}
template <typename scalar_t>
std::vector<scalar_t> arrayT2Vector(py::array_t<scalar_t> arr) {
std::vector<scalar_t> data(arr.data(), arr.data() + arr.size());
return data;
}
template <typename scalar_t, typename TPyObject>
tv::TensorView<scalar_t> array2TensorView(TPyObject arr) {
py::array arr_np = arr;
py::array_t<scalar_t> arr_cc = arr_np;
tv::Shape shape;
for (int i = 0; i < arr_cc.ndim(); ++i) {
shape.push_back(arr_cc.shape(i));
}
return tv::TensorView<scalar_t>(arr_cc.mutable_data(), shape);
}
template <typename scalar_t>
tv::TensorView<scalar_t> arrayT2TensorView(py::array_t<scalar_t> arr) {
tv::Shape shape;
for (int i = 0; i < arr.ndim(); ++i) {
shape.push_back(arr.shape(i));
}
return tv::TensorView<scalar_t>(arr.mutable_data(), shape);
}
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPCONV_GEOMETRY_H_
#define SPCONV_GEOMETRY_H_
#include <utils/spconv/tensorview/tensorview.h>
#include <iostream>
#include <limits>
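// Enumerates every output position that a single input coordinate can
// contribute to for the given kernelSize/stride/padding/dilation. Each valid
// point is written to `out` as NDim coordinates followed by the linearized
// kernel offset; the return value is the number of valid points.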
template <typename Index, unsigned NDim>
TV_HOST_DEVICE Index getValidOutPos(const Index *input_pos,
const Index *kernelSize,
const Index *stride, const Index *padding,
const Index *dilation,
const Index *outSpatialShape, Index *out) {
Index lowers[NDim];
Index uppers[NDim];
Index counter[NDim];
Index counterSize[NDim];
Index pointCounter = 0;
Index val;
Index numPoints = 1;
Index m, offset;
bool valid = false;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
lowers[i] = (input_pos[i] - (kernelSize[i] - 1) * dilation[i] - 1 +
stride[i] + padding[i]) /
stride[i];
uppers[i] = (input_pos[i] + padding[i]) / stride[i];
}
#pragma unroll
for (unsigned i = 0; i < NDim; ++i) {
counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1);
numPoints *= counterSize[i];
}
#pragma unroll
for (int i = 0; i < NDim; ++i) {
counter[i] = 0;
}
for (int i = 0; i < numPoints; ++i) {
valid = true;
m = 1;
offset = 0;
#pragma unroll
for (int j = NDim - 1; j >= 0; --j) {
val = uppers[j] - counter[j] * dilation[j];
out[pointCounter * (NDim + 1) + j] = val;
if (val < 0 || (val > outSpatialShape[j] - 1)) {
valid = false;
// break;
}
offset += m * (input_pos[j] - val * stride[j] + padding[j]) / dilation[j];
m *= kernelSize[j];
}
out[pointCounter * (NDim + 1) + NDim] = offset;
if (valid) ++pointCounter;
counter[NDim - 1] += 1;
#pragma unroll
for (int c = NDim - 1; c >= 0; --c) {
if (counter[c] == counterSize[c] && c > 0) {
counter[c - 1] += 1;
counter[c] = 0;
}
}
}
return pointCounter;
}
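// Transposed-convolution counterpart of getValidOutPos: candidate outputs span
// input_pos * stride - padding up to + (kernelSize - 1) * dilation.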
template <typename Index, unsigned NDim>
TV_HOST_DEVICE Index getValidOutPosTranspose(
const Index *input_pos, const Index *kernelSize, const Index *stride,
const Index *padding, const Index *dilation, const Index *outSpatialShape,
Index *out) {
Index lowers[NDim];
Index uppers[NDim];
Index counter[NDim];
Index counterSize[NDim];
Index pointCounter = 0;
Index val;
Index numPoints = 1;
Index m, offset;
bool valid = false;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
lowers[i] = input_pos[i] * stride[i] - padding[i];
uppers[i] = lowers[i] + (kernelSize[i] - 1) * dilation[i];
}
#pragma unroll
for (unsigned i = 0; i < NDim; ++i) {
counterSize[i] = ((uppers[i] - lowers[i]) / dilation[i] + 1);
numPoints *= counterSize[i];
}
#pragma unroll
for (int i = 0; i < NDim; ++i) {
counter[i] = 0;
}
for (int i = 0; i < numPoints; ++i) {
valid = true;
m = 1;
offset = 0;
#pragma unroll
for (int j = NDim - 1; j >= 0; --j) {
val = uppers[j] - counter[j] * dilation[j];
out[pointCounter * (NDim + 1) + j] = val;
if (val < 0 || (val > outSpatialShape[j] - 1)) {
valid = false;
}
offset += m * (val - lowers[j]) / dilation[j];
m *= kernelSize[j];
}
out[pointCounter * (NDim + 1) + NDim] = offset;
if (valid) ++pointCounter;
counter[NDim - 1] += 1;
#pragma unroll
for (int c = NDim - 1; c >= 0; --c) {
if (counter[c] == counterSize[c] && c > 0) {
counter[c - 1] += 1;
counter[c] = 0;
}
}
}
return pointCounter;
}
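// CPU reference for building indice pairs: each flattened output index seen
// for the first time allocates a new output row (gridsOut acts as the hash
// table), indicePairs [K, 2, L] is filled per kernel offset, and the number of
// active outputs is returned.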
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsConv(tv::TensorView<const Index> indicesIn,
tv::TensorView<Index> indicesOut,
tv::TensorView<IndexGrid> gridsOut,
tv::TensorView<Index> indicePairs,
tv::TensorView<Index> indiceNum,
const Index *kernelSize, const Index *stride,
const Index *padding, const Index *dilation,
const Index *outSpatialShape) {
// indicesOut: num_active * kernelVolume * (NDim + 1)
Index numAct = 0;
auto numActIn = indicesIn.dim(0);
Index batchIdx = 0;
Index spatialVolume = 1;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
spatialVolume *= outSpatialShape[i];
}
Index kernelVolume = 1;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
kernelVolume *= kernelSize[i];
}
Index numValidPoints = 0;
std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
Index *validPoints = validPoints_.data();
Index *pointPtr = nullptr;
for (int j = 0; j < numActIn; ++j) {
batchIdx = indicesIn(j, 0);
numValidPoints = getValidOutPos<Index, NDim>(
indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
dilation, outSpatialShape, validPoints);
for (Index i = 0; i < numValidPoints; ++i) {
pointPtr = validPoints + i * (NDim + 1);
auto offset = pointPtr[NDim];
auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
spatialVolume * batchIdx;
if (gridsOut[index] == -1) {
for (unsigned k = 1; k < NDim + 1; ++k) {
indicesOut(numAct, k) = pointPtr[k - 1];
}
indicesOut(numAct, 0) = batchIdx;
gridsOut[index] = numAct++;
}
// indicePairs: [K, 2, L]
indicePairs(offset, 0, indiceNum[offset]) = j;
indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];
}
}
return numAct;
}
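// Same as getIndicePairsConv, but output positions come from
// getValidOutPosTranspose (transposed convolution).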
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsDeConv(tv::TensorView<const Index> indicesIn,
tv::TensorView<Index> indicesOut,
tv::TensorView<IndexGrid> gridsOut,
tv::TensorView<Index> indicePairs,
tv::TensorView<Index> indiceNum,
const Index *kernelSize, const Index *stride,
const Index *padding, const Index *dilation,
const Index *outSpatialShape) {
Index numAct = 0;
auto numActIn = indicesIn.dim(0);
Index batchIdx = 0;
Index spatialVolume = 1;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
spatialVolume *= outSpatialShape[i];
}
Index kernelVolume = 1;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
kernelVolume *= kernelSize[i];
}
Index numValidPoints = 0;
std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
Index *validPoints = validPoints_.data();
Index *pointPtr = nullptr;
for (int j = 0; j < numActIn; ++j) {
batchIdx = indicesIn(j, 0);
numValidPoints = getValidOutPosTranspose<Index, NDim>(
indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
dilation, outSpatialShape, validPoints);
for (Index i = 0; i < numValidPoints; ++i) {
pointPtr = validPoints + i * (NDim + 1);
auto offset = pointPtr[NDim];
auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
spatialVolume * batchIdx;
if (gridsOut[index] == -1) {
for (unsigned k = 1; k < NDim + 1; ++k) {
indicesOut(numAct, k) = pointPtr[k - 1];
}
indicesOut(numAct, 0) = batchIdx;
gridsOut[index] = numAct++;
}
// indicePairs: [K, 2, L]
indicePairs(offset, 0, indiceNum[offset]) = j;
indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];
}
}
return numAct;
}
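// Submanifold CPU variant: the output sites are exactly the input sites, so
// the grid is pre-filled with the input rows and only pairs that land on an
// active site are recorded; returns numActIn.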
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn,
tv::TensorView<IndexGrid> gridsOut,
tv::TensorView<Index> indicePairs,
tv::TensorView<Index> indiceNum,
const Index *const kernelSize,
const Index *const stride, const Index *const padding,
const Index *dilation,
const Index *const outSpatialShape) {
Index numAct = 0;
auto numActIn = indicesIn.dim(0);
Index batchIdx = 0;
Index spatialVolume = 1;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
spatialVolume *= outSpatialShape[i];
}
Index kernelVolume = 1;
#pragma unroll
for (int i = 0; i < NDim; ++i) {
kernelVolume *= kernelSize[i];
}
Index numValidPoints = 0;
// Index validPoints[kernelVolume * (NDim + 1)];
std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
Index *validPoints = validPoints_.data();
Index *pointPtr = nullptr;
Index index = 0;
for (int j = 0; j < numActIn; ++j) {
index = tv::rowArrayIdx<Index, NDim>(indicesIn.data() + j * (NDim + 1) + 1,
outSpatialShape) +
spatialVolume * indicesIn(j, 0);
gridsOut[index] = j;
}
for (int j = 0; j < numActIn; ++j) {
numValidPoints = getValidOutPos<Index, NDim>(
indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
dilation, outSpatialShape, validPoints);
for (Index i = 0; i < numValidPoints; ++i) {
pointPtr = validPoints + i * (NDim + 1);
auto offset = pointPtr[NDim];
index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
spatialVolume * indicesIn(j, 0);
if (gridsOut[index] > -1) {
indicePairs(offset, 0, indiceNum[offset]) = j;
indicePairs(offset, 1, indiceNum[offset]++) = gridsOut[index];
}
}
}
return numActIn;
}
#endif
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPARSE_CONV_INDICE_FUNCTOR_H_
#define SPARSE_CONV_INDICE_FUNCTOR_H_
#include <utils/spconv/tensorview/tensorview.h>
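// Functor declarations for index-pair construction; the Device template
// parameter selects the CPU or GPU specialization.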
namespace functor {
template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctorP1 {
Index operator()(const Device& d, tv::TensorView<const Index> indicesIn,
tv::TensorView<Index> indicesOut,
tv::TensorView<IndexGrid> gridsOut,
tv::TensorView<Index> indicePairs,
tv::TensorView<Index> indiceNum,
tv::TensorView<Index> indicePairUnique,
const tv::SimpleVector<Index, NDim> kernelSize,
const tv::SimpleVector<Index, NDim> stride,
const tv::SimpleVector<Index, NDim> padding,
const tv::SimpleVector<Index, NDim> dilation,
const tv::SimpleVector<Index, NDim> outSpatialShape,
bool transpose);
};
template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctorP2 {
Index operator()(const Device& d, tv::TensorView<const Index> indicesIn,
tv::TensorView<Index> indicesOut,
tv::TensorView<IndexGrid> gridsOut,
tv::TensorView<Index> indicePairs,
tv::TensorView<Index> indiceNum,
tv::TensorView<Index> indicePairUnique,
const tv::SimpleVector<Index, NDim> outSpatialShape,
bool transpose, bool resetGrid = false);
};
template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctor {
Index operator()(const Device& d, tv::TensorView<const Index> indicesIn,
tv::TensorView<Index> indicesOut,
tv::TensorView<IndexGrid> gridsOut,
tv::TensorView<Index> indicePairs,
tv::TensorView<Index> indiceNum,
const tv::SimpleVector<Index, NDim> kernelSize,
const tv::SimpleVector<Index, NDim> stride,
const tv::SimpleVector<Index, NDim> padding,
const tv::SimpleVector<Index, NDim> dilation,
const tv::SimpleVector<Index, NDim> outSpatialShape,
bool transpose, bool resetGrid = false);
};
template <typename Device, typename Index, typename IndexGrid, unsigned NDim>
struct CreateSubMIndicePairFunctor {
Index operator()(const Device& d, tv::TensorView<const Index> indicesIn,
tv::TensorView<IndexGrid> gridsOut,
tv::TensorView<Index> indicePairs,
tv::TensorView<Index> indiceNum,
const tv::SimpleVector<Index, NDim> kernelSize,
const tv::SimpleVector<Index, NDim> stride,
const tv::SimpleVector<Index, NDim> padding,
const tv::SimpleVector<Index, NDim> dilation,
const tv::SimpleVector<Index, NDim> outSpatialShape,
bool transpose, bool resetGrid = false);
};
} // namespace functor
#endif
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPARSE_MAXPOOL_FUNCTOR_H_
#define SPARSE_MAXPOOL_FUNCTOR_H_
#include <utils/spconv/tensorview/tensorview.h>
namespace functor {
template <typename Device, typename scalar_t, typename Index>
struct SparseMaxPoolForwardFunctor {
void operator()(const Device& d, tv::TensorView<scalar_t> outFeatures,
tv::TensorView<const scalar_t> inFeatures,
tv::TensorView<const Index> indices, int size);
};
template <typename Device, typename scalar_t, typename Index>
struct SparseMaxPoolBackwardFunctor {
void operator()(const Device& d, tv::TensorView<const scalar_t> outFeatures,
tv::TensorView<const scalar_t> inFeatures,
tv::TensorView<const scalar_t> fout,
tv::TensorView<scalar_t> fin,
tv::TensorView<const Index> indices, int size);
};
} // namespace functor
#endif
#ifndef MP_HELPER_H_
#define MP_HELPER_H_
#include <type_traits>
#include <utility>
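// Minimal compile-time list utilities: mp_list holds a pack of types and
// mp_for_each<L>(f) invokes f with a default-constructed instance of every
// element of L, which allows iterating over supported types at compile time.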
template <class... T>
struct mp_list {};
template <class T, T... I>
using mp_list_c = mp_list<std::integral_constant<T, I>...>;
namespace detail {
template <class... T, class F>
constexpr F mp_for_each_impl(mp_list<T...>, F &&f) {
return std::initializer_list<int>{(f(T()), 0)...}, std::forward<F>(f);
}
template <class F>
constexpr F mp_for_each_impl(mp_list<>, F &&f) {
return std::forward<F>(f);
}
} // namespace detail
namespace detail {
template <class A, template <class...> class B>
struct mp_rename_impl {
// An error "no type named 'type'" here means that the first argument to
// mp_rename is not a list
};
template <template <class...> class A, class... T, template <class...> class B>
struct mp_rename_impl<A<T...>, B> {
using type = B<T...>;
};
} // namespace detail
template <class A, template <class...> class B>
using mp_rename = typename ::detail::mp_rename_impl<A, B>::type;
template <class L, class F>
constexpr F mp_for_each(F &&f) {
return ::detail::mp_for_each_impl(mp_rename<L, mp_list>(),
std::forward<F>(f));
}
#endif
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <math.h>
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <algorithm>
#include <iostream>
namespace py = pybind11;
using namespace pybind11::literals;
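// CPU voxelization: maps every point to a voxel coordinate, allocates a new
// voxel (up to max_voxels) the first time a coordinate is seen, copies at most
// max_points points into each voxel and returns the number of voxels.
// coor_to_voxelidx acts as a dense lookup table and is reset to -1 on exit.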
template <typename DType, int NDim>
int points_to_voxel_3d_np(py::array_t<DType> points, py::array_t<DType> voxels,
py::array_t<int> coors,
py::array_t<int> num_points_per_voxel,
py::array_t<int> coor_to_voxelidx,
std::vector<DType> voxel_size,
std::vector<DType> coors_range, int max_points,
int max_voxels) {
auto points_rw = points.template mutable_unchecked<2>();
auto voxels_rw = voxels.template mutable_unchecked<3>();
auto coors_rw = coors.mutable_unchecked<2>();
auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
auto N = points_rw.shape(0);
auto num_features = points_rw.shape(1);
constexpr int ndim_minus_1 = NDim - 1;
int voxel_num = 0;
bool failed = false;
int coor[NDim];
int c;
int grid_size[NDim];
for (int i = 0; i < NDim; ++i) {
grid_size[i] =
round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
}
int voxelidx, num;
for (int i = 0; i < N; ++i) {
failed = false;
for (int j = 0; j < NDim; ++j) {
c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
if ((c < 0 || c >= grid_size[j])) {
failed = true;
break;
}
coor[ndim_minus_1 - j] = c;
}
if (failed) continue;
voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
if (voxelidx == -1) {
voxelidx = voxel_num;
if (voxel_num >= max_voxels) continue;
voxel_num += 1;
coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
for (int k = 0; k < NDim; ++k) {
coors_rw(voxelidx, k) = coor[k];
}
}
num = num_points_per_voxel_rw(voxelidx);
if (num < max_points) {
for (int k = 0; k < num_features; ++k) {
voxels_rw(voxelidx, num, k) = points_rw(i, k);
}
num_points_per_voxel_rw(voxelidx) += 1;
}
}
for (int i = 0; i < voxel_num; ++i) {
coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
}
return voxel_num;
}
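// Variant that also keeps a running mean of the points in each voxel and pads
// the unused point slots with that mean.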
template <typename DType, int NDim>
int points_to_voxel_3d_np_mean(py::array_t<DType> points,
py::array_t<DType> voxels,
py::array_t<DType> means, py::array_t<int> coors,
py::array_t<int> num_points_per_voxel,
py::array_t<int> coor_to_voxelidx,
std::vector<DType> voxel_size,
std::vector<DType> coors_range, int max_points,
int max_voxels) {
auto points_rw = points.template mutable_unchecked<2>();
auto means_rw = means.template mutable_unchecked<2>();
auto voxels_rw = voxels.template mutable_unchecked<3>();
auto coors_rw = coors.mutable_unchecked<2>();
auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
auto N = points_rw.shape(0);
auto num_features = points_rw.shape(1);
constexpr int ndim_minus_1 = NDim - 1;
int voxel_num = 0;
bool failed = false;
int coor[NDim];
int c;
int grid_size[NDim];
for (int i = 0; i < NDim; ++i) {
grid_size[i] =
round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
}
int voxelidx, num;
for (int i = 0; i < N; ++i) {
failed = false;
for (int j = 0; j < NDim; ++j) {
c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
if ((c < 0 || c >= grid_size[j])) {
failed = true;
break;
}
coor[ndim_minus_1 - j] = c;
}
if (failed) continue;
voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
if (voxelidx == -1) {
voxelidx = voxel_num;
if (voxel_num >= max_voxels) continue;
voxel_num += 1;
coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
for (int k = 0; k < NDim; ++k) {
coors_rw(voxelidx, k) = coor[k];
}
}
num = num_points_per_voxel_rw(voxelidx);
if (num < max_points) {
for (int k = 0; k < num_features; ++k) {
voxels_rw(voxelidx, num, k) = points_rw(i, k);
}
num_points_per_voxel_rw(voxelidx) += 1;
for (int k = 0; k < num_features; ++k) {
means_rw(voxelidx, k) +=
(points_rw(i, k) - means_rw(voxelidx, k)) / DType(num + 1);
}
}
}
for (int i = 0; i < voxel_num; ++i) {
coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
num = num_points_per_voxel_rw(i);
for (int j = num; j < max_points; ++j) {
for (int k = 0; k < num_features; ++k) {
voxels_rw(i, j, k) = means_rw(i, k);
}
}
}
return voxel_num;
}
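// Variant that additionally tracks the per-feature minimum and maximum inside
// each voxel and stores (max - min) in `height` before returning.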
template <typename DType, int NDim>
int points_to_voxel_3d_np_height(
py::array_t<DType> points, py::array_t<DType> voxels,
py::array_t<DType> height, py::array_t<DType> maxs, py::array_t<int> coors,
py::array_t<int> num_points_per_voxel, py::array_t<int> coor_to_voxelidx,
std::vector<DType> voxel_size, std::vector<DType> coors_range,
int max_points, int max_voxels) {
auto points_rw = points.template mutable_unchecked<2>();
auto height_rw = height.template mutable_unchecked<2>();
auto maxs_rw = maxs.template mutable_unchecked<2>();
auto voxels_rw = voxels.template mutable_unchecked<3>();
auto coors_rw = coors.mutable_unchecked<2>();
auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
auto N = points_rw.shape(0);
auto num_features = points_rw.shape(1);
constexpr int ndim_minus_1 = NDim - 1;
int voxel_num = 0;
bool failed = false;
int coor[NDim];
int c;
int grid_size[NDim];
for (int i = 0; i < NDim; ++i) {
grid_size[i] =
round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
}
int voxelidx, num;
for (int i = 0; i < N; ++i) {
failed = false;
for (int j = 0; j < NDim; ++j) {
c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
if ((c < 0 || c >= grid_size[j])) {
failed = true;
break;
}
coor[ndim_minus_1 - j] = c;
}
if (failed) continue;
voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
if (voxelidx == -1) {
voxelidx = voxel_num;
if (voxel_num >= max_voxels) continue;
voxel_num += 1;
coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
for (int k = 0; k < NDim; ++k) {
coors_rw(voxelidx, k) = coor[k];
}
}
num = num_points_per_voxel_rw(voxelidx);
if (num < max_points) {
for (int k = 0; k < num_features; ++k) {
voxels_rw(voxelidx, num, k) = points_rw(i, k);
height_rw(voxelidx, k) =
std::min(points_rw(i, k), height_rw(voxelidx, k));
maxs_rw(voxelidx, k) = std::max(points_rw(i, k), maxs_rw(voxelidx, k));
}
num_points_per_voxel_rw(voxelidx) += 1;
}
}
for (int i = 0; i < voxel_num; ++i) {
coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
for (int k = 0; k < num_features; ++k) {
height_rw(i, k) = maxs_rw(i, k) - height_rw(i, k);
}
}
return voxel_num;
}
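// Marks points that fall into near-flat cells: computes the min/max z value of
// every block-sized cell and zeroes `mask` for points whose cell height span
// is below `eps`.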
template <typename DType, int NDim>
int block_filtering(py::array_t<DType> points, py::array_t<int> mask,
py::array_t<DType> height, py::array_t<DType> maxs,
py::array_t<int> coor_to_voxelidx,
std::vector<DType> voxel_size,
std::vector<DType> coors_range, int max_voxels, DType eps) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto mask_rw = mask.mutable_unchecked<1>();
  auto height_rw = height.template mutable_unchecked<1>();
  auto maxs_rw = maxs.template mutable_unchecked<1>();
  auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
auto N = points_rw.shape(0);
auto num_features = points_rw.shape(1);
constexpr int ndim_minus_1 = NDim - 1;
int voxel_num = 0;
bool failed = false;
int coor[NDim];
int c;
int grid_size[NDim];
for (int i = 0; i < NDim; ++i) {
grid_size[i] =
round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
}
int voxelidx, num;
for (int i = 0; i < N; ++i) {
failed = false;
for (int j = 0; j < NDim; ++j) {
c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
if ((c < 0 || c >= grid_size[j])) {
failed = true;
break;
}
coor[ndim_minus_1 - j] = c;
}
if (failed) continue;
voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
if (voxelidx == -1) {
voxelidx = voxel_num;
voxel_num += 1;
coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
}
height_rw(voxelidx) = std::min(points_rw(i, 2), height_rw(voxelidx));
maxs_rw(voxelidx) = std::max(points_rw(i, 2), maxs_rw(voxelidx));
}
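  // Second pass: zero the mask of points whose voxel has a z-extent
  // (max - min) smaller than eps.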
for (int i = 0; i < N; ++i) {
failed = false;
for (int j = 0; j < NDim; ++j) {
c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
if ((c < 0 || c >= grid_size[j])) {
failed = true;
break;
}
coor[ndim_minus_1 - j] = c;
}
if (failed) continue;
voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if ((maxs_rw(voxelidx) - height_rw(voxelidx)) < eps) {
      mask_rw(i) = 0;
    }
  }
  return voxel_num;
}
template <typename DType, int NDim>
int points_to_voxel_3d_with_filtering(
py::array_t<DType> points, py::array_t<DType> voxels,
py::array_t<int> voxel_mask, py::array_t<DType> mins,
py::array_t<DType> maxs, py::array_t<int> coors,
py::array_t<int> num_points_per_voxel, py::array_t<int> coor_to_voxelidx,
std::vector<DType> voxel_size, std::vector<DType> coors_range,
int max_points, int max_voxels, int block_factor, int block_size,
DType height_threshold) {
auto points_rw = points.template mutable_unchecked<2>();
auto mins_rw = mins.template mutable_unchecked<2>();
auto maxs_rw = maxs.template mutable_unchecked<2>();
auto voxels_rw = voxels.template mutable_unchecked<3>();
auto voxel_mask_rw = voxel_mask.template mutable_unchecked<1>();
auto coors_rw = coors.mutable_unchecked<2>();
auto num_points_per_voxel_rw = num_points_per_voxel.mutable_unchecked<1>();
auto coor_to_voxelidx_rw = coor_to_voxelidx.mutable_unchecked<NDim>();
auto N = points_rw.shape(0);
auto num_features = points_rw.shape(1);
constexpr int ndim_minus_1 = NDim - 1;
int voxel_num = 0;
bool failed = false;
int coor[NDim];
int c;
int grid_size[NDim];
DType max_value, min_value;
for (int i = 0; i < NDim; ++i) {
grid_size[i] =
round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
}
int block_shape_H = grid_size[1] / block_factor;
int block_shape_W = grid_size[0] / block_factor;
int voxelidx, num;
int block_coor[2];
int startx, stopx, starty, stopy;
for (int i = 0; i < N; ++i) {
failed = false;
for (int j = 0; j < NDim; ++j) {
c = floor((points_rw(i, j) - coors_range[j]) / voxel_size[j]);
if ((c < 0 || c >= grid_size[j])) {
failed = true;
break;
}
coor[ndim_minus_1 - j] = c;
}
if (failed) continue;
voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
if (voxelidx == -1) {
voxelidx = voxel_num;
if (voxel_num >= max_voxels) continue;
voxel_num += 1;
coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
for (int k = 0; k < NDim; ++k) {
coors_rw(voxelidx, k) = coor[k];
}
}
num = num_points_per_voxel_rw(voxelidx);
if (num < max_points) {
for (int k = 0; k < num_features; ++k) {
voxels_rw(voxelidx, num, k) = points_rw(i, k);
}
block_coor[0] = coor[1] / block_factor;
block_coor[1] = coor[2] / block_factor;
mins_rw(block_coor[0], block_coor[1]) =
std::min(points_rw(i, 2), mins_rw(block_coor[0], block_coor[1]));
maxs_rw(block_coor[0], block_coor[1]) =
std::max(points_rw(i, 2), maxs_rw(block_coor[0], block_coor[1]));
num_points_per_voxel_rw(voxelidx) += 1;
}
}
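  // Second pass: for every produced voxel, scan a block_size x block_size
  // neighborhood of BEV blocks around it and set voxel_mask to 1 only if the
  // height range inside that neighborhood exceeds height_threshold.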
for (int i = 0; i < voxel_num; ++i) {
coor[1] = coors_rw(i, 1);
coor[2] = coors_rw(i, 2);
coor_to_voxelidx_rw(coors_rw(i, 0), coor[1], coor[2]) = -1;
block_coor[0] = coor[1] / block_factor;
block_coor[1] = coor[2] / block_factor;
min_value = mins_rw(block_coor[0], block_coor[1]);
max_value = maxs_rw(block_coor[0], block_coor[1]);
startx = std::max(0, block_coor[0] - block_size / 2);
stopx =
std::min(block_shape_H, block_coor[0] + block_size - block_size / 2);
starty = std::max(0, block_coor[1] - block_size / 2);
stopy =
std::min(block_shape_W, block_coor[1] + block_size - block_size / 2);
for (int j = startx; j < stopx; ++j) {
for (int k = starty; k < stopy; ++k) {
min_value = std::min(min_value, mins_rw(j, k));
max_value = std::max(max_value, maxs_rw(j, k));
}
}
voxel_mask_rw(i) = (max_value - min_value) > height_threshold;
}
return voxel_num;
}
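// Illustrative only: a minimal pybind11 binding sketch for one of the
// templates above. The module name "voxel_ops_sketch" and the instantiation
// (float, NDim = 3) are assumptions made for this example, not the binding
// code that mmcv actually ships.
#include <pybind11/numpy.h>
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>

PYBIND11_MODULE(voxel_ops_sketch, m) {
  m.def("points_to_voxel_3d_np_height",
        &points_to_voxel_3d_np_height<float, 3>,
        "voxelize points and record per-feature min/max statistics");
}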
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#ifndef SPARSE_REORDERING_FUNCTOR_H_
#define SPARSE_REORDERING_FUNCTOR_H_
#include <utils/spconv/tensorview/tensorview.h>
namespace functor {
template <typename Device, typename scalar_t, typename Index>
struct SparseGatherFunctor {
void operator()(const Device& d, tv::TensorView<scalar_t> buffer,
tv::TensorView<const scalar_t> features,
tv::TensorView<const Index> indices, int size);
};
template <typename Device, typename scalar_t, typename Index>
struct SparseScatterAddFunctor {
void operator()(const Device& d, tv::TensorView<scalar_t> out_features,
tv::TensorView<const scalar_t> buffer,
tv::TensorView<const Index> indices, int size,
bool stable = false);
};
} // namespace functor
#endif
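// Illustrative only: the gather -> GEMM -> scatter-add pattern that the
// sparse convolution forward pass builds from these functors, shown here for
// the CPU device. Buffer sizes, index values and the omitted GEMM are
// assumptions made for this sketch.
#include <vector>

inline void gather_scatter_sketch() {
  const int kNHot = 4, kNumFeatures = 8, kNumActive = 16;
  std::vector<float> features(kNumActive * kNumFeatures, 1.f);
  std::vector<float> out_features(kNumActive * kNumFeatures, 0.f);
  std::vector<float> buffer(kNHot * kNumFeatures, 0.f);
  std::vector<int> in_indices = {0, 2, 5, 7};   // input rows for this offset
  std::vector<int> out_indices = {1, 3, 4, 6};  // matching output rows

  tv::TensorView<const float> feat_view(features.data(), kNumActive,
                                        kNumFeatures);
  tv::TensorView<float> out_view(out_features.data(), kNumActive,
                                 kNumFeatures);
  tv::TensorView<float> buf_view(buffer.data(), kNHot, kNumFeatures);
  tv::TensorView<const int> in_idx_view(in_indices.data(), kNHot);
  tv::TensorView<const int> out_idx_view(out_indices.data(), kNHot);

  functor::SparseGatherFunctor<tv::CPU, float, int> gather;
  gather(tv::CPU(), buf_view, feat_view, in_idx_view, kNHot);
  // ... a dense GEMM with the filter for this kernel offset goes here ...
  functor::SparseScatterAddFunctor<tv::CPU, float, int> scatter_add;
  scatter_add(tv::CPU(), out_view, buf_view, out_idx_view, kNHot);
}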
#pragma once
namespace tv {
namespace detail {
template <typename scalar_t>
class KernelLoop {
struct Iterator {
__forceinline__ __device__ Iterator(scalar_t index, scalar_t delta)
: index_(index), delta_(delta) {}
__forceinline__ __device__ scalar_t operator*() const { return index_; }
__forceinline__ __device__ Iterator &operator++() {
index_ += delta_;
return *this;
}
__forceinline__ __device__ bool operator!=(const Iterator &other) const {
bool greater = index_ > other.index_;
bool less = index_ < other.index_;
if (!other.delta_) {
return less;
}
if (!delta_) {
return greater;
}
return less || greater;
}
private:
scalar_t index_;
const scalar_t delta_;
};
public:
__forceinline__ __device__ KernelLoop(scalar_t begin, scalar_t delta,
scalar_t end)
: begin_(begin), delta_(delta), end_(end) {}
__forceinline__ __device__ Iterator begin() const {
return Iterator{begin_, delta_};
}
__forceinline__ __device__ Iterator end() const { return Iterator{end_, 0}; }
private:
scalar_t begin_;
scalar_t delta_;
scalar_t end_;
};
} // namespace detail
template <typename scalar_t, int NumILP = 1>
__forceinline__ __device__ detail::KernelLoop<scalar_t> KernelLoopX(
scalar_t count) {
return detail::KernelLoop<scalar_t>(blockIdx.x * blockDim.x + threadIdx.x,
gridDim.x * blockDim.x * NumILP, count);
}
// Helper to visit indices in the range 0 <= i < count using the y-coordinate.
// Usage: for(int i : KernelLoopY(count)) { visit(i); }
template <typename scalar_t, int NumILP = 1>
__forceinline__ __device__ detail::KernelLoop<scalar_t> KernelLoopY(
scalar_t count) {
return detail::KernelLoop<scalar_t>(blockIdx.y * blockDim.y + threadIdx.y,
gridDim.y * blockDim.y * NumILP, count);
}
// Helper to visit indices in the range 0 <= i < count using the z-coordinate.
// Usage: for(int i : KernelLoopZ(count)) { visit(i); }
template <typename scalar_t, int NumILP = 1>
__forceinline__ __device__ detail::KernelLoop<scalar_t> KernelLoopZ(
scalar_t count) {
return detail::KernelLoop<scalar_t>(blockIdx.z * blockDim.z + threadIdx.z,
gridDim.z * blockDim.z * NumILP, count);
}
} // namespace tv
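// Illustrative only: a grid-stride elementwise kernel written against
// tv::KernelLoopX. The kernel name and the scaling operation are assumptions
// made for this sketch.
template <typename scalar_t>
__global__ void scale_kernel_sketch(scalar_t *data, scalar_t factor,
                                    int count) {
  // each thread visits its global index, then steps by gridDim.x * blockDim.x
  // until count is reached
  for (int i : tv::KernelLoopX<int>(count)) {
    data[i] *= factor;
  }
}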
#pragma once
// from pytorch.aten
#include "tensorview.h"
namespace tv {
namespace launch {
template <typename T1, typename T2>
inline int DivUp(const T1 a, const T2 b) {
return (a + b - 1) / b;
}
constexpr int CUDA_NUM_THREADS = 1024;
inline int getBlocks(const int N) {
TV_ASSERT_RT_ERR(N > 0,
"CUDA kernel launch blocks must be positive, but got N=", N);
return DivUp(N, CUDA_NUM_THREADS);
}
} // namespace launch
} // namespace tv
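// Illustrative only: host-side launch configuration built from the helpers
// above, driving the hypothetical scale_kernel_sketch from the previous
// sketch. The data pointer, element count and stream are assumptions.
inline void launch_scale_sketch(float *d_data, int n, cudaStream_t stream) {
  dim3 grid(tv::launch::getBlocks(n));       // ceil(n / 1024) blocks
  dim3 block(tv::launch::CUDA_NUM_THREADS);  // 1024 threads per block
  scale_kernel_sketch<float><<<grid, block, 0, stream>>>(d_data, 2.f, n);
  TV_CHECK_CUDA_ERR();  // surface any launch error as a C++ exception
}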
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <iostream>
#include <memory>
#include <sstream>
#include <type_traits>
#include <vector>
#include "pytorch_cpp_helper.hpp"
namespace tv {
#ifdef __NVCC__
#define TV_HOST_DEVICE_INLINE __forceinline__ __device__ __host__
#define TV_DEVICE_INLINE __forceinline__ __device__
#define TV_HOST_DEVICE __device__ __host__
#define TV_ASSERT(expr) assert(expr)
#elif defined(__CUDACC_RTC__)
#define TV_ASSERT(expr) assert(expr)
#define TV_HOST_DEVICE_INLINE __forceinline__ __device__
#define TV_DEVICE_INLINE __forceinline__ __device__
#define TV_HOST_DEVICE __device__ __host__
#else
#define TV_ASSERT(x) assert(x)
#define TV_HOST_DEVICE_INLINE inline
#define TV_HOST_DEVICE
#endif
#define TV_REQUIRE(expr, ...) \
{ \
if (!(expr)) { \
printf(__VA_ARGS__); \
assert(expr); \
} \
}
#define TV_DEVICE_REQUIRE(expr, ...) \
{ \
if (!(expr) && threadIdx.x == 0) printf(__VA_ARGS__); \
assert(expr); \
}
template <class SStream, class T>
void sstream_print(SStream &ss, T val) {
ss << val;
}
template <class SStream, class T, class... TArgs>
void sstream_print(SStream &ss, T val, TArgs... args) {
ss << val << " ";
sstream_print(ss, args...);
}
#define TV_ASSERT_RT_ERR(expr, ...) \
{ \
if (!(expr)) { \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << #expr << " assert failed. "; \
tv::sstream_print(__macro_s, __VA_ARGS__); \
throw std::runtime_error(__macro_s.str()); \
} \
}
#define TV_ASSERT_INVALID_ARG(expr, ...) \
{ \
if (!(expr)) { \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << #expr << " assert failed. "; \
tv::sstream_print(__macro_s, __VA_ARGS__); \
throw std::invalid_argument(__macro_s.str()); \
} \
}
#define TV_CHECK_CUDA_ERR() \
{ \
auto err = cudaGetLastError(); \
if (err != cudaSuccess) { \
std::stringstream __macro_s; \
__macro_s << __FILE__ << " " << __LINE__ << "\n"; \
__macro_s << "cuda execution failed with error " << err; \
throw std::runtime_error(__macro_s.str()); \
} \
}
struct CPU {};
#define TV_MAX_DIM 6
template <typename scalar_t, size_t MaxDim = TV_MAX_DIM>
struct SimpleVector {
public:
TV_HOST_DEVICE_INLINE SimpleVector(){};
TV_HOST_DEVICE_INLINE SimpleVector(std::initializer_list<scalar_t> q) {
TV_ASSERT(q.size() <= MaxDim);
mSize = 0;
for (scalar_t s : q) {
mArray[mSize++] = s;
}
mSize = q.size();
}
SimpleVector(const std::vector<scalar_t> &arr) {
TV_ASSERT(arr.size() <= MaxDim);
for (size_t i = 0; i < arr.size(); ++i) {
mArray[i] = arr[i];
}
mSize = arr.size();
}
TV_HOST_DEVICE_INLINE SimpleVector(
const SimpleVector<scalar_t, MaxDim> &arr) {
TV_ASSERT(arr.size() <= MaxDim);
for (size_t i = 0; i < arr.size(); ++i) {
mArray[i] = arr[i];
}
mSize = arr.size();
}
TV_HOST_DEVICE_INLINE scalar_t &operator[](int idx) {
#ifdef TV_DEBUG
TV_ASSERT(idx >= 0 && idx < mSize);
#endif
return mArray[idx];
}
TV_HOST_DEVICE_INLINE const scalar_t &operator[](int idx) const {
#ifdef TV_DEBUG
TV_ASSERT(idx >= 0 && idx < mSize);
#endif
return mArray[idx];
}
TV_HOST_DEVICE_INLINE void push_back(scalar_t s) {
#ifdef TV_DEBUG
TV_ASSERT(mSize < MaxDim);
#endif
mArray[mSize] = s;
mSize++;
}
TV_HOST_DEVICE_INLINE void pop_back() {
#ifdef TV_DEBUG
TV_ASSERT(mSize > 0);
#endif
mSize--;
}
TV_HOST_DEVICE_INLINE size_t size() const { return mSize; }
TV_HOST_DEVICE_INLINE const scalar_t *data() const { return mArray; }
  TV_HOST_DEVICE_INLINE bool empty() const { return mSize == 0; }
typedef size_t size_type;
class iterator {
public:
typedef iterator self_type;
typedef scalar_t value_type;
typedef scalar_t &reference;
typedef scalar_t *pointer;
typedef std::forward_iterator_tag iterator_category;
typedef std::ptrdiff_t difference_type;
TV_HOST_DEVICE_INLINE iterator(pointer ptr) : ptr_(ptr) {}
TV_HOST_DEVICE_INLINE self_type operator++(int junk) {
self_type i = *this;
ptr_++;
return i;
}
TV_HOST_DEVICE_INLINE self_type operator++() {
ptr_++;
return *this;
}
TV_HOST_DEVICE_INLINE reference operator*() { return *ptr_; }
TV_HOST_DEVICE_INLINE pointer operator->() { return ptr_; }
TV_HOST_DEVICE_INLINE bool operator==(const self_type &rhs) {
return ptr_ == rhs.ptr_;
}
TV_HOST_DEVICE_INLINE bool operator!=(const self_type &rhs) {
return ptr_ != rhs.ptr_;
}
private:
pointer ptr_;
};
class const_iterator {
public:
typedef const_iterator self_type;
typedef scalar_t value_type;
typedef const scalar_t &reference;
typedef const scalar_t *pointer;
typedef std::ptrdiff_t difference_type;
typedef std::forward_iterator_tag iterator_category;
TV_HOST_DEVICE_INLINE const_iterator(pointer ptr) : ptr_(ptr) {}
TV_HOST_DEVICE_INLINE self_type operator++(int junk) {
self_type i = *this;
ptr_++;
return i;
}
TV_HOST_DEVICE_INLINE self_type operator++() {
ptr_++;
return *this;
}
TV_HOST_DEVICE_INLINE reference operator*() { return *ptr_; }
TV_HOST_DEVICE_INLINE pointer operator->() { return ptr_; }
TV_HOST_DEVICE_INLINE bool operator==(const self_type &rhs) {
return ptr_ == rhs.ptr_;
}
TV_HOST_DEVICE_INLINE bool operator!=(const self_type &rhs) {
return ptr_ != rhs.ptr_;
}
private:
pointer ptr_;
};
TV_HOST_DEVICE_INLINE iterator begin() { return iterator(mArray); }
TV_HOST_DEVICE_INLINE iterator end() { return iterator(mArray + mSize); }
TV_HOST_DEVICE_INLINE const_iterator begin() const {
return const_iterator(mArray);
}
TV_HOST_DEVICE_INLINE const_iterator end() const {
return const_iterator(mArray + mSize);
}
TV_HOST_DEVICE_INLINE const_iterator cbegin() const {
return const_iterator(mArray);
}
TV_HOST_DEVICE_INLINE const_iterator cend() const {
return const_iterator(mArray + mSize);
}
protected:
scalar_t mArray[MaxDim];
size_t mSize = 0;
};
template <typename scalar_t, size_t MaxDim>
bool operator==(const SimpleVector<scalar_t, MaxDim> &lfs,
const SimpleVector<scalar_t, MaxDim> &rfs) {
if (lfs.size() != rfs.size()) return false;
for (size_t i = 0; i < lfs.size(); ++i) {
if (lfs[i] != rfs[i]) return false;
}
return true;
}
template <typename scalar_t, size_t MaxDim>
bool operator!=(const SimpleVector<scalar_t, MaxDim> &lfs,
const SimpleVector<scalar_t, MaxDim> &rfs) {
return !(lfs == rfs);
}
struct Slice {
template <class... Integers>
TV_HOST_DEVICE_INLINE Slice(Integers... ints) {
    static_assert(sizeof...(ints) <= 3, "a Slice takes at most 3 integers");
SimpleVector<int, 3> slices{int(ints)...};
mSlices[0] = -1;
mSlices[1] = -1;
mSlices[2] = -1;
for (size_t i = 0; i < slices.size(); ++i) {
mSlices[i] = slices[i];
}
}
TV_HOST_DEVICE_INLINE Slice() {
mSlices[0] = -1;
mSlices[1] = -1;
mSlices[2] = -1;
}
template <typename scalar_t>
TV_HOST_DEVICE_INLINE Slice(std::initializer_list<scalar_t> slice) {
mSlices[0] = -1;
mSlices[1] = -1;
mSlices[2] = -1;
TV_ASSERT(slice.size() <= 3);
int idx = 0;
for (scalar_t s : slice) {
mSlices[idx] = int(s);
++idx;
}
}
TV_HOST_DEVICE_INLINE int &operator[](int idx) {
#ifdef TV_DEBUG
TV_ASSERT(idx >= 0 && idx < 3);
#endif
return mSlices[idx];
}
TV_HOST_DEVICE_INLINE const int &operator[](int idx) const {
#ifdef TV_DEBUG
TV_ASSERT(idx >= 0 && idx < 3);
#endif
return mSlices[idx];
}
protected:
int mSlices[3];
};
template <size_t MaxDim = TV_MAX_DIM>
struct ShapeBase : public SimpleVector<int, MaxDim> {
TV_HOST_DEVICE_INLINE ShapeBase() : SimpleVector<int, MaxDim>(){};
TV_HOST_DEVICE_INLINE ShapeBase(std::initializer_list<int> shape)
: SimpleVector<int, MaxDim>(shape) {}
template <typename scalar_t, template <class...> class Container>
ShapeBase(Container<scalar_t> shape) : SimpleVector<int, MaxDim>(shape) {}
TV_HOST_DEVICE_INLINE ShapeBase(const ShapeBase<MaxDim> &shape)
: SimpleVector<int, MaxDim>(shape) {}
ShapeBase(const std::vector<int> &arr) : SimpleVector<int, MaxDim>(arr) {}
ShapeBase<MaxDim> &operator=(const ShapeBase<MaxDim> &shape) = default;
TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> subshape(int start, int end) const {
#ifdef TV_DEBUG
TV_ASSERT(start >= 0 && end < this->mSize && end > start);
#endif
ShapeBase<MaxDim> shape;
for (int i = start; i < end; ++i) {
shape.push_back(this->mArray[i]);
}
return shape;
}
TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> subshape(int start) const {
#ifdef TV_DEBUG
TV_ASSERT(start >= 0 && start <= this->mSize);
#endif
ShapeBase<MaxDim> shape;
for (int i = start; i < this->mSize; ++i) {
shape.push_back(this->mArray[i]);
}
return shape;
}
TV_HOST_DEVICE_INLINE size_t size() const {
if (this->mSize == 0) return 0;
size_t s = 1;
for (int i = 0; i < int(this->mSize); ++i) {
s *= this->mArray[i];
}
return s;
}
TV_HOST_DEVICE_INLINE size_t ndim() const { return this->mSize; }
TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze() const {
ShapeBase<MaxDim> shape;
for (int i = 0; i < this->mSize; ++i) {
if (this->mArray[i] != 1) shape.push_back(this->mArray[i]);
}
return shape;
}
TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze(int dim) const {
ShapeBase<MaxDim> shape;
for (int i = 0; i < this->mSize; ++i) {
if (i != dim || this->mArray[i] != 1) shape.push_back(this->mArray[i]);
}
return shape;
}
};
using Shape = ShapeBase<TV_MAX_DIM>;
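// Illustrative only: basic Shape bookkeeping. size() is the element count,
// ndim() the number of dimensions and subshape() keeps a suffix of the
// dimensions; the concrete numbers are arbitrary.
inline void shape_usage_sketch() {
  Shape shape{2, 3, 4};
  size_t numel = shape.size();     // 2 * 3 * 4 == 24
  size_t rank = shape.ndim();      // 3
  Shape tail = shape.subshape(1);  // {3, 4}
  (void)numel;
  (void)rank;
  (void)tail;
}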
template <class... Inds>
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(std::vector<int> &shape,
Inds... indexes) {
unsigned offset = 0;
unsigned m = 1;
int indexes_vec[sizeof...(indexes)] = {indexes...};
#ifdef TV_DEBUG
TV_ASSERT(sizeof...(indexes) == shape.size());
#endif
#pragma unroll
for (int i = sizeof...(indexes) - 1; i >= 0; --i) {
offset += m * indexes_vec[i];
m *= shape[i];
}
return offset;
}
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(std::vector<int> &shape,
std::vector<int> &indexes_vec) {
unsigned offset = 0;
unsigned m = 1;
for (int i = shape.size() - 1; i >= 0; --i) {
offset += m * indexes_vec[i];
m *= shape[i];
}
return offset;
}
template <class... Inds>
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Shape &shape,
Inds... indexes) {
unsigned offset = 0;
unsigned m = 1;
int indexes_vec[sizeof...(indexes)] = {indexes...};
#pragma unroll
for (int i = sizeof...(indexes) - 1; i >= 0; --i) {
offset += m * indexes_vec[i];
m *= shape[i];
}
return offset;
}
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Shape &shape,
const Shape &indexes_vec) {
unsigned offset = 0;
unsigned m = 1;
for (int i = indexes_vec.ndim() - 1; i >= 0; --i) {
offset += m * indexes_vec[i];
m *= shape[i];
}
return offset;
}
template <typename Index, unsigned NDim>
TV_HOST_DEVICE_INLINE unsigned rowArrayIdx(const Index *indexes,
const Index *shape) {
unsigned offset = 0;
unsigned m = 1;
#pragma unroll
for (int i = NDim - 1; i >= 0; --i) {
offset += m * indexes[i];
m *= shape[i];
}
return offset;
}
template <typename Index, unsigned NDim>
TV_HOST_DEVICE_INLINE Index rowArrayIdxInv(Index index, Index *output,
const Index *shape) {
#pragma unroll
for (int i = NDim - 1; i >= 0; --i) {
output[i] = index % shape[i];
index -= output[i];
index /= shape[i];
}
return index;
}
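// Illustrative only: the row-major offset math the helpers above implement.
// For shape {2, 3, 4} the strides are {12, 4, 1}, so index (1, 2, 3) maps to
// 1 * 12 + 2 * 4 + 3 == 23.
inline unsigned row_array_idx_sketch() {
  Shape shape{2, 3, 4};
  return rowArrayIdx(shape, 1, 2, 3);  // == 23
}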
template <int N>
struct ArrayIndexRowMajor {
TV_HOST_DEVICE_INLINE static unsigned run(const Shape &shape,
const Shape &indexes) {
return indexes[N - 1] +
shape[N - 1] * ArrayIndexRowMajor<N - 1>::run(shape, indexes);
}
};
template <>
struct ArrayIndexRowMajor<0> {
TV_HOST_DEVICE_INLINE static unsigned run(const Shape &shape,
const Shape &indexes) {
return 0;
}
};
namespace detail {
template <typename scalar_t>
constexpr const char *simpleTypeName(scalar_t val = scalar_t());
template <>
constexpr const char *simpleTypeName(float val) {
return "float32";
}
template <>
constexpr const char *simpleTypeName(double val) {
return "float64";
}
template <>
constexpr const char *simpleTypeName(int val) {
return "int32";
}
template <>
constexpr const char *simpleTypeName(unsigned val) {
return "uint32";
}
template <>
constexpr const char *simpleTypeName(long val) {
return "int64";
}
template <>
constexpr const char *simpleTypeName(unsigned long val) {
return "uint64";
}
}; // namespace detail
template <typename scalar_t, int Rank = -1>
struct TensorView {
TV_HOST_DEVICE_INLINE TensorView() {}
explicit TV_HOST_DEVICE_INLINE TensorView(scalar_t *ptr, Shape shape)
: mPtr(ptr), mShape(shape) {}
template <class... Integers>
explicit TV_HOST_DEVICE_INLINE TensorView(scalar_t *ptr, Integers... shapes)
: mPtr(ptr) {
mShape = {int(shapes)...};
}
TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> &assign(
const TensorView<scalar_t, Rank> &tensor) {
TV_REQUIRE(tensor.shape() == shape(), "you must provide same input size%s",
"\n");
scalar_t *ptr = mPtr;
const scalar_t *other_ptr = tensor.data();
for (size_t i = 0; i < size(); ++i) *(ptr++) = *(other_ptr++);
return *this;
}
template <typename T1>
TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> &assign(
std::initializer_list<T1> seq) {
TV_REQUIRE(seq.size() == size(), "you must provide same input size%s",
"\n");
scalar_t *ptr = mPtr;
for (const T1 &s : seq) *(ptr++) = scalar_t(s);
return *this;
}
template <class... Inds>
TV_HOST_DEVICE_INLINE scalar_t &operator()(Inds... inds) {
#ifdef TV_DEBUG
int idxes[sizeof...(Inds)]{int(inds)...};
TV_REQUIRE(sizeof...(inds) == mShape.ndim(),
"you provide %d indexes, but dim is %d\n", sizeof...(inds),
mShape.ndim());
for (int i = 0; i < sizeof...(inds); ++i) {
TV_REQUIRE(idxes[i] >= 0 && idxes[i] < mShape[i],
"index-%d(%d) out-of-range: [0, %d)\n", i, idxes[i],
mShape[i]);
}
#endif
return mPtr[rowArrayIdx(mShape, int(inds)...)];
}
template <class... Inds>
TV_HOST_DEVICE_INLINE const scalar_t &operator()(Inds... inds) const {
#ifdef TV_DEBUG
int idxes[sizeof...(Inds)]{int(inds)...};
TV_REQUIRE(sizeof...(inds) == mShape.ndim(),
"you provide %d indexes, but dim is %d\n", sizeof...(inds),
mShape.ndim());
for (int i = 0; i < sizeof...(inds); ++i) {
TV_REQUIRE(idxes[i] >= 0 && idxes[i] < mShape[i],
"index-%d(%d) out-of-range: [0, %d)\n", i, idxes[i],
mShape[i]);
}
#endif
return mPtr[rowArrayIdx(mShape, int(inds)...)];
}
TV_HOST_DEVICE_INLINE scalar_t &operator()() {
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mPtr != nullptr,
"you want get value but the view is empty.%s", "\n");
TV_DEVICE_REQUIRE(mShape.ndim() == 0,
"you provide 0 indexes, but dim is %ld\n", mShape.ndim());
#else
TV_REQUIRE(mPtr != nullptr, "you want get value but the view is empty.%s",
"\n");
TV_REQUIRE(mShape.ndim() == 0, "you provide 0 indexes, but dim is %ld\n",
mShape.ndim());
#endif
#endif
return mPtr[0];
}
TV_HOST_DEVICE_INLINE const scalar_t &operator()() const {
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mPtr != nullptr,
"you want get value but the view is empty.%s", "\n");
TV_DEVICE_REQUIRE(mShape.ndim() == 0,
"you provide 0 indexes, but dim is %ld\n", mShape.ndim());
#else
TV_REQUIRE(mPtr != nullptr, "you want get value but the view is empty.%s",
"\n");
TV_REQUIRE(mShape.ndim() == 0, "you provide 0 indexes, but dim is %ld\n",
mShape.ndim());
#endif
#endif
return mPtr[0];
}
template <class T1>
TV_HOST_DEVICE_INLINE scalar_t &operator()(T1 i1) {
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 1,
"you provide 1 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, i1, mShape[0]);
#else
TV_REQUIRE(mShape.ndim() == 1, "you provide 1 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, i1, mShape[0]);
#endif
#endif
return mPtr[i1];
}
template <class T1, class T2>
TV_HOST_DEVICE_INLINE scalar_t &operator()(T1 i1, T2 i2) {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 2,
"you provide 2 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
mShape[1]);
#else
TV_REQUIRE(mShape.ndim() == 2, "you provide 2 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
#endif
#endif
return mPtr[i1 * mShape[1] + i2];
}
template <class T1, class T2, class T3>
TV_HOST_DEVICE_INLINE scalar_t &operator()(T1 i1, T2 i2, T3 i3) {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 3,
"you provide 3 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
mShape[1]);
TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
mShape[2]);
#else
TV_REQUIRE(mShape.ndim() == 3, "you provide 3 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
#endif
#endif
return mPtr[(i1 * mShape[1] + i2) * mShape[2] + i3];
}
template <class T1, class T2, class T3, class T4>
TV_HOST_DEVICE_INLINE scalar_t &operator()(T1 i1, T2 i2, T3 i3, T4 i4) {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 4,
"you provide 4 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
mShape[1]);
TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
mShape[2]);
TV_DEVICE_REQUIRE(i4 >= 0 && i4 < mShape[3],
"index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4),
mShape[3]);
#else
TV_REQUIRE(mShape.ndim() == 4, "you provide 4 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
TV_REQUIRE(i4 >= 0 && i4 < mShape[3],
"index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4), mShape[3]);
#endif
#endif
return mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
}
template <class T1>
TV_HOST_DEVICE_INLINE const scalar_t &operator()(T1 i1) const {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 1,
"you provide 1 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
#else
TV_REQUIRE(mShape.ndim() == 1, "you provide 1 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
#endif
#endif
return mPtr[i1];
}
template <class T1, class T2>
TV_HOST_DEVICE_INLINE const scalar_t &operator()(T1 i1, T2 i2) const {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 2,
"you provide 2 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
mShape[1]);
#else
TV_REQUIRE(mShape.ndim() == 2, "you provide 2 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
#endif
#endif
return mPtr[i1 * mShape[1] + i2];
}
template <class T1, class T2, class T3>
TV_HOST_DEVICE_INLINE const scalar_t &operator()(T1 i1, T2 i2, T3 i3) const {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 3,
"you provide 3 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
mShape[1]);
TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
mShape[2]);
#else
TV_REQUIRE(mShape.ndim() == 3, "you provide 3 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
#endif
#endif
return mPtr[(i1 * mShape[1] + i2) * mShape[2] + i3];
}
template <class T1, class T2, class T3, class T4>
TV_HOST_DEVICE_INLINE const scalar_t &operator()(T1 i1, T2 i2, T3 i3,
T4 i4) const {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(mShape.ndim() == 4,
"you provide 4 indexes, but dim is %ld\n", mShape.ndim());
TV_DEVICE_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1),
mShape[0]);
TV_DEVICE_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2),
mShape[1]);
TV_DEVICE_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3),
mShape[2]);
TV_DEVICE_REQUIRE(i4 >= 0 && i4 < mShape[3],
"index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4),
mShape[3]);
#else
TV_REQUIRE(mShape.ndim() == 4, "you provide 4 indexes, but dim is %ld\n",
mShape.ndim());
TV_REQUIRE(i1 >= 0 && i1 < mShape[0],
"index-%d(%d) out-of-range: [0, %d)\n", 0, int(i1), mShape[0]);
TV_REQUIRE(i2 >= 0 && i2 < mShape[1],
"index-%d(%d) out-of-range: [0, %d)\n", 1, int(i2), mShape[1]);
TV_REQUIRE(i3 >= 0 && i3 < mShape[2],
"index-%d(%d) out-of-range: [0, %d)\n", 2, int(i3), mShape[2]);
TV_REQUIRE(i4 >= 0 && i4 < mShape[3],
"index-%d(%d) out-of-range: [0, %d)\n", 3, int(i4), mShape[3]);
#endif
#endif
return mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
}
TV_HOST_DEVICE_INLINE scalar_t &operator[](int idx) {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
TV_DEVICE_REQUIRE(idx >= 0 && idx < size(),
"index(%d) out-of-range: [0, %ld)\n", int(idx), size());
#else
TV_REQUIRE(idx >= 0 && idx < size(), "index(%d) out-of-range: [0, %ld)\n",
int(idx), size());
#endif
#endif
return mPtr[idx];
}
TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> operator[](
SimpleVector<Slice> slice_vec) {
return _subview(slice_vec);
}
TV_HOST_DEVICE_INLINE const TensorView<scalar_t, Rank> operator[](
SimpleVector<Slice> slice_vec) const {
return _subview(slice_vec);
}
TV_HOST_DEVICE_INLINE bool empty() const { return mPtr == nullptr; }
TV_HOST_DEVICE_INLINE scalar_t *data() { return mPtr; }
TV_HOST_DEVICE_INLINE const scalar_t *data() const { return mPtr; }
TV_HOST_DEVICE_INLINE const Shape &shape() const { return mShape; }
TV_HOST_DEVICE_INLINE int dim(int idx) const { return mShape[idx]; }
TV_HOST_DEVICE_INLINE int ndim() const { return mShape.ndim(); }
template <class... Inds>
TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> &reshape(Inds... newShapes) {
Shape shapes{int(newShapes)...};
TV_ASSERT(shapes.size() == size());
mShape = shapes;
return *this;
}
TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> &reshape(Shape shapes) {
TV_ASSERT(shapes.size() == size());
mShape = shapes;
return *this;
}
template <class... Inds>
TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> view(
Inds... newShapes) const {
Shape shapes{int(newShapes)...};
for (size_t i = 0; i < shapes.ndim(); ++i) {
if (shapes[i] == -1) {
shapes[i] = 1;
shapes[i] = size() / shapes.size();
break;
}
}
TV_ASSERT(shapes.size() == size());
return TensorView<scalar_t, Rank>(mPtr, shapes);
}
TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> view(Shape shapes) const {
TV_ASSERT(shapes.size() == size());
return TensorView<scalar_t, Rank>(mPtr, shapes);
}
TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> squeeze() const {
return TensorView<scalar_t, Rank>(mPtr, mShape.squeeze());
}
TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> squeeze(int dim) const {
return TensorView<scalar_t, Rank>(mPtr, mShape.squeeze(dim));
}
TV_HOST_DEVICE_INLINE size_t size() const { return mShape.size(); }
template <class... Slices>
TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> subview(
Slice slice, Slices... slices) const {
return subview<float, Slice, Slices...>(slice, slices...);
}
template <class T2 = float, class... Slices>
TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> subview(
Slices... slices) const {
Slice slice_vec[sizeof...(Slices)] = {to_slice(slices)...};
Shape new_shape{to_slice(slices)[0]...};
Shape start{to_slice(slices)[0]...};
TV_ASSERT(new_shape.ndim() <= mShape.ndim());
TV_ASSERT(new_shape.ndim() != 0);
size_t idxsize = new_shape.ndim();
for (size_t i = idxsize; i < mShape.ndim(); ++i) {
new_shape.push_back(0);
start.push_back(0);
}
#pragma unroll
for (size_t i = 0; i < sizeof...(Slices); ++i) {
if (slice_vec[i][1] != -1) {
new_shape[i] = slice_vec[i][1] - slice_vec[i][0];
TV_ASSERT(new_shape[i] >= 0);
} else {
new_shape[i] = 1;
}
}
auto offset = rowArrayIdx(mShape, start);
#pragma unroll
for (size_t i = sizeof...(Slices); i < mShape.ndim(); ++i) {
new_shape[i] = mShape[i];
TV_ASSERT(new_shape[i] >= 0);
}
Shape reduced_shape;
#pragma unroll
for (size_t i = 0; i < sizeof...(Slices); ++i) {
if (slice_vec[i][1] != -1) {
reduced_shape.push_back(new_shape[i]);
}
}
#pragma unroll
for (size_t i = sizeof...(Slices); i < mShape.ndim(); ++i) {
reduced_shape.push_back(new_shape[i]);
}
return TensorView<scalar_t, Rank>(mPtr + offset, reduced_shape);
}
template <class... Integers>
TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> subview(int id,
Integers... ints) {
Shape start = {id, ints...};
for (int i = 1 + sizeof...(ints); i < ndim(); ++i) {
start.push_back(0);
}
return TensorView<scalar_t, Rank>(mPtr + rowArrayIdx(mShape, start),
mShape.subshape(sizeof...(ints) + 1));
}
std::string repr() const {
std::ostringstream ss;
if (empty()) return "";
if (mShape.ndim() == 0) {
ss << *mPtr;
ss << "Tensor: dtype=" << detail::simpleTypeName<scalar_t>();
return ss.str();
}
Shape counter = mShape;
auto tensor_flat = this->view(-1);
for (int i = 0; i < counter.ndim(); ++i) {
counter[i] = 0;
ss << "[";
}
for (size_t i = 0; i < this->size(); ++i) {
ss << tensor_flat(rowArrayIdx(mShape, counter));
counter[counter.ndim() - 1] += 1;
int inc_count = 0;
bool print_comma = true;
for (int c = counter.ndim() - 1; c >= 0; --c) {
if (counter[c] == this->dim(c) && c > 0) {
++inc_count;
counter[c - 1] += 1;
counter[c] = 0;
print_comma = false;
}
}
if (print_comma && i != this->size() - 1) ss << ", ";
for (int j = 0; j < inc_count; ++j) {
ss << "]";
}
if (i != this->size() - 1) {
if (inc_count != 0) ss << "\n";
for (int j = 0; j < inc_count; ++j) {
ss << "[";
}
}
}
ss << "]";
ss << "Tensor: dtype=" << detail::simpleTypeName<scalar_t>();
return ss.str();
}
protected:
// TODO: make this function public.
// currently this function is called unexpectedly when using subview({0, 0}).
TV_HOST_DEVICE_INLINE TensorView<scalar_t, Rank> _subview(
SimpleVector<Slice> slice_vec) {
Shape new_shape;
for (int i = 0; i < slice_vec.size(); ++i) {
new_shape.push_back(slice_vec[i][0]);
}
Shape start = new_shape;
TV_ASSERT(new_shape.ndim() <= mShape.ndim());
TV_ASSERT(new_shape.ndim() != 0);
size_t idxsize = new_shape.ndim();
for (size_t i = idxsize; i < mShape.ndim(); ++i) {
new_shape.push_back(0);
start.push_back(0);
}
for (size_t i = 0; i < slice_vec.size(); ++i) {
if (slice_vec[i][1] != -1) {
new_shape[i] = slice_vec[i][1] - slice_vec[i][0];
TV_ASSERT(new_shape[i] >= 0);
} else {
new_shape[i] = 1; // reduce dim
}
}
auto offset = rowArrayIdx(mShape, start);
for (size_t i = slice_vec.size(); i < mShape.ndim(); ++i) {
new_shape[i] = mShape[i];
TV_ASSERT(new_shape[i] >= 0);
}
Shape reduced_shape;
for (size_t i = 0; i < slice_vec.size(); ++i) {
if (slice_vec[i][1] != -1) {
reduced_shape.push_back(new_shape[i]);
}
}
for (size_t i = slice_vec.size(); i < mShape.ndim(); ++i) {
reduced_shape.push_back(new_shape[i]);
}
return TensorView<scalar_t, Rank>(mPtr + offset, reduced_shape);
}
template <typename T1>
TV_HOST_DEVICE_INLINE Slice to_slice(T1 s) const {
return Slice{int(s), -1, -1};
}
TV_HOST_DEVICE_INLINE Slice to_slice(Slice s) const { return Slice(s); }
scalar_t *mPtr = nullptr;
Shape mShape;
};
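// Illustrative only: wrapping a raw buffer in a TensorView and indexing it.
// The buffer and shape are arbitrary; view() reinterprets the shape without
// copying data.
inline void tensor_view_usage_sketch() {
  float data[6] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f};
  TensorView<float> mat(data, 2, 3);     // 2 x 3 row-major view over data
  mat(1, 2) = 42.f;                      // writes data[1 * 3 + 2]
  TensorView<float> flat = mat.view(6);  // same memory, shape {6}
  float last = flat(5);                  // == 42.f
  (void)last;
}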
template <typename Os, typename scalar_t, int Rank>
Os &operator<<(Os &os, const TensorView<scalar_t, Rank> &dt) {
os << dt.repr();
return os;
}
template <typename Os, typename scalar_t, int Rank>
Os &operator<<(Os &os, const TensorView<const scalar_t, Rank> &dt) {
os << dt.repr();
return os;
}
namespace detail {
template <typename scalar_t>
constexpr const char *printfTypeFormat(scalar_t val = scalar_t());
template <>
constexpr const char *printfTypeFormat(float val) {
return "%.2f";
}
template <>
constexpr const char *printfTypeFormat(double val) {
return "%.2f";
}
template <>
constexpr const char *printfTypeFormat(int val) {
return "%d";
}
template <>
constexpr const char *printfTypeFormat(unsigned val) {
return "%u";
}
template <>
constexpr const char *printfTypeFormat(long val) {
return "%ld";
}
template <>
constexpr const char *printfTypeFormat(unsigned long val) {
return "%lu";
}
}; // namespace detail
template <typename scalar_t>
TV_HOST_DEVICE void printTensorView(const TensorView<scalar_t> tensor,
const char *format) {
if (tensor.empty()) return;
if (tensor.ndim() == 0) {
printf(format, tensor());
printf("\n");
return;
}
Shape counter = tensor.shape();
auto tensor_flat = tensor.view(-1);
for (int i = 0; i < counter.ndim(); ++i) {
counter[i] = 0;
printf("[");
}
for (size_t i = 0; i < tensor.size(); ++i) {
printf(format, tensor_flat(rowArrayIdx(tensor.shape(), counter)));
counter[counter.ndim() - 1] += 1;
int inc_count = 0;
bool print_comma = true;
for (int c = counter.ndim() - 1; c >= 0; --c) {
if (counter[c] == tensor.dim(c) && c > 0) {
++inc_count;
counter[c - 1] += 1;
counter[c] = 0;
print_comma = false;
}
}
if (print_comma && i != tensor.size() - 1) printf(", ");
for (int j = 0; j < inc_count; ++j) {
printf("]");
}
if (i != tensor.size() - 1) {
if (inc_count != 0) printf("\n");
for (int j = 0; j < inc_count; ++j) {
printf("[");
}
}
}
printf("]\n");
}
template <typename scalar_t>
TV_HOST_DEVICE void printTensorView(TensorView<scalar_t> tensor) {
using Traw = typename std::remove_const<scalar_t>::type;
return printTensorView(tensor, detail::printfTypeFormat<Traw>());
}
template <typename scalar_t>
TV_HOST_DEVICE void printTensorView(const scalar_t *ptr, Shape shape) {
using Traw = typename std::remove_const<scalar_t>::type;
return printTensorView(TensorView<const scalar_t>(ptr, shape),
detail::printfTypeFormat<Traw>());
}
template <typename scalar_t>
TV_HOST_DEVICE void printTensorView(const scalar_t *ptr, Shape shape,
const char *format) {
return printTensorView(TensorView<const scalar_t>(ptr, shape), format);
}
} // namespace tv
// Copyright (c) OpenMMLab. All rights reserved
// It is modified from https://github.com/WenmuZhou/PAN.pytorch
#include <queue>
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
......
// Copyright 2019 Yan Yan
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <torch/script.h>
#include <utils/spconv/spconv/geometry.h>
#include <utils/spconv/spconv/indice.h>
#include "pytorch_cpp_helper.hpp"
namespace functor {
template <typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctor<tv::CPU, Index, IndexGrid, NDim> {
Index operator()(const tv::CPU& d, tv::TensorView<const Index> indicesIn,
tv::TensorView<Index> indicesOut,
tv::TensorView<IndexGrid> gridsOut,
tv::TensorView<Index> indicePairs,
tv::TensorView<Index> indiceNum,
const tv::SimpleVector<Index, NDim> kernelSize,
const tv::SimpleVector<Index, NDim> stride,
const tv::SimpleVector<Index, NDim> padding,
const tv::SimpleVector<Index, NDim> dilation,
const tv::SimpleVector<Index, NDim> outSpatialShape,
bool transpose, bool resetGrid) {
if (transpose)
return getIndicePairsDeConv<Index, IndexGrid, NDim>(
indicesIn, indicesOut, gridsOut, indicePairs, indiceNum,
kernelSize.data(), stride.data(), padding.data(), dilation.data(),
outSpatialShape.data());
else
return getIndicePairsConv<Index, IndexGrid, NDim>(
indicesIn, indicesOut, gridsOut, indicePairs, indiceNum,
kernelSize.data(), stride.data(), padding.data(), dilation.data(),
outSpatialShape.data());
}
};
template <typename Index, typename IndexGrid, unsigned NDim>
struct CreateSubMIndicePairFunctor<tv::CPU, Index, IndexGrid, NDim> {
Index operator()(const tv::CPU& d, tv::TensorView<const Index> indicesIn,
tv::TensorView<IndexGrid> gridsOut,
tv::TensorView<Index> indicePairs,
tv::TensorView<Index> indiceNum,
const tv::SimpleVector<Index, NDim> kernelSize,
const tv::SimpleVector<Index, NDim> stride,
const tv::SimpleVector<Index, NDim> padding,
const tv::SimpleVector<Index, NDim> dilation,
const tv::SimpleVector<Index, NDim> outSpatialShape,
bool transpose, bool resetGrid) {
return getIndicePairsSubM<Index, IndexGrid, NDim>(
indicesIn, gridsOut, indicePairs, indiceNum, kernelSize.data(),
stride.data(), padding.data(), dilation.data(), outSpatialShape.data());
}
};
} // namespace functor
#define DECLARE_CPU_SPECS_INDEX_NDIM(Index, NDIM) \
template struct functor::CreateConvIndicePairFunctor<tv::CPU, Index, int, \
NDIM>; \
template struct functor::CreateSubMIndicePairFunctor<tv::CPU, Index, int, \
NDIM>;
#define DECLARE_CPU_INDEX(Index) \
DECLARE_CPU_SPECS_INDEX_NDIM(Index, 1); \
DECLARE_CPU_SPECS_INDEX_NDIM(Index, 2); \
DECLARE_CPU_SPECS_INDEX_NDIM(Index, 3); \
DECLARE_CPU_SPECS_INDEX_NDIM(Index, 4);
DECLARE_CPU_INDEX(int);
DECLARE_CPU_INDEX(long);
#undef DECLARE_CPU_INDEX
#undef DECLARE_CPU_SPECS_INDEX_NDIM
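// Illustrative only: the standard convolution shape arithmetic that callers
// use to build outSpatialShape before invoking the functors above. The helper
// name is hypothetical; the formula is the usual dense-conv output size.
inline int conv_output_size_sketch(int in_size, int kernel_size, int stride,
                                   int padding, int dilation) {
  return (in_size + 2 * padding - dilation * (kernel_size - 1) - 1) / stride +
         1;
}
// e.g. conv_output_size_sketch(41, 3, 2, 1, 1) == 21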