Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
3517290c
Commit
3517290c
authored
Jul 09, 2020
by
yanyan
Browse files
format code, add benchmark per layer
parent
540a2209
Changes
29
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
548 additions
and
330 deletions
+548
-330
include/spconv/fused_conv.cu.h
include/spconv/fused_conv.cu.h
+3
-3
include/spconv/indice.cu.h
include/spconv/indice.cu.h
+8
-7
include/spconv/minkowski.cu.h
include/spconv/minkowski.cu.h
+0
-1
include/spconv/point2voxel.cu.h
include/spconv/point2voxel.cu.h
+19
-21
include/spconv/point2voxel_ops.h
include/spconv/point2voxel_ops.h
+5
-10
include/spconv/points2voxels.h
include/spconv/points2voxels.h
+12
-16
include/spconv/spconv_ops.h
include/spconv/spconv_ops.h
+9
-3
include/tensorview/cc17.h
include/tensorview/cc17.h
+93
-63
include/tensorview/tensor.h
include/tensorview/tensor.h
+26
-21
include/tensorview/tools.h
include/tensorview/tools.h
+3
-4
include/tensorview/torch_utils.h
include/tensorview/torch_utils.h
+2
-2
spconv/__init__.py
spconv/__init__.py
+1
-79
spconv/conv.py
spconv/conv.py
+107
-39
spconv/core.py
spconv/core.py
+111
-0
spconv/modules.py
spconv/modules.py
+3
-1
spconv/ops.py
spconv/ops.py
+2
-1
spconv/pool.py
spconv/pool.py
+83
-12
spconv/spatial.py
spconv/spatial.py
+5
-3
spconv/utils/__init__.py
spconv/utils/__init__.py
+38
-27
src/spconv/fused_conv.cu
src/spconv/fused_conv.cu
+18
-17
No files found.
include/spconv/fused_conv.cu.h
View file @
3517290c
...
@@ -198,7 +198,7 @@ dConvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w,
...
@@ -198,7 +198,7 @@ dConvolution_KMxKN_forwardB(T *inFeatures, T *outFeatures, T *w,
if (nHot > o) \
if (nHot > o) \
dConvolution_KMxKN_forwardB<T, K, V> \
dConvolution_KMxKN_forwardB<T, K, V> \
<<<dim3(1, output_nPlanes / K, nGroups), dim3(K, K / V), 0, s>>>( \
<<<dim3(1, output_nPlanes / K, nGroups), dim3(K, K / V), 0, s>>>( \
inFeatures, outFeatures, w, rulesIn + o, rulesOut + o, \
inFeatures, outFeatures, w, rulesIn + o, rulesOut + o,
\
nHot - o, input_nPlanes, input_stride, output_nPlanes, \
nHot - o, input_nPlanes, input_stride, output_nPlanes, \
output_stride); \
output_stride); \
return; \
return; \
...
@@ -400,8 +400,8 @@ __global__ void dConvolution_KMxKN_backward_dW_B(
...
@@ -400,8 +400,8 @@ __global__ void dConvolution_KMxKN_backward_dW_B(
if (nHot > o) \
if (nHot > o) \
dConvolution_KMxKN_backward_dW_B<T, K, V> \
dConvolution_KMxKN_backward_dW_B<T, K, V> \
<<<dim3(1, input_nPlanes / K, nGroups), dim3(K, K / V), 0, s>>>( \
<<<dim3(1, input_nPlanes / K, nGroups), dim3(K, K / V), 0, s>>>( \
inFeatures, dInFeatures, dOutFeatures, w, dw, rulesIn + o, \
inFeatures, dInFeatures, dOutFeatures, w, dw, rulesIn + o,
\
rulesOut + o, nHot - o, input_nPlanes, input_stride, \
rulesOut + o, nHot - o, input_nPlanes, input_stride,
\
output_nPlanes, output_stride); \
output_nPlanes, output_stride); \
return; \
return; \
} \
} \
...
...
include/spconv/indice.cu.h
View file @
3517290c
...
@@ -21,15 +21,16 @@
...
@@ -21,15 +21,16 @@
namespace
spconv
{
namespace
spconv
{
template
<
bool
UseDeconv
,
typename
Index
,
unsigned
NDim
>
struct
ConvIndiceDispatch
;
template
<
bool
UseDeconv
,
typename
Index
,
unsigned
NDim
>
struct
ConvIndiceDispatch
;
template
<
typename
Index
,
unsigned
NDim
>
template
<
typename
Index
,
unsigned
NDim
>
struct
ConvIndiceDispatch
<
true
,
Index
,
NDim
>
{
struct
ConvIndiceDispatch
<
true
,
Index
,
NDim
>
{
constexpr
static
auto
*
func
=
getValidOutPosTranspose
<
Index
,
NDim
>
;
constexpr
static
auto
*
func
=
getValidOutPosTranspose
<
Index
,
NDim
>
;
};
};
template
<
typename
Index
,
unsigned
NDim
>
template
<
typename
Index
,
unsigned
NDim
>
struct
ConvIndiceDispatch
<
false
,
Index
,
NDim
>
{
struct
ConvIndiceDispatch
<
false
,
Index
,
NDim
>
{
constexpr
static
auto
*
func
=
getValidOutPos
<
Index
,
NDim
>
;
constexpr
static
auto
*
func
=
getValidOutPos
<
Index
,
NDim
>
;
};
};
template
<
typename
Index
,
unsigned
NDim
,
bool
UseDeconv
,
template
<
typename
Index
,
unsigned
NDim
,
bool
UseDeconv
,
...
@@ -61,8 +62,8 @@ __global__ void prepareIndicePairsKernel(
...
@@ -61,8 +62,8 @@ __global__ void prepareIndicePairsKernel(
for
(
int
ix
:
tv
::
KernelLoopX
<
int
>
(
numActIn
))
{
for
(
int
ix
:
tv
::
KernelLoopX
<
int
>
(
numActIn
))
{
numValidPoints
=
ConvIndiceDispatch
<
UseDeconv
,
Index
,
NDim
>::
func
(
numValidPoints
=
ConvIndiceDispatch
<
UseDeconv
,
Index
,
NDim
>::
func
(
indicesIn
.
data
()
+
ix
*
(
NDim
+
1
)
+
1
,
kernelSize
.
data
(),
indicesIn
.
data
()
+
ix
*
(
NDim
+
1
)
+
1
,
kernelSize
.
data
(),
stride
.
data
(),
padding
.
data
(),
dilation
.
data
(),
stride
.
data
(),
padding
.
data
(),
dilation
.
data
(),
outSpatialShape
.
data
(),
outSpatialShape
.
data
(),
validPoints
);
validPoints
);
for
(
Index
i
=
0
;
i
<
numValidPoints
;
++
i
)
{
for
(
Index
i
=
0
;
i
<
numValidPoints
;
++
i
)
{
pointPtr
=
validPoints
+
i
*
(
NDim
+
1
);
pointPtr
=
validPoints
+
i
*
(
NDim
+
1
);
auto
offset
=
pointPtr
[
NDim
];
auto
offset
=
pointPtr
[
NDim
];
...
...
include/spconv/minkowski.cu.h
View file @
3517290c
...
@@ -89,7 +89,6 @@ __global__ void matmul(const Dtype *A, const int wA, const int hA,
...
@@ -89,7 +89,6 @@ __global__ void matmul(const Dtype *A, const int wA, const int hA,
// C[wB * out_row + x] += Csub;
// C[wB * out_row + x] += Csub;
}
}
template
<
typename
Dtype
,
typename
Itype
,
int
BLOCK_SIZE
>
template
<
typename
Dtype
,
typename
Itype
,
int
BLOCK_SIZE
>
__global__
void
matmul2
(
const
Dtype
*
A
,
const
int
wA
,
const
int
hA
,
__global__
void
matmul2
(
const
Dtype
*
A
,
const
int
wA
,
const
int
hA
,
const
Dtype
*
B
,
const
int
wB
,
const
int
hB
,
const
Dtype
*
B
,
const
int
wB
,
const
int
hB
,
...
...
include/spconv/point2voxel.cu.h
View file @
3517290c
...
@@ -7,10 +7,8 @@
...
@@ -7,10 +7,8 @@
namespace
spconv
{
namespace
spconv
{
template
<
typename
Index
,
unsigned
NDim
>
template
<
typename
Index
,
unsigned
NDim
>
__global__
void
scatterPointToGridKernel
(
__global__
void
scatterPointToGridKernel
(
tv
::
TensorView
<
const
float
>
points
,
tv
::
TensorView
<
const
float
>
points
,
tv
::
TensorView
<
const
Index
>
indexes
,
tv
::
TensorView
<
const
Index
>
indexes
,
tv
::
TensorView
<
float
>
grids
,
tv
::
TensorView
<
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
float
>
grids
,
tv
::
TensorView
<
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
Index
>
pointIndex
,
tv
::
TensorView
<
Index
>
pointIndex
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
gridShape
)
{
const
tv
::
SimpleVector
<
Index
,
NDim
>
gridShape
)
{
Index
index
;
Index
index
;
...
@@ -19,24 +17,25 @@ __global__ void scatterPointToGridKernel(
...
@@ -19,24 +17,25 @@ __global__ void scatterPointToGridKernel(
for
(
int
ix
:
tv
::
KernelLoopX
<
int
>
(
numPoints
))
{
for
(
int
ix
:
tv
::
KernelLoopX
<
int
>
(
numPoints
))
{
index
=
tv
::
ArrayIndexRowMajor
<
NDim
,
NDim
>::
runPtrs
(
index
=
tv
::
ArrayIndexRowMajor
<
NDim
,
NDim
>::
runPtrs
(
indexes
.
data
()
+
ix
*
NDim
,
gridShape
.
data
(),
0
);
indexes
.
data
()
+
ix
*
NDim
,
gridShape
.
data
(),
0
);
pointIndex
(
ix
)
=
index
;
pointIndex
(
ix
)
=
index
;
atomicAdd
(
numPointsPerGrid
.
data
()
+
index
,
Index
(
1
));
atomicAdd
(
numPointsPerGrid
.
data
()
+
index
,
Index
(
1
));
#pragma unroll
#pragma unroll
for
(
int
k
=
0
;
k
!=
numFeatures
;
++
k
)
{
for
(
int
k
=
0
;
k
!=
numFeatures
;
++
k
)
{
atomicAdd
(
grids
.
data
()
+
index
*
numFeatures
+
k
,
*
(
points
.
data
()
+
ix
*
numFeatures
+
k
));
atomicAdd
(
grids
.
data
()
+
index
*
numFeatures
+
k
,
*
(
points
.
data
()
+
ix
*
numFeatures
+
k
));
}
}
}
}
}
}
template
<
typename
Index
,
unsigned
NDim
>
template
<
typename
Index
,
unsigned
NDim
>
__global__
void
gatherPointFromGridKernel
(
__global__
void
tv
::
TensorView
<
const
float
>
grids
,
gatherPointFromGridKernel
(
tv
::
TensorView
<
const
float
>
grids
,
tv
::
TensorView
<
const
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
const
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
const
Index
>
pointIndexUnique
,
tv
::
TensorView
<
const
Index
>
pointIndexUnique
,
tv
::
TensorView
<
float
>
voxels
,
tv
::
TensorView
<
float
>
voxels
,
tv
::
TensorView
<
Index
>
coors
,
tv
::
TensorView
<
Index
>
coors
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
gridShape
)
{
const
tv
::
SimpleVector
<
Index
,
NDim
>
gridShape
)
{
Index
index
;
Index
index
;
int
numVoxels
=
voxels
.
dim
(
0
);
int
numVoxels
=
voxels
.
dim
(
0
);
int
numFeatures
=
grids
.
dim
(
1
);
int
numFeatures
=
grids
.
dim
(
1
);
...
@@ -47,16 +46,15 @@ __global__ void gatherPointFromGridKernel(
...
@@ -47,16 +46,15 @@ __global__ void gatherPointFromGridKernel(
for
(
int
k
=
0
;
k
!=
numFeatures
;
++
k
)
{
for
(
int
k
=
0
;
k
!=
numFeatures
;
++
k
)
{
voxels
(
ix
,
k
)
=
grids
(
index
,
k
)
/
numPointsPerGrid
(
index
);
voxels
(
ix
,
k
)
=
grids
(
index
,
k
)
/
numPointsPerGrid
(
index
);
}
}
index
=
tv
::
rowArrayIdxInv
<
Index
,
NDim
>
(
index
=
tv
::
rowArrayIdxInv
<
Index
,
NDim
>
(
index
,
coors
.
data
()
+
ix
*
NDim
,
index
,
coors
.
data
()
+
ix
*
NDim
,
gridShape
.
data
());
gridShape
.
data
());
}
}
}
}
template
<
typename
Index
>
template
<
typename
Index
>
__global__
void
resetGridKernel
(
__global__
void
resetGridKernel
(
tv
::
TensorView
<
float
>
grids
,
tv
::
TensorView
<
float
>
grids
,
tv
::
TensorView
<
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
Index
>
pointIndexUnique
)
{
tv
::
TensorView
<
Index
>
pointIndexUnique
)
{
Index
index
;
Index
index
;
int
numVoxels
=
pointIndexUnique
.
dim
(
0
)
-
1
;
int
numVoxels
=
pointIndexUnique
.
dim
(
0
)
-
1
;
int
numFeatures
=
grids
.
dim
(
1
);
int
numFeatures
=
grids
.
dim
(
1
);
...
@@ -72,8 +70,8 @@ __global__ void resetGridKernel(
...
@@ -72,8 +70,8 @@ __global__ void resetGridKernel(
}
}
template
<
typename
Index
>
template
<
typename
Index
>
__global__
void
resetPointIndexKernel
(
__global__
void
resetPointIndexKernel
(
tv
::
TensorView
<
Index
>
pointIndex
,
tv
::
TensorView
<
Index
>
pointIndex
,
const
Index
gridVolume
)
{
const
Index
gridVolume
)
{
int
num_max_points
=
pointIndex
.
dim
(
0
)
-
1
;
int
num_max_points
=
pointIndex
.
dim
(
0
)
-
1
;
for
(
int
ix
:
tv
::
KernelLoopX
<
int
>
(
num_max_points
))
{
for
(
int
ix
:
tv
::
KernelLoopX
<
int
>
(
num_max_points
))
{
...
...
include/spconv/point2voxel_ops.h
View file @
3517290c
...
@@ -21,15 +21,10 @@
...
@@ -21,15 +21,10 @@
namespace
spconv
{
namespace
spconv
{
int64_t
int64_t
pointsToVoxel
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
pointsToVoxel
(
torch
::
Tensor
points
,
torch
::
Tensor
pointIndex
,
torch
::
Tensor
grids
,
torch
::
Tensor
indexes
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
voxels
,
torch
::
Tensor
pointIndex
,
torch
::
Tensor
coors
,
std
::
vector
<
int64_t
>
gridShape
,
torch
::
Tensor
grids
,
const
int64_t
ndim
);
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
std
::
vector
<
int64_t
>
gridShape
,
const
int64_t
ndim
);
}
// namespace spconv
}
// namespace spconv
include/spconv/points2voxels.h
View file @
3517290c
...
@@ -3,24 +3,20 @@
...
@@ -3,24 +3,20 @@
#include <tensorview/tensorview.h>
#include <tensorview/tensorview.h>
#include <torch/script.h>
#include <torch/script.h>
namespace
spconv
{
namespace
spconv
{
void
scatter_point_to_grid_cuda
(
void
scatter_point_to_grid_cuda
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
torch
::
Tensor
points
,
torch
::
Tensor
grids
,
torch
::
Tensor
indexes
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
grids
,
torch
::
Tensor
pointIndex
,
torch
::
Tensor
numPointsPerGrid
,
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
);
torch
::
Tensor
pointIndex
,
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
);
void
gather_point_from_grid_cuda
(
void
gather_point_from_grid_cuda
(
torch
::
Tensor
grids
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
pointIndex
,
torch
::
Tensor
pointIndex
,
torch
::
Tensor
pointIndexUnique
,
torch
::
Tensor
pointIndexUnique
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
std
::
vector
<
int64_t
>
gridShape
,
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
);
const
int
ndim
);
}
// namespace spconv
}
// namespace spconv
include/spconv/spconv_ops.h
View file @
3517290c
...
@@ -23,9 +23,15 @@
...
@@ -23,9 +23,15 @@
namespace
spconv
{
namespace
spconv
{
enum
ConvAlgo
{
kNative
=
0
,
kBatch
,
kBatchGemmGather
,
kSparseConvNet
,
kMinkowskiEngine
};
enum
ConvAlgo
{
using
all_conv_algos_t
=
kNative
=
0
,
tv
::
mp_list_c
<
int
,
kNative
,
kBatch
,
kBatchGemmGather
,
kSparseConvNet
,
kMinkowskiEngine
>
;
kBatch
,
kBatchGemmGather
,
kSparseConvNet
,
kMinkowskiEngine
};
using
all_conv_algos_t
=
tv
::
mp_list_c
<
int
,
kNative
,
kBatch
,
kBatchGemmGather
,
kSparseConvNet
,
kMinkowskiEngine
>
;
// torch.jit's doc says only support int64, so we need to convert to int32.
// torch.jit's doc says only support int64, so we need to convert to int32.
std
::
vector
<
torch
::
Tensor
>
std
::
vector
<
torch
::
Tensor
>
...
...
include/tensorview/cc17.h
View file @
3517290c
...
@@ -7,9 +7,10 @@ Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
...
@@ -7,9 +7,10 @@ Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
Copyright (c) 2011-2013 NYU (Clement Farabet)
Copyright (c) 2011-2013 NYU (Clement Farabet)
Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
Samy Bengio, Johnny Mariethoz)
From Caffe2:
From Caffe2:
...
@@ -17,23 +18,23 @@ Copyright (c) 2016-present, Facebook Inc. All rights reserved.
...
@@ -17,23 +18,23 @@ Copyright (c) 2016-present, Facebook Inc. All rights reserved.
All contributions by Facebook:
All contributions by Facebook:
Copyright (c) 2016 Facebook Inc.
Copyright (c) 2016 Facebook Inc.
All contributions by Google:
All contributions by Google:
Copyright (c) 2015 Google Inc.
Copyright (c) 2015 Google Inc.
All rights reserved.
All rights reserved.
All contributions by Yangqing Jia:
All contributions by Yangqing Jia:
Copyright (c) 2015 Yangqing Jia
Copyright (c) 2015 Yangqing Jia
All rights reserved.
All rights reserved.
All contributions from Caffe:
All contributions from Caffe:
Copyright(c) 2013, 2014, 2015, the respective contributors
Copyright(c) 2013, 2014, 2015, the respective contributors
All rights reserved.
All rights reserved.
All other contributions:
All other contributions:
Copyright(c) 2015, 2016 the respective contributors
Copyright(c) 2015, 2016 the respective contributors
All rights reserved.
All rights reserved.
Caffe2 uses a copyright model similar to Caffe: each contributor holds
Caffe2 uses a copyright model similar to Caffe: each contributor holds
copyright over their contributions to Caffe2. The project versioning records
copyright over their contributions to Caffe2. The project versioning records
all such contribution and copyright details. If a contributor wants to further
all such contribution and copyright details. If a contributor wants to further
...
@@ -53,8 +54,8 @@ modification, are permitted provided that the following conditions are met:
...
@@ -53,8 +54,8 @@ modification, are permitted provided that the following conditions are met:
notice, this list of conditions and the following disclaimer in the
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
documentation and/or other materials provided with the distribution.
3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
America
3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
and IDIAP Research Institute nor the names of its contributors may be
America
and IDIAP Research Institute nor the names of its contributors may be
used to endorse or promote products derived from this software without
used to endorse or promote products derived from this software without
specific prior written permission.
specific prior written permission.
...
@@ -75,7 +76,7 @@ POSSIBILITY OF SUCH DAMAGE.
...
@@ -75,7 +76,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include <utility>
#include <utility>
namespace
tv
{
namespace
tv
{
#ifdef __cpp_lib_void_t
#ifdef __cpp_lib_void_t
template
<
class
T
>
using
void_t
=
std
::
void_t
<
T
>
;
template
<
class
T
>
using
void_t
=
std
::
void_t
<
T
>
;
...
@@ -97,47 +98,67 @@ struct _identity final {
...
@@ -97,47 +98,67 @@ struct _identity final {
return
std
::
forward
<
T
>
(
arg
);
return
std
::
forward
<
T
>
(
arg
);
}
}
};
};
template
<
class
Func
,
class
Enable
=
void
>
template
<
class
Func
,
class
Enable
=
void
>
struct
function_takes_identity_argument
:
std
::
false_type
{};
struct
function_takes_identity_argument
:
std
::
false_type
{};
#if defined(_MSC_VER)
#if defined(_MSC_VER)
// For some weird reason, MSVC shows a compiler error when using guts::void_t instead of std::void_t.
// For some weird reason, MSVC shows a compiler error when using guts::void_t
// But we're only building on MSVC versions that have std::void_t, so let's just use that one.
// instead of std::void_t. But we're only building on MSVC versions that have
template
<
class
Func
>
// std::void_t, so let's just use that one.
struct
function_takes_identity_argument
<
Func
,
std
::
void_t
<
decltype
(
std
::
declval
<
Func
>
()(
_identity
()))
>>
:
std
::
true_type
{};
template
<
class
Func
>
struct
function_takes_identity_argument
<
Func
,
std
::
void_t
<
decltype
(
std
::
declval
<
Func
>
()(
_identity
()))
>>
:
std
::
true_type
{};
#else
#else
template
<
class
Func
>
template
<
class
Func
>
struct
function_takes_identity_argument
<
Func
,
void_t
<
decltype
(
std
::
declval
<
Func
>
()(
_identity
()))
>>
:
std
::
true_type
{};
struct
function_takes_identity_argument
<
Func
,
void_t
<
decltype
(
std
::
declval
<
Func
>
()(
_identity
()))
>>
:
std
::
true_type
{};
#endif
#endif
template
<
bool
Condition
>
template
<
bool
Condition
>
struct
_if_constexpr
;
struct
_if_constexpr
;
template
<
>
template
<
>
struct
_if_constexpr
<
true
>
final
{
struct
_if_constexpr
<
true
>
final
{
template
<
template
<
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<
function_takes_identity_argument
<
ThenCallback
>
::
value
,
void
*>
=
nullptr
>
class
ThenCallback
,
class
ElseCallback
,
static
decltype
(
auto
)
call
(
ThenCallback
&&
thenCallback
,
ElseCallback
&&
/* elseCallback */
)
{
std
::
enable_if_t
<
function_takes_identity_argument
<
ThenCallback
>
::
value
,
// The _identity instance passed in can be used to delay evaluation of an expression,
void
*>
=
nullptr
>
// because the compiler can't know that it's just the identity we're passing in.
static
decltype
(
auto
)
call
(
ThenCallback
&&
thenCallback
,
ElseCallback
&&
/* elseCallback */
)
{
// The _identity instance passed in can be used to delay evaluation of an
// expression, because the compiler can't know that it's just the identity
// we're passing in.
return
thenCallback
(
_identity
());
return
thenCallback
(
_identity
());
}
}
template
<
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<!
function_takes_identity_argument
<
ThenCallback
>
::
value
,
void
*>
=
nullptr
>
template
<
static
decltype
(
auto
)
call
(
ThenCallback
&&
thenCallback
,
ElseCallback
&&
/* elseCallback */
)
{
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<!
function_takes_identity_argument
<
ThenCallback
>
::
value
,
void
*>
=
nullptr
>
static
decltype
(
auto
)
call
(
ThenCallback
&&
thenCallback
,
ElseCallback
&&
/* elseCallback */
)
{
return
thenCallback
();
return
thenCallback
();
}
}
};
};
template
<
>
template
<
>
struct
_if_constexpr
<
false
>
final
{
struct
_if_constexpr
<
false
>
final
{
template
<
template
<
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<
function_takes_identity_argument
<
ElseCallback
>
::
value
,
void
*>
=
nullptr
>
class
ThenCallback
,
class
ElseCallback
,
static
decltype
(
auto
)
call
(
ThenCallback
&&
/* thenCallback */
,
ElseCallback
&&
elseCallback
)
{
std
::
enable_if_t
<
function_takes_identity_argument
<
ElseCallback
>
::
value
,
// The _identity instance passed in can be used to delay evaluation of an expression,
void
*>
=
nullptr
>
// because the compiler can't know that it's just the identity we're passing in.
static
decltype
(
auto
)
call
(
ThenCallback
&&
/* thenCallback */
,
ElseCallback
&&
elseCallback
)
{
// The _identity instance passed in can be used to delay evaluation of an
// expression, because the compiler can't know that it's just the identity
// we're passing in.
return
elseCallback
(
_identity
());
return
elseCallback
(
_identity
());
}
}
template
<
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<!
function_takes_identity_argument
<
ElseCallback
>
::
value
,
void
*>
=
nullptr
>
template
<
static
decltype
(
auto
)
call
(
ThenCallback
&&
/* thenCallback */
,
ElseCallback
&&
elseCallback
)
{
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<!
function_takes_identity_argument
<
ElseCallback
>
::
value
,
void
*>
=
nullptr
>
static
decltype
(
auto
)
call
(
ThenCallback
&&
/* thenCallback */
,
ElseCallback
&&
elseCallback
)
{
return
elseCallback
();
return
elseCallback
();
}
}
};
};
...
@@ -173,33 +194,40 @@ struct _if_constexpr<false> final {
...
@@ -173,33 +194,40 @@ struct _if_constexpr<false> final {
* template <class T>
* template <class T>
* int func(T t) {
* int func(T t) {
* return if_constexpr<std::is_same<T, MyClass1>::value>(
* return if_constexpr<std::is_same<T, MyClass1>::value>(
* [&](auto _) { return _(t).value; }, // this code is invalid for T == MyClass2, so a regular non-constexpr if statement wouldn't compile
* [&](auto _) { return _(t).value; }, // this code is invalid for T ==
* [&](auto _) { return _(t).val; } // this code is invalid for T == MyClass1
* MyClass2, so a regular non-constexpr if statement wouldn't compile
* [&](auto _) { return _(t).val; } // this code is invalid for T ==
* MyClass1
* );
* );
* }
* }
*
*
* Note: The _ argument passed in Example 3 is the identity function, i.e. it does nothing.
* Note: The _ argument passed in Example 3 is the identity function, i.e. it
* It is used to force the compiler to delay type checking, because the compiler
* does nothing. It is used to force the compiler to delay type checking,
* doesn't know what kind of _ is passed in. Without it, the compiler would fail
* because the compiler doesn't know what kind of _ is passed in. Without it,
* when you try to access t.value but the member doesn't exist.
* the compiler would fail when you try to access t.value but the member doesn't
* exist.
*
*
* Note: In Example 3, both branches return int, so func() returns int. This is not necessary.
* Note: In Example 3, both branches return int, so func() returns int. This is
* If func() had a return type of "auto", then both branches could return different
* not necessary. If func() had a return type of "auto", then both branches
* types, say func<MyClass1>() could return int and func<MyClass2>() could return string.
* could return different types, say func<MyClass1>() could return int and
* func<MyClass2>() could return string.
*/
*/
template
<
bool
Condition
,
class
ThenCallback
,
class
ElseCallback
>
template
<
bool
Condition
,
class
ThenCallback
,
class
ElseCallback
>
decltype
(
auto
)
if_constexpr
(
ThenCallback
&&
thenCallback
,
ElseCallback
&&
elseCallback
)
{
decltype
(
auto
)
if_constexpr
(
ThenCallback
&&
thenCallback
,
ElseCallback
&&
elseCallback
)
{
#if defined(__cpp_if_constexpr)
#if defined(__cpp_if_constexpr)
// If we have C++17, just use it's "if constexpr" feature instead of wrapping it.
// If we have C++17, just use it's "if constexpr" feature instead of wrapping
// This will give us better error messages.
// it. This will give us better error messages.
if
constexpr
(
Condition
)
{
if
constexpr
(
Condition
)
{
if
constexpr
(
detail
::
function_takes_identity_argument
<
ThenCallback
>::
value
)
{
if
constexpr
(
detail
::
function_takes_identity_argument
<
ThenCallback
>::
value
)
{
return
std
::
forward
<
ThenCallback
>
(
thenCallback
)(
detail
::
_identity
());
return
std
::
forward
<
ThenCallback
>
(
thenCallback
)(
detail
::
_identity
());
}
else
{
}
else
{
return
std
::
forward
<
ThenCallback
>
(
thenCallback
)();
return
std
::
forward
<
ThenCallback
>
(
thenCallback
)();
}
}
}
else
{
}
else
{
if
constexpr
(
detail
::
function_takes_identity_argument
<
ElseCallback
>::
value
)
{
if
constexpr
(
detail
::
function_takes_identity_argument
<
ElseCallback
>::
value
)
{
return
std
::
forward
<
ElseCallback
>
(
elseCallback
)(
detail
::
_identity
());
return
std
::
forward
<
ElseCallback
>
(
elseCallback
)(
detail
::
_identity
());
}
else
{
}
else
{
return
std
::
forward
<
ElseCallback
>
(
elseCallback
)();
return
std
::
forward
<
ElseCallback
>
(
elseCallback
)();
...
@@ -207,18 +235,20 @@ decltype(auto) if_constexpr(ThenCallback&& thenCallback, ElseCallback&& elseCall
...
@@ -207,18 +235,20 @@ decltype(auto) if_constexpr(ThenCallback&& thenCallback, ElseCallback&& elseCall
}
}
#else
#else
// C++14 implementation of if constexpr
// C++14 implementation of if constexpr
return
detail
::
_if_constexpr
<
Condition
>::
call
(
std
::
forward
<
ThenCallback
>
(
thenCallback
),
return
detail
::
_if_constexpr
<
Condition
>::
call
(
std
::
forward
<
ElseCallback
>
(
elseCallback
));
std
::
forward
<
ThenCallback
>
(
thenCallback
),
std
::
forward
<
ElseCallback
>
(
elseCallback
));
#endif
#endif
}
}
template
<
bool
Condition
,
class
ThenCallback
>
template
<
bool
Condition
,
class
ThenCallback
>
decltype
(
auto
)
if_constexpr
(
ThenCallback
&&
thenCallback
)
{
decltype
(
auto
)
if_constexpr
(
ThenCallback
&&
thenCallback
)
{
#if defined(__cpp_if_constexpr)
#if defined(__cpp_if_constexpr)
// If we have C++17, just use it's "if constexpr" feature instead of wrapping it.
// If we have C++17, just use it's "if constexpr" feature instead of wrapping
// This will give us better error messages.
// it. This will give us better error messages.
if
constexpr
(
Condition
)
{
if
constexpr
(
Condition
)
{
if
constexpr
(
detail
::
function_takes_identity_argument
<
ThenCallback
>::
value
)
{
if
constexpr
(
detail
::
function_takes_identity_argument
<
ThenCallback
>::
value
)
{
return
std
::
forward
<
ThenCallback
>
(
thenCallback
)(
detail
::
_identity
());
return
std
::
forward
<
ThenCallback
>
(
thenCallback
)(
detail
::
_identity
());
}
else
{
}
else
{
return
std
::
forward
<
ThenCallback
>
(
thenCallback
)();
return
std
::
forward
<
ThenCallback
>
(
thenCallback
)();
...
@@ -226,9 +256,9 @@ decltype(auto) if_constexpr(ThenCallback&& thenCallback) {
...
@@ -226,9 +256,9 @@ decltype(auto) if_constexpr(ThenCallback&& thenCallback) {
}
}
#else
#else
// C++14 implementation of if constexpr
// C++14 implementation of if constexpr
return
if_constexpr
<
Condition
>
(
std
::
forward
<
ThenCallback
>
(
thenCallback
),
[]
(
auto
)
{});
return
if_constexpr
<
Condition
>
(
std
::
forward
<
ThenCallback
>
(
thenCallback
),
[](
auto
)
{});
#endif
#endif
}
}
}
// namespace tv
}
include/tensorview/tensor.h
View file @
3517290c
...
@@ -22,13 +22,13 @@ If you can use libtorch, dont use tv::Tensor.
...
@@ -22,13 +22,13 @@ If you can use libtorch, dont use tv::Tensor.
*/
*/
#pragma once
#pragma once
#include "cc17.h"
#include "mp_helper.h"
#include "mp_helper.h"
#include "tensorview.h"
#include "tensorview.h"
#include <cstring>
#include <cstring>
#include <iomanip>
#include <iomanip>
#include <memory>
#include <memory>
#include <type_traits>
#include <type_traits>
#include "cc17.h"
#ifdef TV_CUDA
#ifdef TV_CUDA
#include <cuda_fp16.h>
#include <cuda_fp16.h>
#include <cuda_runtime.h>
#include <cuda_runtime.h>
...
@@ -632,26 +632,31 @@ struct Tensor {
...
@@ -632,26 +632,31 @@ struct Tensor {
tview
()
const
{
tview
()
const
{
static_assert
(
Rank
==
-
1
||
Rank
>
0
,
"error"
);
static_assert
(
Rank
==
-
1
||
Rank
>
0
,
"error"
);
TV_ASSERT_RT_ERR
(
dtype_
==
type_v
<
T
>
,
"error"
);
TV_ASSERT_RT_ERR
(
dtype_
==
type_v
<
T
>
,
"error"
);
return
if_constexpr
<
(
Rank
>
0
)
>
([
&
](
auto
_
){
return
if_constexpr
<
(
Rank
>
0
)
>
(
TV_ASSERT_RT_ERR
(
Rank
==
ndim
(),
"error"
);
[
&
](
auto
_
)
{
ShapeBase
<
_
(
Rank
)
==
-
1
?
TV_MAX_DIM
:
Rank
,
Tindex
>
shape
(
Rank
),
stride
(
Rank
);
TV_ASSERT_RT_ERR
(
Rank
==
ndim
(),
"error"
);
for
(
int
i
=
0
;
i
<
Rank
;
++
i
)
{
ShapeBase
<
_
(
Rank
)
==
-
1
?
TV_MAX_DIM
:
Rank
,
Tindex
>
shape
(
Rank
),
shape
[
i
]
=
shape_
[
i
];
stride
(
Rank
);
stride
[
i
]
=
stride_
[
i
];
for
(
int
i
=
0
;
i
<
Rank
;
++
i
)
{
}
shape
[
i
]
=
shape_
[
i
];
return
TensorView
<
const
std
::
remove_const_t
<
T
>
,
Rank
,
PtrTraits
,
Tindex
>
(
stride
[
i
]
=
stride_
[
i
];
reinterpret_cast
<
const
std
::
remove_const_t
<
T
>
*>
(
data
<
T
>
()),
shape
,
}
stride
);
return
TensorView
<
const
std
::
remove_const_t
<
T
>
,
Rank
,
PtrTraits
,
},
[
&
](
auto
_
){
Tindex
>
(
ShapeBase
<
TV_MAX_DIM
,
Tindex
>
shape
(
_
(
ndim
())),
stride
(
ndim
());
reinterpret_cast
<
const
std
::
remove_const_t
<
T
>
*>
(
data
<
T
>
()),
for
(
int
i
=
0
;
i
<
int
(
ndim
());
++
i
)
{
shape
,
stride
);
shape
[
i
]
=
shape_
[
i
];
},
stride
[
i
]
=
stride_
[
i
];
[
&
](
auto
_
)
{
}
ShapeBase
<
TV_MAX_DIM
,
Tindex
>
shape
(
_
(
ndim
())),
stride
(
ndim
());
return
TensorView
<
const
std
::
remove_const_t
<
T
>
,
Rank
,
PtrTraits
,
Tindex
>
(
for
(
int
i
=
0
;
i
<
int
(
ndim
());
++
i
)
{
reinterpret_cast
<
const
std
::
remove_const_t
<
T
>
*>
(
data
<
T
>
()),
shape
,
shape
[
i
]
=
shape_
[
i
];
stride
);
stride
[
i
]
=
stride_
[
i
];
});
}
return
TensorView
<
const
std
::
remove_const_t
<
T
>
,
Rank
,
PtrTraits
,
Tindex
>
(
reinterpret_cast
<
const
std
::
remove_const_t
<
T
>
*>
(
data
<
T
>
()),
shape
,
stride
);
});
}
}
template
<
class
...
Inds
>
Tensor
view
(
Inds
...
newShapes
)
const
{
template
<
class
...
Inds
>
Tensor
view
(
Inds
...
newShapes
)
const
{
static_assert
(
sizeof
...(
newShapes
)
>
0
,
"dont support empty for now"
);
static_assert
(
sizeof
...(
newShapes
)
>
0
,
"dont support empty for now"
);
...
...
include/tensorview/tools.h
View file @
3517290c
...
@@ -36,22 +36,21 @@ template <typename TimeT = std::chrono::microseconds> struct CudaContextTimer {
...
@@ -36,22 +36,21 @@ template <typename TimeT = std::chrono::microseconds> struct CudaContextTimer {
return
res
;
return
res
;
}
}
template
<
int
Count
,
typename
F
>
template
<
int
Count
,
typename
F
>
double
benchmark
(
F
&&
f
,
int
start
=
int
(
Count
)
*
0.3
){
double
benchmark
(
F
&&
f
,
int
start
=
int
(
Count
)
*
0.3
)
{
// std::vector<TimeT::rep> times;
// std::vector<TimeT::rep> times;
auto
res
=
typename
TimeT
::
rep
();
auto
res
=
typename
TimeT
::
rep
();
int
count
=
0
;
int
count
=
0
;
cudaDeviceSynchronize
();
cudaDeviceSynchronize
();
for
(
int
i
=
0
;
i
<
Count
;
++
i
){
for
(
int
i
=
0
;
i
<
Count
;
++
i
)
{
std
::
forward
<
F
>
(
f
)();
std
::
forward
<
F
>
(
f
)();
auto
time
=
report
();
auto
time
=
report
();
if
(
i
>=
start
){
if
(
i
>=
start
)
{
// times.push_back(time)
// times.push_back(time)
res
+=
time
;
res
+=
time
;
count
+=
1
;
count
+=
1
;
}
}
}
}
return
res
/
double
(
count
);
return
res
/
double
(
count
);
}
}
private:
private:
...
...
include/tensorview/torch_utils.h
View file @
3517290c
...
@@ -14,9 +14,9 @@
...
@@ -14,9 +14,9 @@
#pragma once
#pragma once
#include "mp_helper.h"
#include "mp_helper.h"
#include <tensorview/tensorview.h>
#include <tensorview/tensor.h>
#include <ATen/ATen.h>
#include <ATen/ATen.h>
#include <tensorview/tensor.h>
#include <tensorview/tensorview.h>
#include <torch/script.h>
#include <torch/script.h>
#ifdef TV_CUDA
#ifdef TV_CUDA
#include <ATen/cuda/CUDAContext.h>
#include <ATen/cuda/CUDAContext.h>
...
...
spconv/__init__.py
View file @
3517290c
...
@@ -22,6 +22,7 @@ from spconv import ops, utils
...
@@ -22,6 +22,7 @@ from spconv import ops, utils
from
spconv.conv
import
(
SparseConv2d
,
SparseConv3d
,
SparseConvTranspose2d
,
from
spconv.conv
import
(
SparseConv2d
,
SparseConv3d
,
SparseConvTranspose2d
,
SparseConvTranspose3d
,
SparseInverseConv2d
,
SparseConvTranspose3d
,
SparseInverseConv2d
,
SparseInverseConv3d
,
SubMConv2d
,
SubMConv3d
)
SparseInverseConv3d
,
SubMConv2d
,
SubMConv3d
)
from
spconv.core
import
SparseConvTensor
from
spconv.identity
import
Identity
from
spconv.identity
import
Identity
from
spconv.modules
import
SparseModule
,
SparseSequential
from
spconv.modules
import
SparseModule
,
SparseSequential
from
spconv.ops
import
ConvAlgo
from
spconv.ops
import
ConvAlgo
...
@@ -35,85 +36,6 @@ _LIB_PATH = str(Path(__file__).parent / _LIB_FILE_NAME)
...
@@ -35,85 +36,6 @@ _LIB_PATH = str(Path(__file__).parent / _LIB_FILE_NAME)
torch
.
ops
.
load_library
(
_LIB_PATH
)
torch
.
ops
.
load_library
(
_LIB_PATH
)
def
scatter_nd
(
indices
,
updates
,
shape
):
"""pytorch edition of tensorflow scatter_nd.
this function don't contain except handle code. so use this carefully
when indice repeats, don't support repeat add which is supported
in tensorflow.
"""
ret
=
torch
.
zeros
(
*
shape
,
dtype
=
updates
.
dtype
,
device
=
updates
.
device
)
ndim
=
indices
.
shape
[
-
1
]
output_shape
=
list
(
indices
.
shape
[:
-
1
])
+
shape
[
indices
.
shape
[
-
1
]:]
flatted_indices
=
indices
.
view
(
-
1
,
ndim
)
slices
=
[
flatted_indices
[:,
i
]
for
i
in
range
(
ndim
)]
slices
+=
[
Ellipsis
]
ret
[
slices
]
=
updates
.
view
(
*
output_shape
)
return
ret
class
SparseConvTensor
(
object
):
def
__init__
(
self
,
features
,
indices
,
spatial_shape
,
batch_size
,
grid
=
None
):
"""
Args:
features: [num_points, num_features] feature tensor
indices: [num_points, ndim + 1] indice tensor. batch index saved in indices[:, 0]
spatial_shape: spatial shape of your sparse data
batch_size: batch size of your sparse data
grid: pre-allocated grid tensor. should be used when the volume of spatial shape
is very large.
"""
self
.
features
=
features
self
.
indices
=
indices
self
.
spatial_shape
=
spatial_shape
self
.
batch_size
=
batch_size
self
.
indice_dict
=
{}
if
grid
is
None
:
grid
=
torch
.
Tensor
()
# empty tensor
self
.
grid
=
grid
@
classmethod
def
from_dense
(
cls
,
x
:
torch
.
Tensor
):
"""create sparse tensor fron channel last dense tensor by to_sparse
x must be NHWC tensor, channel last
"""
x
=
x
.
to_sparse
(
x
.
ndim
-
1
)
spatial_shape
=
x
.
shape
[
1
:
-
1
]
batch_size
=
x
.
shape
[
0
]
indices_th
=
x
.
indices
().
permute
(
1
,
0
).
contiguous
().
int
()
features_th
=
x
.
values
()
return
cls
(
features_th
,
indices_th
,
spatial_shape
,
batch_size
)
@
property
def
spatial_size
(
self
):
return
np
.
prod
(
self
.
spatial_shape
)
def
find_indice_pair
(
self
,
key
):
if
key
is
None
:
return
None
if
key
in
self
.
indice_dict
:
return
self
.
indice_dict
[
key
]
return
None
def
dense
(
self
,
channels_first
=
True
):
output_shape
=
[
self
.
batch_size
]
+
list
(
self
.
spatial_shape
)
+
[
self
.
features
.
shape
[
1
]]
res
=
scatter_nd
(
self
.
indices
.
to
(
self
.
features
.
device
).
long
(),
self
.
features
,
output_shape
)
if
not
channels_first
:
return
res
ndim
=
len
(
self
.
spatial_shape
)
trans_params
=
list
(
range
(
0
,
ndim
+
1
))
trans_params
.
insert
(
1
,
ndim
+
1
)
return
res
.
permute
(
*
trans_params
).
contiguous
()
@
property
def
sparity
(
self
):
return
self
.
indices
.
shape
[
0
]
/
np
.
prod
(
self
.
spatial_shape
)
/
self
.
batch_size
class
ToDense
(
SparseModule
):
class
ToDense
(
SparseModule
):
"""convert SparseConvTensor to NCHW dense tensor.
"""convert SparseConvTensor to NCHW dense tensor.
"""
"""
...
...
spconv/conv.py
View file @
3517290c
...
@@ -24,6 +24,7 @@ from torch.nn.parameter import Parameter
...
@@ -24,6 +24,7 @@ from torch.nn.parameter import Parameter
import
spconv
import
spconv
import
spconv.functional
as
Fsp
import
spconv.functional
as
Fsp
from
spconv
import
ops
from
spconv
import
ops
from
spconv.core
import
IndiceData
,
SparseConvTensor
from
spconv.modules
import
SparseModule
from
spconv.modules
import
SparseModule
...
@@ -72,8 +73,9 @@ class SparseConvolution(SparseModule):
...
@@ -72,8 +73,9 @@ class SparseConvolution(SparseModule):
indice_key
=
None
,
indice_key
=
None
,
fused_bn
=
False
,
fused_bn
=
False
,
use_hash
=
False
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
super
(
SparseConvolution
,
self
).
__init__
()
name
=
None
):
super
(
SparseConvolution
,
self
).
__init__
(
name
=
name
)
assert
groups
==
1
assert
groups
==
1
if
not
isinstance
(
kernel_size
,
(
list
,
tuple
)):
if
not
isinstance
(
kernel_size
,
(
list
,
tuple
)):
kernel_size
=
[
kernel_size
]
*
ndim
kernel_size
=
[
kernel_size
]
*
ndim
...
@@ -123,8 +125,8 @@ class SparseConvolution(SparseModule):
...
@@ -123,8 +125,8 @@ class SparseConvolution(SparseModule):
bound
=
1
/
math
.
sqrt
(
fan_in
)
bound
=
1
/
math
.
sqrt
(
fan_in
)
init
.
uniform_
(
self
.
bias
,
-
bound
,
bound
)
init
.
uniform_
(
self
.
bias
,
-
bound
,
bound
)
def
forward
(
self
,
input
):
def
forward
(
self
,
input
:
SparseConvTensor
):
assert
isinstance
(
input
,
spconv
.
SparseConvTensor
)
assert
isinstance
(
input
,
SparseConvTensor
)
features
=
input
.
features
features
=
input
.
features
device
=
features
.
device
device
=
features
.
device
indices
=
input
.
indices
indices
=
input
.
indices
...
@@ -143,29 +145,58 @@ class SparseConvolution(SparseModule):
...
@@ -143,29 +145,58 @@ class SparseConvolution(SparseModule):
out_spatial_shape
=
spatial_shape
out_spatial_shape
=
spatial_shape
# input.update_grid(out_spatial_shape)
# input.update_grid(out_spatial_shape)
# t = time.time()
# t = time.time()
out_tensor
=
input
.
shadow_copy
()
if
input
.
benchmark
:
if
self
.
name
is
None
:
raise
ValueError
(
"you need to assign name to spmodules before benchmark (spconv.utils.bench.assign_name_to_spmod)"
)
if
self
.
name
not
in
input
.
benchmark_record
:
input
.
benchmark_record
[
self
.
name
]
=
{
"type"
:
"SparseConvolution"
,
"indice_gen_time"
:
[],
"time"
:
[],
"num_points"
:
[],
"num_out_points"
:
[],
"params"
:
{
"kernel_size"
:
self
.
kernel_size
,
"stride"
:
self
.
stride
,
"padding"
:
self
.
padding
,
"dilation"
:
self
.
dilation
,
"output_padding"
:
self
.
output_padding
,
"subm"
:
self
.
subm
,
"transposed"
:
self
.
transposed
,
"input_channels"
:
self
.
in_channels
,
"out_channels"
:
self
.
out_channels
,
}
}
if
self
.
conv1x1
:
if
self
.
conv1x1
:
features
=
torch
.
mm
(
features
=
torch
.
mm
(
input
.
features
,
input
.
features
,
self
.
weight
.
view
(
self
.
in_channels
,
self
.
out_channels
))
self
.
weight
.
view
(
self
.
in_channels
,
self
.
out_channels
))
if
self
.
bias
is
not
None
:
if
self
.
bias
is
not
None
:
features
+=
self
.
bias
features
+=
self
.
bias
out_tensor
=
spconv
.
SparseConvTensor
(
features
,
input
.
indices
,
out_tensor
.
features
=
features
input
.
spatial_shape
,
input
.
batch_size
)
out_tensor
.
indice_dict
=
input
.
indice_dict
out_tensor
.
grid
=
input
.
grid
return
out_tensor
return
out_tensor
datas
=
input
.
find_indice_pair
(
self
.
indice_key
)
datas
=
input
.
find_indice_pair
(
self
.
indice_key
)
if
self
.
inverse
:
if
self
.
inverse
:
assert
datas
is
not
None
and
self
.
indice_key
is
not
None
assert
datas
is
not
None
and
self
.
indice_key
is
not
None
_
,
outids
,
indice_pairs
,
indice_pair_num
,
out_spatial_shape
=
datas
outids
=
datas
.
indices
indice_pairs
=
datas
.
indice_pairs
indice_pair_num
=
datas
.
indice_pair_num
out_spatial_shape
=
datas
.
out_spatial_shape
assert
indice_pair_num
.
shape
[
0
]
==
np
.
prod
(
assert
indice_pair_num
.
shape
[
0
]
==
np
.
prod
(
self
.
kernel_size
self
.
kernel_size
),
"inverse conv must have same kernel size as its couple conv"
),
"inverse conv must have same kernel size as its couple conv"
else
:
else
:
if
self
.
indice_key
is
not
None
and
datas
is
not
None
:
if
self
.
indice_key
is
not
None
and
datas
is
not
None
:
outids
,
_
,
indice_pairs
,
indice_pair_num
,
_
=
datas
outids
=
datas
.
out_indices
indice_pairs
=
datas
.
indice_pairs
indice_pair_num
=
datas
.
indice_pair_num
else
:
else
:
if
input
.
benchmark
:
torch
.
cuda
.
synchronize
()
t
=
time
.
time
()
outids
,
indice_pairs
,
indice_pair_num
=
ops
.
get_indice_pairs
(
outids
,
indice_pairs
,
indice_pair_num
=
ops
.
get_indice_pairs
(
indices
,
indices
,
batch_size
,
batch_size
,
...
@@ -179,10 +210,19 @@ class SparseConvolution(SparseModule):
...
@@ -179,10 +210,19 @@ class SparseConvolution(SparseModule):
self
.
transposed
,
self
.
transposed
,
grid
=
input
.
grid
,
grid
=
input
.
grid
,
use_hash
=
self
.
use_hash
)
use_hash
=
self
.
use_hash
)
input
.
indice_dict
[
self
.
indice_key
]
=
(
outids
,
indices
,
if
input
.
benchmark
:
indice_pairs
,
torch
.
cuda
.
synchronize
()
indice_pair_num
,
interval
=
time
.
time
()
-
t
spatial_shape
)
out_tensor
.
benchmark_record
[
self
.
name
][
"indice_gen_time"
].
append
(
interval
)
indice_data
=
IndiceData
(
outids
,
indices
,
indice_pairs
,
indice_pair_num
,
spatial_shape
)
input
.
indice_dict
[
self
.
indice_key
]
=
indice_data
if
input
.
benchmark
:
torch
.
cuda
.
synchronize
()
t
=
time
.
time
()
if
self
.
fused_bn
:
if
self
.
fused_bn
:
assert
self
.
bias
is
not
None
assert
self
.
bias
is
not
None
out_features
=
ops
.
fused_indice_conv
(
features
,
self
.
weight
,
out_features
=
ops
.
fused_indice_conv
(
features
,
self
.
weight
,
...
@@ -210,10 +250,18 @@ class SparseConvolution(SparseModule):
...
@@ -210,10 +250,18 @@ class SparseConvolution(SparseModule):
if
self
.
bias
is
not
None
:
if
self
.
bias
is
not
None
:
out_features
+=
self
.
bias
out_features
+=
self
.
bias
out_tensor
=
spconv
.
SparseConvTensor
(
out_features
,
outids
,
if
input
.
benchmark
:
out_spatial_shape
,
batch_size
)
torch
.
cuda
.
synchronize
()
out_tensor
.
indice_dict
=
input
.
indice_dict
interval
=
time
.
time
()
-
t
out_tensor
.
grid
=
input
.
grid
out_tensor
.
benchmark_record
[
self
.
name
][
"time"
].
append
(
interval
)
out_tensor
.
benchmark_record
[
self
.
name
][
"num_points"
].
append
(
features
.
shape
[
0
])
out_tensor
.
benchmark_record
[
self
.
name
][
"num_out_points"
].
append
(
out_features
.
shape
[
0
])
out_tensor
.
features
=
out_features
out_tensor
.
indices
=
outids
out_tensor
.
spatial_shape
=
out_spatial_shape
return
out_tensor
return
out_tensor
...
@@ -229,7 +277,8 @@ class SparseConv2d(SparseConvolution):
...
@@ -229,7 +277,8 @@ class SparseConv2d(SparseConvolution):
bias
=
True
,
bias
=
True
,
indice_key
=
None
,
indice_key
=
None
,
use_hash
=
False
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseConv2d
,
self
).
__init__
(
2
,
super
(
SparseConv2d
,
self
).
__init__
(
2
,
in_channels
,
in_channels
,
out_channels
,
out_channels
,
...
@@ -241,7 +290,8 @@ class SparseConv2d(SparseConvolution):
...
@@ -241,7 +290,8 @@ class SparseConv2d(SparseConvolution):
bias
,
bias
,
indice_key
=
indice_key
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SparseConv3d
(
SparseConvolution
):
class
SparseConv3d
(
SparseConvolution
):
...
@@ -256,7 +306,8 @@ class SparseConv3d(SparseConvolution):
...
@@ -256,7 +306,8 @@ class SparseConv3d(SparseConvolution):
bias
=
True
,
bias
=
True
,
indice_key
=
None
,
indice_key
=
None
,
use_hash
=
False
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseConv3d
,
self
).
__init__
(
3
,
super
(
SparseConv3d
,
self
).
__init__
(
3
,
in_channels
,
in_channels
,
out_channels
,
out_channels
,
...
@@ -268,7 +319,8 @@ class SparseConv3d(SparseConvolution):
...
@@ -268,7 +319,8 @@ class SparseConv3d(SparseConvolution):
bias
,
bias
,
indice_key
=
indice_key
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SparseConv4d
(
SparseConvolution
):
class
SparseConv4d
(
SparseConvolution
):
...
@@ -283,7 +335,8 @@ class SparseConv4d(SparseConvolution):
...
@@ -283,7 +335,8 @@ class SparseConv4d(SparseConvolution):
bias
=
True
,
bias
=
True
,
indice_key
=
None
,
indice_key
=
None
,
use_hash
=
False
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseConv4d
,
self
).
__init__
(
4
,
super
(
SparseConv4d
,
self
).
__init__
(
4
,
in_channels
,
in_channels
,
out_channels
,
out_channels
,
...
@@ -295,7 +348,8 @@ class SparseConv4d(SparseConvolution):
...
@@ -295,7 +348,8 @@ class SparseConv4d(SparseConvolution):
bias
,
bias
,
indice_key
=
indice_key
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SparseConvTranspose2d
(
SparseConvolution
):
class
SparseConvTranspose2d
(
SparseConvolution
):
...
@@ -310,7 +364,8 @@ class SparseConvTranspose2d(SparseConvolution):
...
@@ -310,7 +364,8 @@ class SparseConvTranspose2d(SparseConvolution):
bias
=
True
,
bias
=
True
,
indice_key
=
None
,
indice_key
=
None
,
use_hash
=
False
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseConvTranspose2d
,
self
).
__init__
(
2
,
super
(
SparseConvTranspose2d
,
self
).
__init__
(
2
,
in_channels
,
in_channels
,
out_channels
,
out_channels
,
...
@@ -323,7 +378,8 @@ class SparseConvTranspose2d(SparseConvolution):
...
@@ -323,7 +378,8 @@ class SparseConvTranspose2d(SparseConvolution):
transposed
=
True
,
transposed
=
True
,
indice_key
=
indice_key
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SparseConvTranspose3d
(
SparseConvolution
):
class
SparseConvTranspose3d
(
SparseConvolution
):
...
@@ -338,7 +394,8 @@ class SparseConvTranspose3d(SparseConvolution):
...
@@ -338,7 +394,8 @@ class SparseConvTranspose3d(SparseConvolution):
bias
=
True
,
bias
=
True
,
indice_key
=
None
,
indice_key
=
None
,
use_hash
=
False
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseConvTranspose3d
,
self
).
__init__
(
3
,
super
(
SparseConvTranspose3d
,
self
).
__init__
(
3
,
in_channels
,
in_channels
,
out_channels
,
out_channels
,
...
@@ -351,7 +408,8 @@ class SparseConvTranspose3d(SparseConvolution):
...
@@ -351,7 +408,8 @@ class SparseConvTranspose3d(SparseConvolution):
transposed
=
True
,
transposed
=
True
,
indice_key
=
indice_key
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SparseInverseConv2d
(
SparseConvolution
):
class
SparseInverseConv2d
(
SparseConvolution
):
...
@@ -361,7 +419,8 @@ class SparseInverseConv2d(SparseConvolution):
...
@@ -361,7 +419,8 @@ class SparseInverseConv2d(SparseConvolution):
kernel_size
,
kernel_size
,
indice_key
,
indice_key
,
bias
=
True
,
bias
=
True
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseInverseConv2d
,
self
).
__init__
(
2
,
super
(
SparseInverseConv2d
,
self
).
__init__
(
2
,
in_channels
,
in_channels
,
out_channels
,
out_channels
,
...
@@ -369,7 +428,8 @@ class SparseInverseConv2d(SparseConvolution):
...
@@ -369,7 +428,8 @@ class SparseInverseConv2d(SparseConvolution):
bias
=
bias
,
bias
=
bias
,
inverse
=
True
,
inverse
=
True
,
indice_key
=
indice_key
,
indice_key
=
indice_key
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SparseInverseConv3d
(
SparseConvolution
):
class
SparseInverseConv3d
(
SparseConvolution
):
...
@@ -379,7 +439,8 @@ class SparseInverseConv3d(SparseConvolution):
...
@@ -379,7 +439,8 @@ class SparseInverseConv3d(SparseConvolution):
kernel_size
,
kernel_size
,
indice_key
,
indice_key
,
bias
=
True
,
bias
=
True
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseInverseConv3d
,
self
).
__init__
(
3
,
super
(
SparseInverseConv3d
,
self
).
__init__
(
3
,
in_channels
,
in_channels
,
out_channels
,
out_channels
,
...
@@ -387,7 +448,8 @@ class SparseInverseConv3d(SparseConvolution):
...
@@ -387,7 +448,8 @@ class SparseInverseConv3d(SparseConvolution):
bias
=
bias
,
bias
=
bias
,
inverse
=
True
,
inverse
=
True
,
indice_key
=
indice_key
,
indice_key
=
indice_key
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SubMConv2d
(
SparseConvolution
):
class
SubMConv2d
(
SparseConvolution
):
...
@@ -402,7 +464,8 @@ class SubMConv2d(SparseConvolution):
...
@@ -402,7 +464,8 @@ class SubMConv2d(SparseConvolution):
bias
=
True
,
bias
=
True
,
indice_key
=
None
,
indice_key
=
None
,
use_hash
=
False
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SubMConv2d
,
self
).
__init__
(
2
,
super
(
SubMConv2d
,
self
).
__init__
(
2
,
in_channels
,
in_channels
,
out_channels
,
out_channels
,
...
@@ -415,7 +478,8 @@ class SubMConv2d(SparseConvolution):
...
@@ -415,7 +478,8 @@ class SubMConv2d(SparseConvolution):
True
,
True
,
indice_key
=
indice_key
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SubMConv3d
(
SparseConvolution
):
class
SubMConv3d
(
SparseConvolution
):
...
@@ -430,7 +494,8 @@ class SubMConv3d(SparseConvolution):
...
@@ -430,7 +494,8 @@ class SubMConv3d(SparseConvolution):
bias
=
True
,
bias
=
True
,
indice_key
=
None
,
indice_key
=
None
,
use_hash
=
False
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SubMConv3d
,
self
).
__init__
(
3
,
super
(
SubMConv3d
,
self
).
__init__
(
3
,
in_channels
,
in_channels
,
out_channels
,
out_channels
,
...
@@ -443,7 +508,8 @@ class SubMConv3d(SparseConvolution):
...
@@ -443,7 +508,8 @@ class SubMConv3d(SparseConvolution):
True
,
True
,
indice_key
=
indice_key
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SubMConv4d
(
SparseConvolution
):
class
SubMConv4d
(
SparseConvolution
):
...
@@ -458,7 +524,8 @@ class SubMConv4d(SparseConvolution):
...
@@ -458,7 +524,8 @@ class SubMConv4d(SparseConvolution):
bias
=
True
,
bias
=
True
,
indice_key
=
None
,
indice_key
=
None
,
use_hash
=
False
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SubMConv4d
,
self
).
__init__
(
4
,
super
(
SubMConv4d
,
self
).
__init__
(
4
,
in_channels
,
in_channels
,
out_channels
,
out_channels
,
...
@@ -471,4 +538,5 @@ class SubMConv4d(SparseConvolution):
...
@@ -471,4 +538,5 @@ class SubMConv4d(SparseConvolution):
True
,
True
,
indice_key
=
indice_key
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
spconv/core.py
0 → 100644
View file @
3517290c
from
typing
import
Optional
import
numpy
as
np
import
torch
class
IndiceData
(
object
):
def
__init__
(
self
,
out_indices
,
indices
,
indice_pairs
,
indice_pair_num
,
out_spatial_shape
):
self
.
out_indices
=
out_indices
self
.
indices
=
indices
self
.
indice_pairs
=
indice_pairs
self
.
indice_pair_num
=
indice_pair_num
self
.
out_spatial_shape
=
out_spatial_shape
def
scatter_nd
(
indices
,
updates
,
shape
):
"""pytorch edition of tensorflow scatter_nd.
this function don't contain except handle code. so use this carefully
when indice repeats, don't support repeat add which is supported
in tensorflow.
"""
ret
=
torch
.
zeros
(
*
shape
,
dtype
=
updates
.
dtype
,
device
=
updates
.
device
)
ndim
=
indices
.
shape
[
-
1
]
output_shape
=
list
(
indices
.
shape
[:
-
1
])
+
shape
[
indices
.
shape
[
-
1
]:]
flatted_indices
=
indices
.
view
(
-
1
,
ndim
)
slices
=
[
flatted_indices
[:,
i
]
for
i
in
range
(
ndim
)]
slices
+=
[
Ellipsis
]
ret
[
slices
]
=
updates
.
view
(
*
output_shape
)
return
ret
class
SparseConvTensor
(
object
):
def
__init__
(
self
,
features
,
indices
,
spatial_shape
,
batch_size
,
grid
=
None
,
benchmark
=
False
):
"""
Args:
features: [num_points, num_features] feature tensor
indices: [num_points, ndim + 1] indice tensor. batch index saved in indices[:, 0]
spatial_shape: spatial shape of your sparse data
batch_size: batch size of your sparse data
grid: pre-allocated grid tensor. should be used when the volume of spatial shape
is very large.
benchmark: whether to enable benchmark. if enabled, all sparse operators will be record to
SparseConvTensor.
"""
self
.
features
=
features
self
.
indices
=
indices
self
.
spatial_shape
=
spatial_shape
self
.
batch_size
=
batch_size
self
.
indice_dict
=
{}
if
grid
is
None
:
grid
=
torch
.
Tensor
()
# empty tensor
self
.
grid
=
grid
self
.
benchmark
=
benchmark
self
.
benchmark_record
=
{}
@
classmethod
def
from_dense
(
cls
,
x
:
torch
.
Tensor
):
"""create sparse tensor fron channel last dense tensor by to_sparse
x must be NHWC tensor, channel last
"""
x
=
x
.
to_sparse
(
x
.
ndim
-
1
)
spatial_shape
=
x
.
shape
[
1
:
-
1
]
batch_size
=
x
.
shape
[
0
]
indices_th
=
x
.
indices
().
permute
(
1
,
0
).
contiguous
().
int
()
features_th
=
x
.
values
()
return
cls
(
features_th
,
indices_th
,
spatial_shape
,
batch_size
)
@
property
def
spatial_size
(
self
):
return
np
.
prod
(
self
.
spatial_shape
)
def
find_indice_pair
(
self
,
key
)
->
Optional
[
IndiceData
]:
if
key
is
None
:
return
None
if
key
in
self
.
indice_dict
:
return
self
.
indice_dict
[
key
]
return
None
def
dense
(
self
,
channels_first
=
True
):
output_shape
=
[
self
.
batch_size
]
+
list
(
self
.
spatial_shape
)
+
[
self
.
features
.
shape
[
1
]]
res
=
scatter_nd
(
self
.
indices
.
to
(
self
.
features
.
device
).
long
(),
self
.
features
,
output_shape
)
if
not
channels_first
:
return
res
ndim
=
len
(
self
.
spatial_shape
)
trans_params
=
list
(
range
(
0
,
ndim
+
1
))
trans_params
.
insert
(
1
,
ndim
+
1
)
return
res
.
permute
(
*
trans_params
).
contiguous
()
@
property
def
sparity
(
self
):
return
self
.
indices
.
shape
[
0
]
/
np
.
prod
(
self
.
spatial_shape
)
/
self
.
batch_size
def
shadow_copy
(
self
)
->
"SparseConvTensor"
:
"""create a new spconv tensor with all member unchanged"""
tensor
=
SparseConvTensor
(
self
.
features
,
self
.
indices
,
self
.
spatial_shape
,
self
.
batch_size
,
self
.
grid
,
self
.
benchmark
)
tensor
.
benchmark_record
=
self
.
benchmark_record
tensor
.
indice_dict
=
self
.
indice_dict
return
tensor
spconv/modules.py
View file @
3517290c
...
@@ -49,7 +49,9 @@ def _mean_update(vals, m_vals, t):
...
@@ -49,7 +49,9 @@ def _mean_update(vals, m_vals, t):
class
SparseModule
(
nn
.
Module
):
class
SparseModule
(
nn
.
Module
):
""" place holder, all module subclass from this will take sptensor in SparseSequential.
""" place holder, all module subclass from this will take sptensor in SparseSequential.
"""
"""
pass
def
__init__
(
self
,
name
=
None
):
super
().
__init__
()
self
.
name
=
name
class
SparseSequential
(
SparseModule
):
class
SparseSequential
(
SparseModule
):
...
...
spconv/ops.py
View file @
3517290c
...
@@ -24,7 +24,8 @@ class ConvAlgo(Enum):
...
@@ -24,7 +24,8 @@ class ConvAlgo(Enum):
Batch
=
1
# high memory cost, faster when number of points is small (< 50000)
Batch
=
1
# high memory cost, faster when number of points is small (< 50000)
BatchGemmGather
=
2
# high memory cost, faster when number of points medium
BatchGemmGather
=
2
# high memory cost, faster when number of points medium
SparseConvNet
=
3
SparseConvNet
=
3
Minkowski
=
4
# https://github.com/StanfordVL/MinkowskiEngine/blob/master/src/convolution.cu
Minkowski
=
4
# https://github.com/StanfordVL/MinkowskiEngine/blob/master/src/convolution.cu
def
get_conv_output_size
(
input_size
,
kernel_size
,
stride
,
padding
,
dilation
):
def
get_conv_output_size
(
input_size
,
kernel_size
,
stride
,
padding
,
dilation
):
ndim
=
len
(
input_size
)
ndim
=
len
(
input_size
)
...
...
spconv/pool.py
View file @
3517290c
...
@@ -24,6 +24,7 @@ from torch.nn.parameter import Parameter
...
@@ -24,6 +24,7 @@ from torch.nn.parameter import Parameter
import
spconv
import
spconv
import
spconv.functional
as
Fsp
import
spconv.functional
as
Fsp
from
spconv
import
ops
from
spconv
import
ops
from
spconv.core
import
IndiceData
from
spconv.modules
import
SparseModule
from
spconv.modules
import
SparseModule
...
@@ -34,8 +35,10 @@ class SparseMaxPool(SparseModule):
...
@@ -34,8 +35,10 @@ class SparseMaxPool(SparseModule):
stride
=
None
,
stride
=
None
,
padding
=
0
,
padding
=
0
,
dilation
=
1
,
dilation
=
1
,
subm
=
False
):
indice_key
=
None
,
super
(
SparseMaxPool
,
self
).
__init__
()
subm
=
False
,
name
=
None
):
super
(
SparseMaxPool
,
self
).
__init__
(
name
=
name
)
if
not
isinstance
(
kernel_size
,
(
list
,
tuple
)):
if
not
isinstance
(
kernel_size
,
(
list
,
tuple
)):
kernel_size
=
[
kernel_size
]
*
ndim
kernel_size
=
[
kernel_size
]
*
ndim
if
stride
is
None
:
if
stride
is
None
:
...
@@ -52,6 +55,7 @@ class SparseMaxPool(SparseModule):
...
@@ -52,6 +55,7 @@ class SparseMaxPool(SparseModule):
self
.
padding
=
padding
self
.
padding
=
padding
self
.
subm
=
subm
self
.
subm
=
subm
self
.
dilation
=
dilation
self
.
dilation
=
dilation
self
.
indice_key
=
indice_key
def
forward
(
self
,
input
):
def
forward
(
self
,
input
):
assert
isinstance
(
input
,
spconv
.
SparseConvTensor
)
assert
isinstance
(
input
,
spconv
.
SparseConvTensor
)
...
@@ -66,6 +70,32 @@ class SparseMaxPool(SparseModule):
...
@@ -66,6 +70,32 @@ class SparseMaxPool(SparseModule):
self
.
dilation
)
self
.
dilation
)
else
:
else
:
out_spatial_shape
=
spatial_shape
out_spatial_shape
=
spatial_shape
out_tensor
=
input
.
shadow_copy
()
if
input
.
benchmark
:
if
self
.
name
is
None
:
raise
ValueError
(
"you need to assign name to spmodules before benchmark (spconv.utils.bench.assign_name_to_spmod)"
)
if
self
.
name
not
in
input
.
benchmark_record
:
input
.
benchmark_record
[
self
.
name
]
=
{
"type"
:
"SparseMaxPool"
,
"indice_gen_time"
:
[],
"time"
:
[],
"num_points"
:
[],
"num_out_points"
:
[],
"params"
:
{
"kernel_size"
:
self
.
kernel_size
,
"stride"
:
self
.
stride
,
"padding"
:
self
.
padding
,
"dilation"
:
self
.
dilation
,
"channels"
:
features
.
shape
[
1
],
}
}
if
input
.
benchmark
:
torch
.
cuda
.
synchronize
()
t
=
time
.
time
()
outids
,
indice_pairs
,
indice_pairs_num
=
ops
.
get_indice_pairs
(
outids
,
indice_pairs
,
indice_pairs_num
=
ops
.
get_indice_pairs
(
indices
,
indices
,
batch_size
,
batch_size
,
...
@@ -77,24 +107,65 @@ class SparseMaxPool(SparseModule):
...
@@ -77,24 +107,65 @@ class SparseMaxPool(SparseModule):
0
,
0
,
self
.
subm
,
self
.
subm
,
grid
=
input
.
grid
)
grid
=
input
.
grid
)
if
input
.
benchmark
:
torch
.
cuda
.
synchronize
()
interval
=
time
.
time
()
-
t
out_tensor
.
benchmark_record
[
self
.
name
][
"indice_gen_time"
].
append
(
interval
)
t
=
time
.
time
()
if
self
.
indice_key
is
not
None
:
datas
=
input
.
find_indice_pair
(
self
.
indice_key
)
if
datas
is
None
:
indice_data
=
IndiceData
(
outids
,
indices
,
indice_pairs
,
indice_pairs_num
,
spatial_shape
)
input
.
indice_dict
[
self
.
indice_key
]
=
indice_data
else
:
raise
ValueError
(
"indice data exists"
)
out_features
=
Fsp
.
indice_maxpool
(
features
,
indice_pairs
.
to
(
device
),
out_features
=
Fsp
.
indice_maxpool
(
features
,
indice_pairs
.
to
(
device
),
indice_pairs_num
.
to
(
device
),
indice_pairs_num
.
to
(
device
),
outids
.
shape
[
0
])
outids
.
shape
[
0
])
out_tensor
=
spconv
.
SparseConvTensor
(
out_features
,
outids
,
if
input
.
benchmark
:
out_spatial_shape
,
batch_size
)
torch
.
cuda
.
synchronize
()
out_tensor
.
indice_dict
=
input
.
indice_dict
interval
=
time
.
time
()
-
t
out_tensor
.
grid
=
input
.
grid
out_tensor
.
benchmark_record
[
self
.
name
][
"time"
].
append
(
interval
)
out_tensor
.
benchmark_record
[
self
.
name
][
"num_points"
].
append
(
features
.
shape
[
0
])
out_tensor
.
benchmark_record
[
self
.
name
][
"num_out_points"
].
append
(
out_features
.
shape
[
0
])
out_tensor
.
features
=
out_features
out_tensor
.
indices
=
outids
out_tensor
.
spatial_shape
=
out_spatial_shape
return
out_tensor
return
out_tensor
class
SparseMaxPool2d
(
SparseMaxPool
):
class
SparseMaxPool2d
(
SparseMaxPool
):
def
__init__
(
self
,
kernel_size
,
stride
=
None
,
padding
=
0
,
dilation
=
1
):
def
__init__
(
self
,
super
(
SparseMaxPool2d
,
self
).
__init__
(
2
,
kernel_size
,
stride
,
padding
,
kernel_size
,
dilation
)
stride
=
None
,
padding
=
0
,
dilation
=
1
,
name
=
None
):
super
(
SparseMaxPool2d
,
self
).
__init__
(
2
,
kernel_size
,
stride
,
padding
,
dilation
,
name
=
name
)
class
SparseMaxPool3d
(
SparseMaxPool
):
class
SparseMaxPool3d
(
SparseMaxPool
):
def
__init__
(
self
,
kernel_size
,
stride
=
None
,
padding
=
0
,
dilation
=
1
):
def
__init__
(
self
,
super
(
SparseMaxPool3d
,
self
).
__init__
(
3
,
kernel_size
,
stride
,
padding
,
kernel_size
,
dilation
)
stride
=
None
,
padding
=
0
,
dilation
=
1
,
name
=
None
):
super
(
SparseMaxPool3d
,
self
).
__init__
(
3
,
kernel_size
,
stride
,
padding
,
dilation
,
name
=
name
)
spconv/spatial.py
View file @
3517290c
...
@@ -24,9 +24,10 @@ from torch.nn.parameter import Parameter
...
@@ -24,9 +24,10 @@ from torch.nn.parameter import Parameter
import
spconv
import
spconv
from
spconv.modules
import
SparseModule
from
spconv.modules
import
SparseModule
class
RemoveDuplicate
(
SparseModule
):
class
RemoveDuplicate
(
SparseModule
):
def
forward
(
self
,
x
:
spconv
.
SparseConvTensor
):
def
forward
(
self
,
x
:
spconv
.
SparseConvTensor
):
inds
=
x
.
indices
inds
=
x
.
indices
spatial_shape
=
[
x
.
batch_size
,
*
x
.
spatial_shape
]
spatial_shape
=
[
x
.
batch_size
,
*
x
.
spatial_shape
]
spatial_stride
=
[
0
]
*
len
(
spatial_shape
)
spatial_stride
=
[
0
]
*
len
(
spatial_shape
)
val
=
1
val
=
1
...
@@ -39,5 +40,6 @@ class RemoveDuplicate(SparseModule):
...
@@ -39,5 +40,6 @@ class RemoveDuplicate(SparseModule):
_
,
unique_inds
=
torch
.
unique
(
indices_index
)
_
,
unique_inds
=
torch
.
unique
(
indices_index
)
new_inds
=
inds
[
unique_inds
]
new_inds
=
inds
[
unique_inds
]
new_features
=
x
.
features
[
unique_inds
]
new_features
=
x
.
features
[
unique_inds
]
res
=
spconv
.
SparseConvTensor
(
new_features
,
new_inds
,
x
.
spatial_shape
,
x
.
batch_size
,
x
.
grid
)
res
=
spconv
.
SparseConvTensor
(
new_features
,
new_inds
,
x
.
spatial_shape
,
return
res
x
.
batch_size
,
x
.
grid
)
\ No newline at end of file
return
res
spconv/utils/__init__.py
View file @
3517290c
...
@@ -294,20 +294,18 @@ class VoxelGeneratorV2:
...
@@ -294,20 +294,18 @@ class VoxelGeneratorV2:
def
grid_size
(
self
):
def
grid_size
(
self
):
return
self
.
_grid_size
return
self
.
_grid_size
class
VoxelGeneratorV3
:
class
VoxelGeneratorV3
:
def
__init__
(
self
,
def
__init__
(
self
,
voxel_size
,
point_cloud_range
,
max_points
,
num_features
,
voxel_size
,
dtype
,
device
):
point_cloud_range
,
max_points
,
num_features
,
dtype
,
device
):
self
.
_max_points
=
max_points
self
.
_max_points
=
max_points
self
.
_point_cloud_range
=
point_cloud_range
self
.
_point_cloud_range
=
point_cloud_range
self
.
_voxel_size
=
voxel_size
self
.
_voxel_size
=
voxel_size
self
.
_grid_size
=
torch
.
round
((
self
.
_point_cloud_range
[
3
:]
-
self
.
_point_cloud_range
[:
3
])
/
self
.
_voxel_size
).
to
(
torch
.
int32
)
self
.
_grid_size
=
torch
.
round
(
(
self
.
_point_cloud_range
[
3
:]
-
self
.
_point_cloud_range
[:
3
])
/
self
.
_voxel_size
).
to
(
torch
.
int32
)
grid_volume
=
self
.
_grid_size
.
prod
()
grid_volume
=
self
.
_grid_size
.
prod
()
self
.
_grid_size
=
self
.
_grid_size
.
cpu
().
numpy
().
tolist
()
self
.
_grid_size
=
self
.
_grid_size
.
cpu
().
numpy
().
tolist
()
self
.
_ndim
=
len
(
self
.
_grid_size
)
self
.
_ndim
=
len
(
self
.
_grid_size
)
...
@@ -315,19 +313,34 @@ class VoxelGeneratorV3:
...
@@ -315,19 +313,34 @@ class VoxelGeneratorV3:
self
.
_dtype
=
dtype
self
.
_dtype
=
dtype
self
.
_device
=
device
self
.
_device
=
device
self
.
_point_index
=
torch
.
full
([
max_points
+
1
],
grid_volume
,
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
self
.
_point_index
=
torch
.
full
([
max_points
+
1
],
self
.
_grids
=
torch
.
zeros
([
grid_volume
,
num_features
],
dtype
=
self
.
_dtype
,
device
=
self
.
_device
)
grid_volume
,
self
.
_num_points_per_grid
=
torch
.
zeros
([
grid_volume
],
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
dtype
=
torch
.
int32
,
self
.
_voxels
=
torch
.
zeros
([
max_points
,
num_features
],
dtype
=
self
.
_dtype
,
device
=
self
.
_device
)
device
=
self
.
_device
)
self
.
_coors
=
torch
.
zeros
([
max_points
,
self
.
_ndim
],
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
self
.
_grids
=
torch
.
zeros
([
grid_volume
,
num_features
],
dtype
=
self
.
_dtype
,
device
=
self
.
_device
)
self
.
_num_points_per_grid
=
torch
.
zeros
([
grid_volume
],
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
self
.
_voxels
=
torch
.
zeros
([
max_points
,
num_features
],
dtype
=
self
.
_dtype
,
device
=
self
.
_device
)
self
.
_coors
=
torch
.
zeros
([
max_points
,
self
.
_ndim
],
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
def
generate
(
self
,
points
):
def
generate
(
self
,
points
):
assert
points
.
shape
[
0
]
<=
self
.
_max_points
,
'please enlarge max_points to not smaller than '
+
str
(
points
.
shape
[
0
])
assert
points
.
shape
[
0
]
<=
self
.
_max_points
,
'please enlarge max_points to not smaller than '
+
str
(
points
.
shape
[
0
])
points
.
to
(
self
.
_dtype
).
to
(
self
.
_device
)
points
.
to
(
self
.
_dtype
).
to
(
self
.
_device
)
return
self
.
points_to_voxel
(
points
)
return
self
.
points_to_voxel
(
points
)
def
generate_multi_gpu
(
self
,
points
):
def
generate_multi_gpu
(
self
,
points
):
assert
points
.
shape
[
0
]
<=
self
.
_max_points
,
'please enlarge max_points to not smaller than '
+
str
(
points
.
shape
[
0
])
assert
points
.
shape
[
0
]
<=
self
.
_max_points
,
'please enlarge max_points to not smaller than '
+
str
(
points
.
shape
[
0
])
points
.
to
(
self
.
_dtype
).
to
(
self
.
_device
)
points
.
to
(
self
.
_dtype
).
to
(
self
.
_device
)
return
self
.
points_to_voxel
(
points
)
return
self
.
points_to_voxel
(
points
)
...
@@ -351,23 +364,21 @@ class VoxelGeneratorV3:
...
@@ -351,23 +364,21 @@ class VoxelGeneratorV3:
coors_range: [6] list/tuple or array or tensor, float. indicate voxel range.
coors_range: [6] list/tuple or array or tensor, float. indicate voxel range.
format: xyzxyz, minmax
format: xyzxyz, minmax
"""
"""
indexes
=
torch
.
floor
((
points
[:,
:
3
]
-
self
.
_point_cloud_range
[:
3
])
/
self
.
_voxel_size
).
to
(
torch
.
int32
)
indexes
=
torch
.
floor
((
points
[:,
:
3
]
-
self
.
_point_cloud_range
[:
3
])
/
num_voxel
=
torch
.
ops
.
spconv
.
points_to_voxel
(
points
,
indexes
,
self
.
_voxel_size
).
to
(
torch
.
int32
)
self
.
_point_index
,
num_voxel
=
torch
.
ops
.
spconv
.
points_to_voxel
(
self
.
_grids
,
points
,
indexes
,
self
.
_point_index
,
self
.
_grids
,
self
.
_num_points_per_grid
,
self
.
_num_points_per_grid
,
self
.
_voxels
,
self
.
_coors
,
self
.
_voxels
,
self
.
_grid_size
,
self
.
_ndim
)
self
.
_coors
,
self
.
_grid_size
,
self
.
_ndim
)
voxels
=
self
.
_voxels
[:
num_voxel
,
:]
voxels
=
self
.
_voxels
[:
num_voxel
,
:]
coors
=
self
.
_coors
[:
num_voxel
,
:]
coors
=
self
.
_coors
[:
num_voxel
,
:]
# xyz --> zyx
# xyz --> zyx
#coors = coors[::-1]
#coors = coors[::-1]
x
,
y
,
z
=
coors
[:,
0
].
reshape
([
-
1
,
1
]),
coors
[:,
1
].
reshape
([
-
1
,
1
]),
coors
[:,
2
].
reshape
([
-
1
,
1
])
x
,
y
,
z
=
coors
[:,
0
].
reshape
([
-
1
,
1
]),
coors
[:,
1
].
reshape
(
[
-
1
,
1
]),
coors
[:,
2
].
reshape
([
-
1
,
1
])
coors
=
torch
.
cat
([
z
,
y
,
x
],
dim
=
1
)
coors
=
torch
.
cat
([
z
,
y
,
x
],
dim
=
1
)
# can be skipped
# can be skipped
# x, y, z, f = voxels[:, 0].reshape([-1, 1]), voxels[:, 1].reshape([-1, 1]), voxels[:, 2].reshape([-1, 1]), voxels[:, 3:]
# x, y, z, f = voxels[:, 0].reshape([-1, 1]), voxels[:, 1].reshape([-1, 1]), voxels[:, 2].reshape([-1, 1]), voxels[:, 3:]
# voxels = torch.cat([z, y, x, f], dim=1)
# voxels = torch.cat([z, y, x, f], dim=1)
return
voxels
,
coors
return
voxels
,
coors
src/spconv/fused_conv.cu
View file @
3517290c
...
@@ -15,8 +15,8 @@
...
@@ -15,8 +15,8 @@
#include <ATen/ATen.h>
#include <ATen/ATen.h>
#include <spconv/fused_conv.cu.h>
#include <spconv/fused_conv.cu.h>
#include <spconv/fused_conv.h>
#include <spconv/fused_conv.h>
#include <tensorview/torch_utils.h>
#include <spconv/minkowski.cu.h>
#include <spconv/minkowski.cu.h>
#include <tensorview/torch_utils.h>
namespace
spconv
{
namespace
spconv
{
void
fused_conv_cuda
(
torch
::
Tensor
output
,
torch
::
Tensor
features
,
void
fused_conv_cuda
(
torch
::
Tensor
output
,
torch
::
Tensor
features
,
...
@@ -56,8 +56,8 @@ void fused_conv_backward_cuda(torch::Tensor features, torch::Tensor din,
...
@@ -56,8 +56,8 @@ void fused_conv_backward_cuda(torch::Tensor features, torch::Tensor din,
}
}
void
fused_conv_cuda_minkowski
(
torch
::
Tensor
output
,
torch
::
Tensor
features
,
void
fused_conv_cuda_minkowski
(
torch
::
Tensor
output
,
torch
::
Tensor
features
,
torch
::
Tensor
filters
,
torch
::
Tensor
indicesIn
,
torch
::
Tensor
filters
,
torch
::
Tensor
indicesIn
,
torch
::
Tensor
indicesOut
,
int
nHot
)
{
torch
::
Tensor
indicesOut
,
int
nHot
)
{
auto
dtype
=
output
.
scalar_type
();
auto
dtype
=
output
.
scalar_type
();
auto
in_nchannel
=
features
.
size
(
1
);
auto
in_nchannel
=
features
.
size
(
1
);
auto
out_nchannel
=
output
.
size
(
1
);
auto
out_nchannel
=
output
.
size
(
1
);
...
@@ -81,10 +81,9 @@ void fused_conv_cuda_minkowski(torch::Tensor output, torch::Tensor features,
...
@@ -81,10 +81,9 @@ void fused_conv_cuda_minkowski(torch::Tensor output, torch::Tensor features,
int
step
=
(
nHot
+
num_div
-
1
)
/
num_div
;
int
step
=
(
nHot
+
num_div
-
1
)
/
num_div
;
dim3
threads
(
shared_mem_size
,
shared_mem_size
);
dim3
threads
(
shared_mem_size
,
shared_mem_size
);
tv
::
dispatch_torch
<
float
>
(
dtype
,
[
&
](
auto
I
)
{
tv
::
dispatch_torch
<
float
>
(
dtype
,
[
&
](
auto
I
)
{
using
T
=
decltype
(
I
);
using
T
=
decltype
(
I
);
tv
::
DispatchInt
<
shmem_sizes_t
>
()(
shared_mem_size
,
[
&
](
auto
ShSizeValue
){
tv
::
DispatchInt
<
shmem_sizes_t
>
()(
shared_mem_size
,
[
&
](
auto
ShSizeValue
)
{
constexpr
int
ShmemSize
=
decltype
(
ShSizeValue
)
::
value
;
constexpr
int
ShmemSize
=
decltype
(
ShSizeValue
)
::
value
;
for
(
int
s
=
0
;
s
<
num_div
;
s
++
)
{
for
(
int
s
=
0
;
s
<
num_div
;
s
++
)
{
int
remainder
=
nHot
-
step
*
s
;
int
remainder
=
nHot
-
step
*
s
;
...
@@ -93,17 +92,19 @@ void fused_conv_cuda_minkowski(torch::Tensor output, torch::Tensor features,
...
@@ -93,17 +92,19 @@ void fused_conv_cuda_minkowski(torch::Tensor output, torch::Tensor features,
(
curr_num_active
+
threads
.
y
-
1
)
/
threads
.
y
);
(
curr_num_active
+
threads
.
y
-
1
)
/
threads
.
y
);
matmul
<
T
,
int32_t
,
ShmemSize
><<<
grid
,
threads
,
0
,
stream
>>>
(
matmul
<
T
,
int32_t
,
ShmemSize
><<<
grid
,
threads
,
0
,
stream
>>>
(
features
.
data_ptr
<
T
>
(),
in_nchannel
,
curr_num_active
,
features
.
data_ptr
<
T
>
(),
in_nchannel
,
curr_num_active
,
filters
.
data_ptr
<
T
>
(),
out_nchannel
,
filters
.
data_ptr
<
T
>
(),
out_nchannel
,
in_nchannel
,
in_nchannel
,
output
.
data_ptr
<
T
>
(),
indicesIn
.
data_ptr
<
int32_t
>
(),
output
.
data_ptr
<
T
>
(),
indicesIn
.
data_ptr
<
int32_t
>
(),
indicesOut
.
data_ptr
<
int32_t
>
());
indicesOut
.
data_ptr
<
int32_t
>
());
}
}
});
});
});
});
}
}
void
fused_conv_backward_cuda_minkowski
(
torch
::
Tensor
features
,
torch
::
Tensor
din
,
void
fused_conv_backward_cuda_minkowski
(
torch
::
Tensor
features
,
torch
::
Tensor
dout
,
torch
::
Tensor
filters
,
torch
::
Tensor
din
,
torch
::
Tensor
dout
,
torch
::
Tensor
dfilters
,
torch
::
Tensor
indicesIn
,
torch
::
Tensor
filters
,
torch
::
Tensor
indicesOut
,
int
nHot
)
{
torch
::
Tensor
dfilters
,
torch
::
Tensor
indicesIn
,
torch
::
Tensor
indicesOut
,
int
nHot
)
{
auto
dtype
=
features
.
scalar_type
();
auto
dtype
=
features
.
scalar_type
();
auto
in_nchannel
=
features
.
size
(
1
);
auto
in_nchannel
=
features
.
size
(
1
);
auto
out_nchannel
=
dout
.
size
(
1
);
auto
out_nchannel
=
dout
.
size
(
1
);
...
@@ -131,7 +132,7 @@ void fused_conv_backward_cuda_minkowski(torch::Tensor features, torch::Tensor di
...
@@ -131,7 +132,7 @@ void fused_conv_backward_cuda_minkowski(torch::Tensor features, torch::Tensor di
tv
::
dispatch_torch
<
float
>
(
dtype
,
[
&
](
auto
I
)
{
tv
::
dispatch_torch
<
float
>
(
dtype
,
[
&
](
auto
I
)
{
using
T
=
decltype
(
I
);
using
T
=
decltype
(
I
);
tv
::
DispatchInt
<
shmem_sizes_t
>
()(
shared_mem_size
,
[
&
](
auto
ShSizeValue
){
tv
::
DispatchInt
<
shmem_sizes_t
>
()(
shared_mem_size
,
[
&
](
auto
ShSizeValue
)
{
constexpr
int
ShmemSize
=
decltype
(
ShSizeValue
)
::
value
;
constexpr
int
ShmemSize
=
decltype
(
ShSizeValue
)
::
value
;
for
(
int
s
=
0
;
s
<
num_div
;
s
++
)
{
for
(
int
s
=
0
;
s
<
num_div
;
s
++
)
{
int
remainder
=
nHot
-
step
*
s
;
int
remainder
=
nHot
-
step
*
s
;
...
@@ -141,10 +142,10 @@ void fused_conv_backward_cuda_minkowski(torch::Tensor features, torch::Tensor di
...
@@ -141,10 +142,10 @@ void fused_conv_backward_cuda_minkowski(torch::Tensor features, torch::Tensor di
matmul2
<
T
,
int32_t
,
ShmemSize
><<<
grid
,
threads
,
0
,
stream
>>>
(
matmul2
<
T
,
int32_t
,
ShmemSize
><<<
grid
,
threads
,
0
,
stream
>>>
(
dout
.
data_ptr
<
T
>
(),
out_nchannel
,
curr_num_active
,
// A
dout
.
data_ptr
<
T
>
(),
out_nchannel
,
curr_num_active
,
// A
filters
.
data_ptr
<
T
>
(),
out_nchannel
,
filters
.
data_ptr
<
T
>
(),
out_nchannel
,
in_nchannel
,
// B
in_nchannel
,
// B
features
.
data_ptr
<
T
>
(),
in_nchannel
,
curr_num_active
,
// D
features
.
data_ptr
<
T
>
(),
in_nchannel
,
curr_num_active
,
// D
din
.
data_ptr
<
T
>
(),
// C
din
.
data_ptr
<
T
>
(),
// C
dfilters
.
data_ptr
<
T
>
(),
// E
dfilters
.
data_ptr
<
T
>
(),
// E
indicesIn
.
data_ptr
<
int32_t
>
(),
indicesOut
.
data_ptr
<
int32_t
>
());
indicesIn
.
data_ptr
<
int32_t
>
(),
indicesOut
.
data_ptr
<
int32_t
>
());
}
}
});
});
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment