Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
73427720
Commit
73427720
authored
May 04, 2019
by
traveller59
Browse files
fix #45 release requirement of kernel size
parent
10db9b67
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
36 additions
and
33 deletions
+36
-33
include/spconv/indice.cu.h
include/spconv/indice.cu.h
+1
-2
include/spconv/spconv_ops.h
include/spconv/spconv_ops.h
+2
-2
include/tensorview/tensorview.h
include/tensorview/tensorview.h
+1
-1
include/torch_utils.h
include/torch_utils.h
+7
-3
src/spconv/indice.cu
src/spconv/indice.cu
+11
-11
src/spconv/maxpool.cu
src/spconv/maxpool.cu
+8
-8
src/spconv/reordering.cu
src/spconv/reordering.cu
+6
-6
No files found.
include/spconv/indice.cu.h
View file @
73427720
...
...
@@ -147,8 +147,7 @@ assignIndicePairsKernel(tv::TensorView<Index> indicesOut,
}
}
template
<
typename
Index
,
typename
IndexGrid
,
unsigned
NDim
,
int
KernelMaxVolume
=
256
>
template
<
typename
Index
,
typename
IndexGrid
,
unsigned
NDim
>
__global__
void
prepareSubMGridKernel
(
tv
::
TensorView
<
const
Index
>
indicesIn
,
tv
::
TensorView
<
IndexGrid
>
gridsOut
,
...
...
include/spconv/spconv_ops.h
View file @
73427720
...
...
@@ -47,7 +47,7 @@ getIndicePair(torch::Tensor indices, int64_t batchSize,
for
(
int
i
=
1
;
i
<
kernelSize
.
size
();
++
i
)
{
kernelVolume
*=
kernelSize
[
i
];
}
TV_ASSERT_RT_ERR
(
kernelVolume
<=
25
6
,
"error"
);
TV_ASSERT_RT_ERR
(
kernelVolume
<=
409
6
,
"error"
);
auto
outputVolume
=
outSpatialShape
[
0
];
for
(
int
i
=
1
;
i
<
outSpatialShape
.
size
();
++
i
)
{
outputVolume
*=
outSpatialShape
[
i
];
...
...
@@ -159,7 +159,7 @@ getIndicePairPreGrid(torch::Tensor indices, torch::Tensor gridOut, int64_t batch
for
(
int
i
=
1
;
i
<
kernelSize
.
size
();
++
i
)
{
kernelVolume
*=
kernelSize
[
i
];
}
TV_ASSERT_RT_ERR
(
kernelVolume
<=
25
6
,
"error"
);
TV_ASSERT_RT_ERR
(
kernelVolume
<=
409
6
,
"error"
);
auto
outputVolume
=
outSpatialShape
[
0
];
for
(
int
i
=
1
;
i
<
outSpatialShape
.
size
();
++
i
)
{
outputVolume
*=
outSpatialShape
[
i
];
...
...
include/tensorview/tensorview.h
View file @
73427720
...
...
@@ -102,7 +102,7 @@ void sstream_print(SStream &ss, T val, TArgs... args) {
struct
GPU
{
GPU
(
cudaStream_t
s
=
0
)
:
mStream
(
s
)
{}
cudaStream_t
s
tream
()
const
{
return
mStream
;
}
virtual
cudaStream_t
getS
tream
()
const
{
return
mStream
;
}
cudaStream_t
mStream
=
0
;
};
struct
CPU
{};
...
...
include/torch_utils.h
View file @
73427720
...
...
@@ -21,8 +21,8 @@
namespace
tv
{
struct
TorchGPU
:
public
tv
::
GPU
{
TorchGPU
()
{
mStream
=
at
::
cuda
::
getCurrentCUDAStream
();
virtual
cudaStream_t
getStream
()
const
override
{
return
at
::
cuda
::
getCurrentCUDAStream
();
}
};
...
...
@@ -48,7 +48,11 @@ template <typename T> void check_torch_dtype(const torch::Tensor &tensor) {
TV_ASSERT_RT_ERR
(
val
,
"error"
);
break
;
}
case
at
::
ScalarType
::
Long
:
{
auto
val
=
std
::
is_same
<
std
::
remove_const_t
<
T
>
,
long
>::
value
;
TV_ASSERT_RT_ERR
(
val
,
"error"
);
break
;
}
default:
TV_ASSERT_RT_ERR
(
false
,
"error"
);
}
...
...
src/spconv/indice.cu
View file @
73427720
...
...
@@ -45,15 +45,15 @@ struct CreateConvIndicePairFunctorP1<tv::GPU, Index, IndexGrid, NDim> {
return
0
;
// auto timer = spconv::CudaContextTimer<>();
if
(
transpose
)
prepareDeConvIndicePairsKernel
<
Index
,
IndexGrid
,
NDim
,
25
6
>
prepareDeConvIndicePairsKernel
<
Index
,
IndexGrid
,
NDim
,
409
6
>
<<<
tv
::
launch
::
getBlocks
(
numActIn
),
tv
::
launch
::
CUDA_NUM_THREADS
,
0
,
d
.
s
tream
()
>>>
(
indicesIn
,
indicesOut
,
gridsOut
,
indicePairs
,
d
.
getS
tream
()
>>>
(
indicesIn
,
indicesOut
,
gridsOut
,
indicePairs
,
indiceNum
,
indicePairUnique
,
kernelSize
,
stride
,
padding
,
dilation
,
outSpatialShape
);
else
prepareIndicePairsKernel
<
Index
,
IndexGrid
,
NDim
,
25
6
>
prepareIndicePairsKernel
<
Index
,
IndexGrid
,
NDim
,
409
6
>
<<<
tv
::
launch
::
getBlocks
(
numActIn
),
tv
::
launch
::
CUDA_NUM_THREADS
,
0
,
d
.
s
tream
()
>>>
(
indicesIn
,
indicesOut
,
gridsOut
,
indicePairs
,
d
.
getS
tream
()
>>>
(
indicesIn
,
indicesOut
,
gridsOut
,
indicePairs
,
indiceNum
,
indicePairUnique
,
kernelSize
,
stride
,
padding
,
dilation
,
outSpatialShape
);
TV_CHECK_CUDA_ERR
();
...
...
@@ -80,18 +80,18 @@ struct CreateConvIndicePairFunctorP2<tv::GPU, Index, IndexGrid, NDim> {
Index
numAct
=
indicePairUnique
.
dim
(
0
)
-
1
;
assignGridAndIndiceOutKernel
<
Index
,
IndexGrid
,
NDim
>
<<<
tv
::
launch
::
getBlocks
(
numAct
),
tv
::
launch
::
CUDA_NUM_THREADS
,
0
,
d
.
s
tream
()
>>>
(
indicesOut
,
gridsOut
,
numAct
,
indicePairs
,
d
.
getS
tream
()
>>>
(
indicesOut
,
gridsOut
,
numAct
,
indicePairs
,
indicePairUnique
,
outSpatialShape
,
batchSize
);
TV_CHECK_CUDA_ERR
();
assignIndicePairsKernel
<
Index
,
IndexGrid
,
NDim
>
<<<
tv
::
launch
::
getBlocks
(
numActIn
),
tv
::
launch
::
CUDA_NUM_THREADS
,
0
,
d
.
s
tream
()
>>>
(
indicesOut
,
gridsOut
,
numActIn
,
indicePairs
,
d
.
getS
tream
()
>>>
(
indicesOut
,
gridsOut
,
numActIn
,
indicePairs
,
indicePairUnique
,
outSpatialShape
);
TV_CHECK_CUDA_ERR
();
if
(
resetGrid
)
{
resetGridKernel
<
Index
,
IndexGrid
,
NDim
>
<<<
tv
::
launch
::
getBlocks
(
numAct
),
tv
::
launch
::
CUDA_NUM_THREADS
,
0
,
d
.
s
tream
()
>>>
(
indicePairUnique
.
data
(),
gridsOut
,
numAct
);
d
.
getS
tream
()
>>>
(
indicePairUnique
.
data
(),
gridsOut
,
numAct
);
TV_CHECK_CUDA_ERR
();
}
return
numAct
;
...
...
@@ -116,18 +116,18 @@ struct CreateSubMIndicePairFunctor<tv::GPU, Index, IndexGrid, NDim> {
// auto timer = spconv::CudaContextTimer<>();
prepareSubMGridKernel
<
Index
,
IndexGrid
,
NDim
>
<<<
tv
::
launch
::
getBlocks
(
numActIn
),
tv
::
launch
::
CUDA_NUM_THREADS
,
0
,
d
.
s
tream
()
>>>
(
indicesIn
,
gridsOut
,
outSpatialShape
);
d
.
getS
tream
()
>>>
(
indicesIn
,
gridsOut
,
outSpatialShape
);
TV_CHECK_CUDA_ERR
();
getSubMIndicePairsKernel
<
Index
,
IndexGrid
,
NDim
>
getSubMIndicePairsKernel
<
Index
,
IndexGrid
,
NDim
,
4096
>
<<<
tv
::
launch
::
getBlocks
(
numActIn
),
tv
::
launch
::
CUDA_NUM_THREADS
,
0
,
d
.
s
tream
()
>>>
(
indicesIn
,
gridsOut
,
indicePairs
,
indiceNum
,
d
.
getS
tream
()
>>>
(
indicesIn
,
gridsOut
,
indicePairs
,
indiceNum
,
kernelSize
,
stride
,
padding
,
dilation
,
outSpatialShape
);
TV_CHECK_CUDA_ERR
();
// std::cout << "subm gene time " << timer.report() / 1000.0 << std::endl;
if
(
resetGrid
)
{
resetGridSubMKernel
<
Index
,
IndexGrid
,
NDim
>
<<<
tv
::
launch
::
getBlocks
(
numActIn
),
tv
::
launch
::
CUDA_NUM_THREADS
,
0
,
d
.
s
tream
()
>>>
(
indicesIn
.
data
(),
gridsOut
,
outSpatialShape
,
numActIn
);
d
.
getS
tream
()
>>>
(
indicesIn
.
data
(),
gridsOut
,
outSpatialShape
,
numActIn
);
TV_CHECK_CUDA_ERR
();
}
return
numActIn
;
...
...
src/spconv/maxpool.cu
View file @
73427720
...
...
@@ -329,7 +329,7 @@ struct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {
maxPoolFwdVecBlockKernel
<
T
,
Index
,
int
(
NumTLP
),
NumILP
,
vecload_type_t
>
<<<
dim3
(
std
::
min
(
size
/
NumTLP
,
512
),
numPlanes
/
NumTLP
),
dim3
(
NumTLP
/
vecloadFactor
,
NumTLP
/
NumILP
),
0
,
d
.
s
tream
()
>>>
(
outFeatures
.
data
(),
inFeatures
.
data
(),
d
.
getS
tream
()
>>>
(
outFeatures
.
data
(),
inFeatures
.
data
(),
indices
.
subview
(
0
).
data
(),
indices
.
subview
(
1
).
data
(),
numHotBlock
,
numPlanes
/
vecloadFactor
);
...
...
@@ -339,7 +339,7 @@ struct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {
if
(
size
>
numHotBlock
)
{
maxPoolFwdGenericKernel
<
T
,
Index
,
int
(
NumTLP
),
NumILP
>
<<<
dim3
(
1
,
numPlanes
/
NumTLP
),
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
s
tream
()
>>>
(
outFeatures
.
data
(),
inFeatures
.
data
(),
0
,
d
.
getS
tream
()
>>>
(
outFeatures
.
data
(),
inFeatures
.
data
(),
indices
.
subview
(
0
).
data
()
+
numHotBlock
,
indices
.
subview
(
1
).
data
()
+
numHotBlock
,
size
-
numHotBlock
,
numPlanes
);
...
...
@@ -357,7 +357,7 @@ struct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {
if
(
numHotBlock
>=
NumTLP
)
{
maxPoolFwdGenericBlockKernel
<
T
,
Index
,
NumTLP
,
NumILP
>
<<<
dim3
(
size
/
NumTLP
,
tv
::
launch
::
DivUp
(
numPlanes
,
NumTLP
)),
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
s
tream
()
>>>
(
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
getS
tream
()
>>>
(
outFeatures
.
data
(),
inFeatures
.
data
(),
indices
.
subview
(
0
).
data
(),
indices
.
subview
(
1
).
data
(),
numHotBlock
,
numPlanes
);
...
...
@@ -367,7 +367,7 @@ struct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {
if
(
size
>
numHotBlock
)
{
maxPoolFwdGenericKernel
<
T
,
Index
,
NumTLP
,
NumILP
>
<<<
dim3
(
1
,
tv
::
launch
::
DivUp
(
numPlanes
,
NumTLP
)),
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
s
tream
()
>>>
(
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
getS
tream
()
>>>
(
outFeatures
.
data
(),
inFeatures
.
data
(),
indices
.
subview
(
0
).
data
()
+
numHotBlock
,
indices
.
subview
(
1
).
data
()
+
numHotBlock
,
size
-
numHotBlock
,
...
...
@@ -403,7 +403,7 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
maxPoolBwdVecBlockKernel
<
T
,
Index
,
int
(
NumTLP
),
NumILP
,
vecload_type_t
>
<<<
dim3
(
std
::
min
(
size
/
NumTLP
,
512
),
numPlanes
/
NumTLP
),
dim3
(
NumTLP
/
vecloadFactor
,
NumTLP
/
NumILP
),
0
,
d
.
s
tream
()
>>>
(
outFeatures
.
data
(),
inFeatures
.
data
(),
d
.
getS
tream
()
>>>
(
outFeatures
.
data
(),
inFeatures
.
data
(),
dout
.
data
(),
din
.
data
(),
indices
.
subview
(
0
).
data
(),
indices
.
subview
(
1
).
data
(),
numHotBlock
,
...
...
@@ -414,7 +414,7 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
if
(
size
>
numHotBlock
)
{
maxPoolBwdGenericKernel
<
T
,
Index
,
int
(
NumTLP
),
NumILP
>
<<<
dim3
(
1
,
numPlanes
/
NumTLP
),
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
s
tream
()
>>>
(
outFeatures
.
data
(),
inFeatures
.
data
(),
0
,
d
.
getS
tream
()
>>>
(
outFeatures
.
data
(),
inFeatures
.
data
(),
dout
.
data
(),
din
.
data
(),
indices
.
subview
(
0
).
data
()
+
numHotBlock
,
indices
.
subview
(
1
).
data
()
+
numHotBlock
,
...
...
@@ -433,7 +433,7 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
if
(
numHotBlock
>=
NumTLP
)
{
maxPoolBwdGenericBlockKernel
<
T
,
Index
,
NumTLP
,
NumILP
>
<<<
dim3
(
size
/
NumTLP
,
tv
::
launch
::
DivUp
(
numPlanes
,
NumTLP
)),
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
s
tream
()
>>>
(
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
getS
tream
()
>>>
(
outFeatures
.
data
(),
inFeatures
.
data
(),
dout
.
data
(),
din
.
data
(),
indices
.
subview
(
0
).
data
(),
indices
.
subview
(
1
).
data
(),
numHotBlock
,
numPlanes
);
...
...
@@ -443,7 +443,7 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
if
(
size
>
numHotBlock
)
{
maxPoolBwdGenericKernel
<
T
,
Index
,
NumTLP
,
NumILP
>
<<<
dim3
(
1
,
tv
::
launch
::
DivUp
(
numPlanes
,
NumTLP
)),
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
s
tream
()
>>>
(
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
getS
tream
()
>>>
(
outFeatures
.
data
(),
inFeatures
.
data
(),
dout
.
data
(),
din
.
data
(),
indices
.
subview
(
0
).
data
()
+
numHotBlock
,
indices
.
subview
(
1
).
data
()
+
numHotBlock
,
size
-
numHotBlock
,
...
...
src/spconv/reordering.cu
View file @
73427720
...
...
@@ -50,7 +50,7 @@ struct SparseGatherFunctor<tv::GPU, T, Index> {
gatherVecBlockKernel
<
T
,
Index
,
int
(
NumTLP
),
NumILP
,
vecload_type_t
>
<<<
dim3
(
numPlanes
/
NumTLP
,
size
/
NumTLP
),
dim3
(
NumTLP
/
vecloadFactor
,
NumTLP
/
NumILP
),
0
,
d
.
s
tream
()
>>>
(
buffer
.
data
(),
features
.
data
(),
indices
.
data
(),
d
.
getS
tream
()
>>>
(
buffer
.
data
(),
features
.
data
(),
indices
.
data
(),
nHotBlock
,
numPlanes
/
vecloadFactor
);
TV_CHECK_CUDA_ERR
();
...
...
@@ -59,7 +59,7 @@ struct SparseGatherFunctor<tv::GPU, T, Index> {
gatherVecKernel
<
T
,
Index
,
int
(
NumTLP
),
NumILP
,
vecload_type_t
>
<<<
dim3
(
1
,
numPlanes
/
NumTLP
),
dim3
(
NumTLP
/
NumILP
,
NumTLP
/
vecloadFactor
),
0
,
d
.
s
tream
()
>>>
(
buffer
.
data
()
+
nHotBlock
*
numPlanes
,
d
.
getS
tream
()
>>>
(
buffer
.
data
()
+
nHotBlock
*
numPlanes
,
features
.
data
(),
indices
.
data
()
+
nHotBlock
,
size
-
nHotBlock
,
numPlanes
/
vecloadFactor
);
TV_CHECK_CUDA_ERR
();
...
...
@@ -75,7 +75,7 @@ struct SparseGatherFunctor<tv::GPU, T, Index> {
gatherGenericKernel
<
T
,
Index
,
NumTLP
,
NumILP
>
<<<
dim3
(
tv
::
launch
::
DivUp
(
size
,
NumTLP
),
tv
::
launch
::
DivUp
(
numPlanes
,
NumTLP
)),
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
s
tream
()
>>>
(
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
getS
tream
()
>>>
(
buffer
.
data
(),
features
.
data
(),
indices
.
data
(),
size
,
numPlanes
);
TV_CHECK_CUDA_ERR
();
}
...
...
@@ -107,7 +107,7 @@ struct SparseScatterAddFunctor<tv::GPU, T, Index> {
vecload_type_t
>
<<<
dim3
(
numPlanes
/
NumTLP
,
size
/
NumTLP
),
dim3
(
NumTLP
/
vecloadFactor
,
NumTLP
/
NumILP
),
0
,
d
.
s
tream
()
>>>
(
outFeatures
.
data
(),
buffer
.
data
(),
d
.
getS
tream
()
>>>
(
outFeatures
.
data
(),
buffer
.
data
(),
indices
.
data
(),
nHotBlock
,
numPlanes
/
vecloadFactor
);
TV_CHECK_CUDA_ERR
();
...
...
@@ -115,7 +115,7 @@ struct SparseScatterAddFunctor<tv::GPU, T, Index> {
if
(
size
-
nHotBlock
>
0
)
{
scatterAddGenericKernel
<
T
,
Index
,
int
(
NumTLP
),
NumILP
>
<<<
dim3
(
1
,
numPlanes
/
NumTLP
),
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
s
tream
()
>>>
(
0
,
d
.
getS
tream
()
>>>
(
outFeatures
.
data
(),
buffer
.
data
()
+
nHotBlock
*
numPlanes
,
indices
.
data
()
+
nHotBlock
,
size
-
nHotBlock
,
numPlanes
);
TV_CHECK_CUDA_ERR
();
...
...
@@ -130,7 +130,7 @@ struct SparseScatterAddFunctor<tv::GPU, T, Index> {
scatterAddGenericKernel
<
T
,
Index
,
NumTLP
,
NumILP
>
<<<
dim3
(
tv
::
launch
::
DivUp
(
size
,
NumTLP
),
tv
::
launch
::
DivUp
(
numPlanes
,
NumTLP
)),
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
s
tream
()
>>>
(
dim3
(
NumTLP
/
NumILP
,
NumTLP
),
0
,
d
.
getS
tream
()
>>>
(
outFeatures
.
data
(),
buffer
.
data
(),
indices
.
data
(),
size
,
numPlanes
);
TV_CHECK_CUDA_ERR
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment