Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
48b9a86a
Commit
48b9a86a
authored
May 24, 2019
by
traveller59
Browse files
fix cpu bug, increase cpu speed
parent
e8533f47
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
188 additions
and
166 deletions
+188
-166
include/spconv/geometry.h
include/spconv/geometry.h
+0
-158
src/spconv/CMakeLists.txt
src/spconv/CMakeLists.txt
+6
-0
src/spconv/indice.cc
src/spconv/indice.cc
+176
-0
src/spconv/reordering.cc
src/spconv/reordering.cc
+6
-8
No files found.
include/spconv/geometry.h
View file @
48b9a86a
...
@@ -185,164 +185,6 @@ TV_HOST_DEVICE Index getValidOutPosTranspose(
...
@@ -185,164 +185,6 @@ TV_HOST_DEVICE Index getValidOutPosTranspose(
return
pointCounter
;
return
pointCounter
;
}
}
// Builds the (input row, output row) pairs for a regular sparse convolution.
//
// indicesIn:   one row per active input site; column 0 is the batch index and
//              the following NDim entries are handed to getValidOutPos as the
//              site position. Row j is addressed as data() + j * (NDim + 1),
//              so densely packed rows are assumed.
// indicesOut:  receives one row (batch index, then NDim coordinates) for every
//              distinct output site discovered, in discovery order.
// gridsOut:    not used by this implementation.
// indicePairs: [K, 2, L]; for kernel offset k, entry (k, 0, n) is the input
//              row and (k, 1, n) the matching output row of the n-th pair.
// indiceNum:   per-kernel-offset pair counters; incremented here and not
//              reset by this function.
// kernelSize, outSpatialShape: read for NDim entries each.
// stride, padding, dilation:   only forwarded to getValidOutPos.
//
// Returns the number of distinct output sites written to indicesOut.
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsConv(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<Index> indicesOut,
                         tv::TensorView<IndexGrid> gridsOut,
                         tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum,
                         const Index *kernelSize, const Index *stride,
                         const Index *padding, const Index *dilation,
                         const Index *outSpatialShape) {
  // indicesOut: num_active * kernelVolume * (NDim + 1)
  Index numAct = 0;
  auto numActIn = indicesIn.dim(0);
  Index batchIdx = 0;
  Index spatialVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  // Scratch space for getValidOutPos: up to kernelVolume records of
  // (NDim coordinates, kernel offset).
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index *validPoints = validPoints_.data();
  Index *pointPtr = nullptr;
  // Flattened (batch, position) index -> row of that site in indicesOut.
  tsl::robin_map<Index, Index> hash;
  for (int j = 0; j < numActIn; ++j) {
    batchIdx = indicesIn(j, 0);
    numValidPoints = getValidOutPos<Index, NDim>(
        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
        dilation, outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
                   spatialVolume * batchIdx;
      // Look the site up once and reuse the result instead of calling
      // find() followed by two operator[] lookups.
      Index outIdx = 0;
      auto iter = hash.find(index);
      if (iter == hash.end()) {
        // First time this output site is reached: append it to indicesOut.
        for (unsigned k = 1; k < NDim + 1; ++k) {
          indicesOut(numAct, k) = pointPtr[k - 1];
        }
        indicesOut(numAct, 0) = batchIdx;
        outIdx = numAct++;
        hash[index] = outIdx;
      } else {
        outIdx = iter->second;
      }
      // indicePairs: [K, 2, L]
      const Index slot = indiceNum[offset]++;
      indicePairs(offset, 0, slot) = j;
      indicePairs(offset, 1, slot) = outIdx;
    }
  }
  return numAct;
}
// Builds the (input row, output row) pairs for a transposed sparse
// convolution. Same bookkeeping as the regular convolution variant, except
// that candidate output sites come from getValidOutPosTranspose.
//
// indicesIn:   one row per active input site; column 0 is the batch index and
//              the following NDim entries are handed to
//              getValidOutPosTranspose as the site position. Row j is
//              addressed as data() + j * (NDim + 1), so densely packed rows
//              are assumed.
// indicesOut:  receives one row (batch index, then NDim coordinates) for every
//              distinct output site discovered, in discovery order.
// gridsOut:    not used by this implementation.
// indicePairs: [K, 2, L]; for kernel offset k, entry (k, 0, n) is the input
//              row and (k, 1, n) the matching output row of the n-th pair.
// indiceNum:   per-kernel-offset pair counters; incremented here and not
//              reset by this function.
// kernelSize, outSpatialShape: read for NDim entries each.
// stride, padding, dilation:   only forwarded to getValidOutPosTranspose.
//
// Returns the number of distinct output sites written to indicesOut.
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsDeConv(tv::TensorView<const Index> indicesIn,
                           tv::TensorView<Index> indicesOut,
                           tv::TensorView<IndexGrid> gridsOut,
                           tv::TensorView<Index> indicePairs,
                           tv::TensorView<Index> indiceNum,
                           const Index *kernelSize, const Index *stride,
                           const Index *padding, const Index *dilation,
                           const Index *outSpatialShape) {
  Index numAct = 0;
  auto numActIn = indicesIn.dim(0);
  Index batchIdx = 0;
  Index spatialVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  // Scratch space for getValidOutPosTranspose: up to kernelVolume records of
  // (NDim coordinates, kernel offset).
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index *validPoints = validPoints_.data();
  Index *pointPtr = nullptr;
  // Flattened (batch, position) index -> row of that site in indicesOut.
  tsl::robin_map<Index, Index> hash;
  for (int j = 0; j < numActIn; ++j) {
    batchIdx = indicesIn(j, 0);
    numValidPoints = getValidOutPosTranspose<Index, NDim>(
        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
        dilation, outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
                   spatialVolume * batchIdx;
      // Look the site up once and reuse the result instead of calling
      // find() followed by two operator[] lookups.
      Index outIdx = 0;
      auto iter = hash.find(index);
      if (iter == hash.end()) {
        // First time this output site is reached: append it to indicesOut.
        for (unsigned k = 1; k < NDim + 1; ++k) {
          indicesOut(numAct, k) = pointPtr[k - 1];
        }
        indicesOut(numAct, 0) = batchIdx;
        outIdx = numAct++;
        hash[index] = outIdx;
      } else {
        outIdx = iter->second;
      }
      // indicePairs: [K, 2, L]
      const Index slot = indiceNum[offset]++;
      indicePairs(offset, 0, slot) = j;
      indicePairs(offset, 1, slot) = outIdx;
    }
  }
  return numAct;
}
// Builds the (input row, output row) pairs for a submanifold convolution, in
// which the output sites are exactly the input sites.
//
// indicesIn:   one row per active site; column 0 is the batch index and the
//              following NDim entries are the site position. Row j is
//              addressed as data() + j * (NDim + 1), so densely packed rows
//              are assumed.
// gridsOut:    not used by this implementation.
// indicePairs: [K, 2, L]; for kernel offset k, entry (k, 0, n) is the input
//              row and (k, 1, n) the matching output row of the n-th pair.
// indiceNum:   per-kernel-offset pair counters; incremented here and not
//              reset by this function.
// kernelSize, outSpatialShape: read for NDim entries each.
// stride, padding, dilation:   only forwarded to getValidOutPos.
//
// Returns the number of rows in indicesIn.
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<IndexGrid> gridsOut,
                         tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum,
                         const Index *const kernelSize,
                         const Index *const stride,
                         const Index *const padding, const Index *dilation,
                         const Index *const outSpatialShape) {
  auto numActIn = indicesIn.dim(0);
  Index spatialVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  Index numValidPoints = 0;
  // Scratch space for getValidOutPos: up to kernelVolume records of
  // (NDim coordinates, kernel offset).
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index *validPoints = validPoints_.data();
  Index *pointPtr = nullptr;
  // Flattened (batch, position) index -> row of that site in indicesIn.
  tsl::robin_map<Index, Index> hash;
  // Pass 1: register every active input site.
  for (int j = 0; j < numActIn; ++j) {
    Index index = 0;
    index = tv::rowArrayIdx<Index, NDim>(indicesIn.data() + j * (NDim + 1) + 1,
                                         outSpatialShape) +
            spatialVolume * indicesIn(j, 0);
    hash[index] = j;
  }
  // Pass 2: pair each site with the candidate positions that are active.
  Index index = 0;
  for (int j = 0; j < numActIn; ++j) {
    numValidPoints = getValidOutPos<Index, NDim>(
        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
        dilation, outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
              spatialVolume * indicesIn(j, 0);
      // A pair exists only when the candidate position is itself an active
      // site. Testing for "== end()" here would record pairs for inactive
      // positions and make the subsequent operator[] insert a spurious
      // default-valued entry into the map.
      auto iter = hash.find(index);
      if (iter != hash.end()) {
        const Index slot = indiceNum[offset]++;
        indicePairs(offset, 0, slot) = j;
        indicePairs(offset, 1, slot) = iter->second;
      }
    }
  }
  return numActIn;
}
}
// namespace spconv
}
// namespace spconv
#endif
#endif
\ No newline at end of file
src/spconv/CMakeLists.txt
View file @
48b9a86a
...
@@ -4,6 +4,12 @@ if (SPCONV_BuildCUDA)
...
@@ -4,6 +4,12 @@ if (SPCONV_BuildCUDA)
endif
()
endif
()
add_library
(
spconv SHARED
${
ALL_FILES
}
)
add_library
(
spconv SHARED
${
ALL_FILES
}
)
find_package
(
OpenMP
)
if
(
OpenMP_CXX_FOUND
)
target_link_libraries
(
spconv PUBLIC OpenMP::OpenMP_CXX
)
endif
()
target_include_directories
(
spconv PRIVATE
${
ALL_INCLUDE
}
)
target_include_directories
(
spconv PRIVATE
${
ALL_INCLUDE
}
)
set_property
(
TARGET spconv PROPERTY CUDA_STANDARD 14
)
set_property
(
TARGET spconv PROPERTY CUDA_STANDARD 14
)
set_property
(
TARGET spconv PROPERTY CXX_STANDARD 14
)
set_property
(
TARGET spconv PROPERTY CXX_STANDARD 14
)
...
...
src/spconv/indice.cc
View file @
48b9a86a
...
@@ -16,9 +16,185 @@
...
@@ -16,9 +16,185 @@
#include <spconv/indice.h>
#include <spconv/indice.h>
#include <spconv/spconv_ops.h>
#include <spconv/spconv_ops.h>
#include <torch/script.h>
#include <torch/script.h>
#include <ATen/Parallel.h>
namespace
spconv
{
namespace
spconv
{
// Generates the index pairs of a regular sparse convolution on the CPU.
//
// Every row of indicesIn holds a batch index in column 0 followed by NDim
// entries that are passed to getValidOutPos as the site position (rows are
// addressed as data() + row * (NDim + 1)). For each candidate record returned
// by getValidOutPos the output site is looked up in a hash map keyed by its
// flattened (batch, position) index; unseen sites are appended to indicesOut
// as (batch, coordinates...). The pair (input row, output row) is then stored
// in indicePairs, laid out as [K, 2, L], at the slot given by
// indiceNum[kernel offset], which is incremented afterwards.
//
// gridsOut is not used. Returns the number of rows written to indicesOut.
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsConv(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<Index> indicesOut,
                         tv::TensorView<IndexGrid> gridsOut,
                         tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum,
                         const Index *kernelSize, const Index *stride,
                         const Index *padding, const Index *dilation,
                         const Index *outSpatialShape) {
  const auto inputCount = indicesIn.dim(0);

  // Products of the output extents and of the kernel extents.
  Index outVolume = 1;
  Index kernelVol = 1;
  for (unsigned d = 0; d < NDim; ++d) {
    outVolume *= outSpatialShape[d];
    kernelVol *= kernelSize[d];
  }

  // Scratch records of (NDim coordinates, kernel offset).
  std::vector<Index> candidateBuf(kernelVol * (NDim + 1));
  Index *const candidates = candidateBuf.data();

  // Flattened output index -> row in indicesOut.
  tsl::robin_map<Index, Index> outRowOf;
  Index outCount = 0;

  for (int in = 0; in < inputCount; ++in) {
    const Index batch = indicesIn(in, 0);
    const Index found = getValidOutPos<Index, NDim>(
        indicesIn.data() + in * (NDim + 1) + 1, kernelSize, stride, padding,
        dilation, outSpatialShape, candidates);

    for (Index c = 0; c < found; ++c) {
      Index *const point = candidates + c * (NDim + 1);
      const auto kernelOffset = point[NDim];
      const auto flat =
          tv::rowArrayIdx<Index, NDim>(point, outSpatialShape) +
          outVolume * batch;

      Index outRow;
      const auto hit = outRowOf.find(flat);
      if (hit != outRowOf.end()) {
        outRow = hit->second;
      } else {
        // New output site: record its batch index and coordinates.
        indicesOut(outCount, 0) = batch;
        for (unsigned k = 0; k < NDim; ++k) {
          indicesOut(outCount, k + 1) = point[k];
        }
        outRow = outCount++;
        outRowOf[flat] = outRow;
      }

      // indicePairs: [K, 2, L]
      indicePairs(kernelOffset, 0, indiceNum[kernelOffset]) = in;
      indicePairs(kernelOffset, 1, indiceNum[kernelOffset]++) = outRow;
    }
  }
  return outCount;
}
// Generates the index pairs of a transposed sparse convolution on the CPU.
//
// Every row of indicesIn holds a batch index in column 0 followed by NDim
// entries that are passed to getValidOutPosTranspose as the site position
// (rows are addressed as data() + row * (NDim + 1)). For each candidate record
// the output site is looked up in a hash map keyed by its flattened
// (batch, position) index; unseen sites are appended to indicesOut as
// (batch, coordinates...). The pair (input row, output row) is then stored in
// indicePairs, laid out as [K, 2, L], at the slot given by
// indiceNum[kernel offset], which is incremented afterwards.
//
// gridsOut is not used. Returns the number of rows written to indicesOut.
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsDeConv(tv::TensorView<const Index> indicesIn,
                           tv::TensorView<Index> indicesOut,
                           tv::TensorView<IndexGrid> gridsOut,
                           tv::TensorView<Index> indicePairs,
                           tv::TensorView<Index> indiceNum,
                           const Index *kernelSize, const Index *stride,
                           const Index *padding, const Index *dilation,
                           const Index *outSpatialShape) {
  const auto inputCount = indicesIn.dim(0);

  // Products of the output extents and of the kernel extents.
  Index outVolume = 1;
  Index kernelVol = 1;
  for (unsigned d = 0; d < NDim; ++d) {
    outVolume *= outSpatialShape[d];
    kernelVol *= kernelSize[d];
  }

  // Scratch records of (NDim coordinates, kernel offset).
  std::vector<Index> candidateBuf(kernelVol * (NDim + 1));
  Index *const candidates = candidateBuf.data();

  // Flattened output index -> row in indicesOut.
  tsl::robin_map<Index, Index> outRowOf;
  Index outCount = 0;

  for (int in = 0; in < inputCount; ++in) {
    const Index batch = indicesIn(in, 0);
    const Index found = getValidOutPosTranspose<Index, NDim>(
        indicesIn.data() + in * (NDim + 1) + 1, kernelSize, stride, padding,
        dilation, outSpatialShape, candidates);

    for (Index c = 0; c < found; ++c) {
      Index *const point = candidates + c * (NDim + 1);
      const auto kernelOffset = point[NDim];
      const auto flat =
          tv::rowArrayIdx<Index, NDim>(point, outSpatialShape) +
          outVolume * batch;

      Index outRow;
      const auto hit = outRowOf.find(flat);
      if (hit != outRowOf.end()) {
        outRow = hit->second;
      } else {
        // New output site: record its batch index and coordinates.
        indicesOut(outCount, 0) = batch;
        for (unsigned k = 0; k < NDim; ++k) {
          indicesOut(outCount, k + 1) = point[k];
        }
        outRow = outCount++;
        outRowOf[flat] = outRow;
      }

      // indicePairs: [K, 2, L]
      indicePairs(kernelOffset, 0, indiceNum[kernelOffset]) = in;
      indicePairs(kernelOffset, 1, indiceNum[kernelOffset]++) = outRow;
    }
  }
  return outCount;
}
// Generates the index pairs of a submanifold convolution on the CPU.
//
// Phase 1 (serial) fills a hash map from the flattened (batch, position)
// index of every row of indicesIn to that row's number. Phase 2 runs under
// at::parallel_for: for each input row it asks getValidOutPos for candidate
// records and, for every candidate that is present in the map, reserves a
// slot in indiceNum[kernel offset] and writes (input row, mapped row) into
// indicePairs, laid out as [K, 2, L].
//
// Rows of indicesIn are addressed as data() + row * (NDim + 1); column 0 is
// the batch index. gridsOut is not used. Returns the number of input rows.
//
// NOTE(review): the slot reservation is protected only by
// "#pragma omp atomic capture", which has an effect only when this file is
// compiled with OpenMP enabled - confirm the build always does so whenever
// at::parallel_for actually runs chunks concurrently.
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<IndexGrid> gridsOut,
                         tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum,
                         const Index *const kernelSize,
                         const Index *const stride,
                         const Index *const padding, const Index *dilation,
                         const Index *const outSpatialShape) {
  auto numActIn = indicesIn.dim(0);

  // Products of the output extents and of the kernel extents.
  Index outVolume = 1;
  Index kernelVol = 1;
  for (unsigned d = 0; d < NDim; ++d) {
    outVolume *= outSpatialShape[d];
    kernelVol *= kernelSize[d];
  }

  // Phase 1: flattened index of each input row -> row number.
  tsl::robin_map<Index, Index> hash;
  for (int row = 0; row < numActIn; ++row) {
    const Index flat =
        tv::rowArrayIdx<Index, NDim>(
            indicesIn.data() + row * (NDim + 1) + 1, outSpatialShape) +
        outVolume * indicesIn(row, 0);
    hash[flat] = row;
  }

  // Phase 2: each invocation handles rows [begin, end) with its own scratch
  // buffer; the map is only read here.
  at::parallel_for(0, numActIn, 0, [&](int64_t begin, int64_t end) {
    std::vector<Index> candidateBuf(kernelVol * (NDim + 1));
    Index *const candidates = candidateBuf.data();
    Index slot = 0;

    for (int in = begin; in < end; ++in) {
      const Index found = getValidOutPos<Index, NDim>(
          indicesIn.data() + in * (NDim + 1) + 1, kernelSize, stride, padding,
          dilation, outSpatialShape, candidates);

      for (Index c = 0; c < found; ++c) {
        Index *const point = candidates + c * (NDim + 1);
        const auto kernelOffset = point[NDim];
        const Index flat =
            tv::rowArrayIdx<Index, NDim>(point, outSpatialShape) +
            outVolume * indicesIn(in, 0);

        const auto hit = hash.find(flat);
        if (hit == hash.end()) {
          continue;  // candidate position is not an active site
        }

        // Reserve the next free pair slot for this kernel offset.
#pragma omp atomic capture
        slot = indiceNum[kernelOffset]++;
        indicePairs(kernelOffset, 0, slot) = in;
        indicePairs(kernelOffset, 1, slot) = hit->second;
      }
    }
  });
  return numActIn;
}
namespace
functor
{
namespace
functor
{
template
<
typename
Index
,
typename
IndexGrid
,
unsigned
NDim
>
template
<
typename
Index
,
typename
IndexGrid
,
unsigned
NDim
>
struct
CreateConvIndicePairFunctor
<
tv
::
CPU
,
Index
,
IndexGrid
,
NDim
>
{
struct
CreateConvIndicePairFunctor
<
tv
::
CPU
,
Index
,
IndexGrid
,
NDim
>
{
...
...
src/spconv/reordering.cc
View file @
48b9a86a
...
@@ -41,15 +41,13 @@ struct SparseScatterAddFunctor<tv::CPU, T, Index> {
...
@@ -41,15 +41,13 @@ struct SparseScatterAddFunctor<tv::CPU, T, Index> {
int
numPlanes
=
outFeatures
.
dim
(
1
);
int
numPlanes
=
outFeatures
.
dim
(
1
);
const
T
*
buf
=
buffer
.
data
();
const
T
*
buf
=
buffer
.
data
();
T
*
out
=
outFeatures
.
data
();
T
*
out
=
outFeatures
.
data
();
at
::
parallel_for
(
0
,
size
,
0
,
[
&
](
int64_t
begin
,
int64_t
end
){
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
for
(
int
i
=
begin
;
i
<
end
;
++
i
)
{
buf
=
buffer
.
data
()
+
i
*
numPlanes
;
buf
=
buffer
.
data
()
+
i
*
numPlanes
;
out
=
outFeatures
.
data
()
+
indices
[
i
]
*
numPlanes
;
out
=
outFeatures
.
data
()
+
indices
[
i
]
*
numPlanes
;
for
(
int
j
=
0
;
j
<
numPlanes
;
++
j
){
for
(
int
j
=
0
;
j
<
numPlanes
;
++
j
){
out
[
j
]
+=
buf
[
j
];
out
[
j
]
+=
buf
[
j
];
}
}
}
}
);
}
}
}
};
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment