Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
48b9a86a
"vscode:/vscode.git/clone" did not exist on "d8c6769914e3144cbdd01c29343db181fb0b6014"
Commit
48b9a86a
authored
May 24, 2019
by
traveller59
Browse files
fix cpu bug, increase cpu speed
parent
e8533f47
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
188 additions
and
166 deletions
+188
-166
include/spconv/geometry.h
include/spconv/geometry.h
+0
-158
src/spconv/CMakeLists.txt
src/spconv/CMakeLists.txt
+6
-0
src/spconv/indice.cc
src/spconv/indice.cc
+176
-0
src/spconv/reordering.cc
src/spconv/reordering.cc
+6
-8
No files found.
include/spconv/geometry.h
View file @
48b9a86a
...
...
@@ -185,164 +185,6 @@ TV_HOST_DEVICE Index getValidOutPosTranspose(
return
pointCounter
;
}
// Builds the (input row, output row) index pairs of a regular sparse
// convolution on the CPU.
//
// For every row j of indicesIn (column 0 is read as the batch index, the
// following NDim columns are passed to getValidOutPos as the input
// coordinate), getValidOutPos fills a scratch buffer with candidate output
// points. Each point occupies NDim + 1 entries: NDim coordinates followed by
// a value that is used here as the first index into indicePairs/indiceNum
// (presumably the kernel offset -- confirm against getValidOutPos).
//
// A point whose flattened (batch, coordinate) index has not been seen before
// is appended to indicesOut and assigned the next output row number.
//
// Outputs:
//   indicesOut  : one row per distinct output point, (batch, coords...).
//   indicePairs : [K, 2, L]; (offset, 0, n) holds the input row and
//                 (offset, 1, n) the matching output row.
//   indiceNum   : per-offset pair counters, incremented for every pair.
//                 They are not reset here, so the caller is expected to
//                 provide them zero-initialised -- TODO confirm.
//   gridsOut    : not used by this implementation.
//
// Returns the number of distinct output rows written to indicesOut.
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsConv(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<Index> indicesOut,
                         tv::TensorView<IndexGrid> gridsOut,
                         tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum,
                         const Index *kernelSize, const Index *stride,
                         const Index *padding, const Index *dilation,
                         const Index *outSpatialShape) {
  // indicesOut: num_active * kernelVolume * (NDim + 1)
  Index numAct = 0;
  auto numActIn = indicesIn.dim(0);
  Index spatialVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  // Scratch space for the points produced by one getValidOutPos call.
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index *validPoints = validPoints_.data();
  // Maps a flattened (batch, coordinate) index to its row in indicesOut.
  tsl::robin_map<Index, Index> hash;
  for (int j = 0; j < numActIn; ++j) {
    const Index batchIdx = indicesIn(j, 0);
    const Index numValidPoints = getValidOutPos<Index, NDim>(
        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
        dilation, outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      const Index *pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
                   spatialVolume * batchIdx;
      // Look the output point up once and reuse the result, instead of
      // calling find() followed by repeated operator[] lookups.
      Index outRow;
      auto iter = hash.find(index);
      if (iter == hash.end()) {
        for (unsigned k = 1; k < NDim + 1; ++k) {
          indicesOut(numAct, k) = pointPtr[k - 1];
        }
        indicesOut(numAct, 0) = batchIdx;
        outRow = numAct++;
        hash[index] = outRow;
      } else {
        outRow = iter->second;
      }
      // indicePairs: [K, 2, L]
      const Index slot = indiceNum[offset]++;
      indicePairs(offset, 0, slot) = j;
      indicePairs(offset, 1, slot) = outRow;
    }
  }
  return numAct;
}
// Builds the (input row, output row) index pairs of a transposed
// (de-)convolution on the CPU.
//
// Identical in structure to the regular convolution variant, except that the
// candidate output points of each input row are produced by
// getValidOutPosTranspose. Each candidate occupies NDim + 1 scratch entries:
// NDim coordinates followed by a value used here as the first index into
// indicePairs/indiceNum (presumably the kernel offset -- confirm against
// getValidOutPosTranspose).
//
// Outputs:
//   indicesOut  : one row per distinct output point, (batch, coords...).
//   indicePairs : [K, 2, L]; (offset, 0, n) holds the input row and
//                 (offset, 1, n) the matching output row.
//   indiceNum   : per-offset pair counters, incremented for every pair.
//                 They are not reset here, so the caller is expected to
//                 provide them zero-initialised -- TODO confirm.
//   gridsOut    : not used by this implementation.
//
// Returns the number of distinct output rows written to indicesOut.
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsDeConv(tv::TensorView<const Index> indicesIn,
                           tv::TensorView<Index> indicesOut,
                           tv::TensorView<IndexGrid> gridsOut,
                           tv::TensorView<Index> indicePairs,
                           tv::TensorView<Index> indiceNum,
                           const Index *kernelSize, const Index *stride,
                           const Index *padding, const Index *dilation,
                           const Index *outSpatialShape) {
  Index numAct = 0;
  auto numActIn = indicesIn.dim(0);
  Index spatialVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  // Scratch space for the points produced by one getValidOutPosTranspose call.
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index *validPoints = validPoints_.data();
  // Maps a flattened (batch, coordinate) index to its row in indicesOut.
  tsl::robin_map<Index, Index> hash;
  for (int j = 0; j < numActIn; ++j) {
    const Index batchIdx = indicesIn(j, 0);
    const Index numValidPoints = getValidOutPosTranspose<Index, NDim>(
        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
        dilation, outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      const Index *pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      auto index = tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
                   spatialVolume * batchIdx;
      // Look the output point up once and reuse the result, instead of
      // calling find() followed by repeated operator[] lookups.
      Index outRow;
      auto iter = hash.find(index);
      if (iter == hash.end()) {
        for (unsigned k = 1; k < NDim + 1; ++k) {
          indicesOut(numAct, k) = pointPtr[k - 1];
        }
        indicesOut(numAct, 0) = batchIdx;
        outRow = numAct++;
        hash[index] = outRow;
      } else {
        outRow = iter->second;
      }
      // indicePairs: [K, 2, L]
      const Index slot = indiceNum[offset]++;
      indicePairs(offset, 0, slot) = j;
      indicePairs(offset, 1, slot) = outRow;
    }
  }
  return numAct;
}
// Builds the (input row, output row) index pairs of a submanifold sparse
// convolution on the CPU.
//
// The output sites are the input sites themselves, so no indicesOut is
// produced. A first pass maps the flattened (batch, coordinate) index of
// every row of indicesIn to its row number. A second pass asks getValidOutPos
// for the candidate points of each input row and records a pair only when the
// candidate is itself one of the input sites.
//
// Outputs:
//   indicePairs : [K, 2, L]; (offset, 0, n) holds the input row and
//                 (offset, 1, n) the row of the matching active site.
//   indiceNum   : per-offset pair counters, incremented for every pair.
//                 They are not reset here, so the caller is expected to
//                 provide them zero-initialised -- TODO confirm.
//   gridsOut    : not used by this implementation.
//
// Returns the number of input rows (indicesIn.dim(0)).
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<IndexGrid> gridsOut,
                         tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum,
                         const Index *const kernelSize,
                         const Index *const stride, const Index *const padding,
                         const Index *dilation,
                         const Index *const outSpatialShape) {
  auto numActIn = indicesIn.dim(0);
  Index spatialVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  // Scratch space for the points produced by one getValidOutPos call.
  std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
  Index *validPoints = validPoints_.data();
  // Pass 1: flattened (batch, coordinate) index -> input row.
  tsl::robin_map<Index, Index> hash;
  for (int j = 0; j < numActIn; ++j) {
    const Index index =
        tv::rowArrayIdx<Index, NDim>(indicesIn.data() + j * (NDim + 1) + 1,
                                     outSpatialShape) +
        spatialVolume * indicesIn(j, 0);
    hash[index] = j;
  }
  // Pass 2: pair every input row with the active sites its kernel reaches.
  for (int j = 0; j < numActIn; ++j) {
    const Index numValidPoints = getValidOutPos<Index, NDim>(
        indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
        dilation, outSpatialShape, validPoints);
    for (Index i = 0; i < numValidPoints; ++i) {
      const Index *pointPtr = validPoints + i * (NDim + 1);
      auto offset = pointPtr[NDim];
      const Index index =
          tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
          spatialVolume * indicesIn(j, 0);
      // Only candidates that are existing active sites form a pair. The
      // previous `== hash.end()` test was inverted: it recorded pairs for
      // inactive sites and default-inserted them into the map via hash[index].
      auto iter = hash.find(index);
      if (iter != hash.end()) {
        const Index slot = indiceNum[offset]++;
        indicePairs(offset, 0, slot) = j;
        indicePairs(offset, 1, slot) = iter->second;
      }
    }
  }
  return numActIn;
}
}
// namespace spconv
#endif
\ No newline at end of file
src/spconv/CMakeLists.txt
View file @
48b9a86a
...
...
@@ -4,6 +4,12 @@ if (SPCONV_BuildCUDA)
endif
()
add_library(spconv SHARED ${ALL_FILES})

# OpenMP is optional: it is linked only when CMake finds a C++ OpenMP runtime.
find_package(OpenMP)
if(OpenMP_CXX_FOUND)
  target_link_libraries(spconv PUBLIC OpenMP::OpenMP_CXX)
endif()

target_include_directories(spconv PRIVATE ${ALL_INCLUDE})

# Both the CUDA and the host sources are built as C++14.
set_property(TARGET spconv PROPERTY CUDA_STANDARD 14)
set_property(TARGET spconv PROPERTY CXX_STANDARD 14)
...
...
src/spconv/indice.cc
View file @
48b9a86a
...
...
@@ -16,9 +16,185 @@
#include <spconv/indice.h>
#include <spconv/spconv_ops.h>
#include <torch/script.h>
#include <ATen/Parallel.h>
namespace
spconv
{
// CPU index-pair generation for a regular sparse convolution.
//
// Each row j of indicesIn is read as (batch, coords...). getValidOutPos
// writes the candidate output points for that row into a scratch buffer,
// NDim + 1 entries per point: the NDim output coordinates followed by a value
// used below as the first index into indicePairs/indiceNum (presumably the
// kernel offset -- confirm against getValidOutPos).
//
// Every previously unseen (batch, output coordinate) is appended to
// indicesOut and receives the next output row number; the map keeps the
// flattened index -> output row association.
//
// indicePairs is laid out as [K, 2, L]: (offset, 0, n) is the input row and
// (offset, 1, n) the output row of the n-th pair of that offset, where n comes
// from the running counter indiceNum[offset]. gridsOut is not used.
//
// Returns the number of rows written to indicesOut.
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsConv(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<Index> indicesOut,
                         tv::TensorView<IndexGrid> gridsOut,
                         tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum,
                         const Index *kernelSize, const Index *stride,
                         const Index *padding, const Index *dilation,
                         const Index *outSpatialShape) {
  // indicesOut: num_active * kernelVolume * (NDim + 1)
  constexpr unsigned kPointStride = NDim + 1;
  auto numActIn = indicesIn.dim(0);

  Index spatialVolume = 1;
  Index kernelVolume = 1;
#pragma unroll
  for (unsigned d = 0; d < NDim; ++d) {
    spatialVolume *= outSpatialShape[d];
  }
#pragma unroll
  for (unsigned d = 0; d < NDim; ++d) {
    kernelVolume *= kernelSize[d];
  }

  // Scratch buffer reused for every input row.
  std::vector<Index> validPoints_(kernelVolume * kPointStride);
  Index *const validPoints = validPoints_.data();

  tsl::robin_map<Index, Index> hash;
  Index numAct = 0;
  for (int j = 0; j < numActIn; ++j) {
    const Index batchIdx = indicesIn(j, 0);
    const Index *inCoords = indicesIn.data() + j * kPointStride + 1;
    const Index numValidPoints = getValidOutPos<Index, NDim>(
        inCoords, kernelSize, stride, padding, dilation, outSpatialShape,
        validPoints);

    for (Index p = 0; p < numValidPoints; ++p) {
      const Index *point = validPoints + p * kPointStride;
      auto offset = point[NDim];
      auto flatIdx = tv::rowArrayIdx<Index, NDim>(point, outSpatialShape) +
                     spatialVolume * batchIdx;

      Index outRow;
      auto found = hash.find(flatIdx);
      if (found != hash.end()) {
        outRow = found->second;
      } else {
        // First time this output site is reached: emit it.
        outRow = numAct++;
        indicesOut(outRow, 0) = batchIdx;
        for (unsigned d = 0; d < NDim; ++d) {
          indicesOut(outRow, d + 1) = point[d];
        }
        hash[flatIdx] = outRow;
      }

      // indicePairs: [K, 2, L]
      const Index pairSlot = indiceNum[offset]++;
      indicePairs(offset, 0, pairSlot) = j;
      indicePairs(offset, 1, pairSlot) = outRow;
    }
  }
  return numAct;
}
// CPU index-pair generation for a transposed (de-)convolution.
//
// Each row j of indicesIn is read as (batch, coords...).
// getValidOutPosTranspose writes the candidate output points for that row
// into a scratch buffer, NDim + 1 entries per point: the NDim output
// coordinates followed by a value used below as the first index into
// indicePairs/indiceNum (presumably the kernel offset -- confirm against
// getValidOutPosTranspose).
//
// Every previously unseen (batch, output coordinate) is appended to
// indicesOut and receives the next output row number; the map keeps the
// flattened index -> output row association.
//
// indicePairs is laid out as [K, 2, L]: (offset, 0, n) is the input row and
// (offset, 1, n) the output row of the n-th pair of that offset, where n comes
// from the running counter indiceNum[offset]. gridsOut is not used.
//
// Returns the number of rows written to indicesOut.
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsDeConv(tv::TensorView<const Index> indicesIn,
                           tv::TensorView<Index> indicesOut,
                           tv::TensorView<IndexGrid> gridsOut,
                           tv::TensorView<Index> indicePairs,
                           tv::TensorView<Index> indiceNum,
                           const Index *kernelSize, const Index *stride,
                           const Index *padding, const Index *dilation,
                           const Index *outSpatialShape) {
  constexpr unsigned kPointStride = NDim + 1;
  auto numActIn = indicesIn.dim(0);

  Index spatialVolume = 1;
  Index kernelVolume = 1;
#pragma unroll
  for (unsigned d = 0; d < NDim; ++d) {
    spatialVolume *= outSpatialShape[d];
  }
#pragma unroll
  for (unsigned d = 0; d < NDim; ++d) {
    kernelVolume *= kernelSize[d];
  }

  // Scratch buffer reused for every input row.
  std::vector<Index> validPoints_(kernelVolume * kPointStride);
  Index *const validPoints = validPoints_.data();

  tsl::robin_map<Index, Index> hash;
  Index numAct = 0;
  for (int j = 0; j < numActIn; ++j) {
    const Index batchIdx = indicesIn(j, 0);
    const Index *inCoords = indicesIn.data() + j * kPointStride + 1;
    const Index numValidPoints = getValidOutPosTranspose<Index, NDim>(
        inCoords, kernelSize, stride, padding, dilation, outSpatialShape,
        validPoints);

    for (Index p = 0; p < numValidPoints; ++p) {
      const Index *point = validPoints + p * kPointStride;
      auto offset = point[NDim];
      auto flatIdx = tv::rowArrayIdx<Index, NDim>(point, outSpatialShape) +
                     spatialVolume * batchIdx;

      Index outRow;
      auto found = hash.find(flatIdx);
      if (found != hash.end()) {
        outRow = found->second;
      } else {
        // First time this output site is reached: emit it.
        outRow = numAct++;
        indicesOut(outRow, 0) = batchIdx;
        for (unsigned d = 0; d < NDim; ++d) {
          indicesOut(outRow, d + 1) = point[d];
        }
        hash[flatIdx] = outRow;
      }

      // indicePairs: [K, 2, L]
      const Index pairSlot = indiceNum[offset]++;
      indicePairs(offset, 0, pairSlot) = j;
      indicePairs(offset, 1, pairSlot) = outRow;
    }
  }
  return numAct;
}
// CPU index-pair generation for a submanifold sparse convolution, with the
// pairing pass split across threads by at::parallel_for.
//
// Pass 1 (serial): map the flattened (batch, coordinate) index of every row
// of indicesIn to its row number.
// Pass 2 (parallel over input rows): getValidOutPos produces the candidate
// points of each row (NDim coordinates followed by a value used as the first
// index into indicePairs/indiceNum -- presumably the kernel offset, confirm
// against getValidOutPos); a pair is recorded only when the candidate is one
// of the input sites.
//
// Outputs:
//   indicePairs : (offset, 0, n) holds the input row and (offset, 1, n) the
//                 row of the matching active site.
//   indiceNum   : per-offset pair counters; each recorded pair claims the
//                 current value as its slot n and increments it.
//   gridsOut    : not used by this implementation.
//
// Because the rows are processed concurrently, the order of the pairs within
// one offset depends on thread scheduling.
//
// Returns the number of input rows (indicesIn.dim(0)).
template <typename Index, typename IndexGrid, unsigned NDim>
Index getIndicePairsSubM(tv::TensorView<const Index> indicesIn,
                         tv::TensorView<IndexGrid> gridsOut,
                         tv::TensorView<Index> indicePairs,
                         tv::TensorView<Index> indiceNum,
                         const Index *const kernelSize,
                         const Index *const stride, const Index *const padding,
                         const Index *dilation,
                         const Index *const outSpatialShape) {
  auto numActIn = indicesIn.dim(0);
  Index spatialVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    spatialVolume *= outSpatialShape[i];
  }
  Index kernelVolume = 1;
#pragma unroll
  for (unsigned i = 0; i < NDim; ++i) {
    kernelVolume *= kernelSize[i];
  }
  // Pass 1: flattened (batch, coordinate) index -> input row.
  tsl::robin_map<Index, Index> hash;
  for (int j = 0; j < numActIn; ++j) {
    const Index index =
        tv::rowArrayIdx<Index, NDim>(indicesIn.data() + j * (NDim + 1) + 1,
                                     outSpatialShape) +
        spatialVolume * indicesIn(j, 0);
    hash[index] = j;
  }
  // The worker threads only read the map; going through a const reference
  // makes sure no lookup in the parallel region can modify it.
  const auto &activeSites = hash;
  at::parallel_for(0, numActIn, 0, [&](int64_t begin, int64_t end) {
    // Per-chunk scratch space so threads never share a candidate buffer.
    std::vector<Index> validPoints_(kernelVolume * (NDim + 1));
    Index *validPoints = validPoints_.data();
    Index oldOffset = 0;
    // Row numbers are stored as Index elsewhere, so the chunk start is
    // narrowed explicitly instead of implicitly.
    for (int j = static_cast<int>(begin); j < end; ++j) {
      const Index numValidPoints = getValidOutPos<Index, NDim>(
          indicesIn.data() + j * (NDim + 1) + 1, kernelSize, stride, padding,
          dilation, outSpatialShape, validPoints);
      for (Index i = 0; i < numValidPoints; ++i) {
        const Index *pointPtr = validPoints + i * (NDim + 1);
        auto offset = pointPtr[NDim];
        const Index index =
            tv::rowArrayIdx<Index, NDim>(pointPtr, outSpatialShape) +
            spatialVolume * indicesIn(j, 0);
        auto iter = activeSites.find(index);
        if (iter != activeSites.end()) {
          // Claim a unique slot for this pair.
          // NOTE(review): the atomicity of this read-and-increment comes
          // solely from the OpenMP pragma; when the file is compiled without
          // OpenMP the pragma is ignored and concurrent chunks can race on
          // indiceNum -- confirm the build always enables OpenMP.
#pragma omp atomic capture
          oldOffset = indiceNum[offset]++;
          indicePairs(offset, 0, oldOffset) = j;
          indicePairs(offset, 1, oldOffset) = iter->second;
        }
      }
    }
  });
  return numActIn;
}
namespace
functor
{
template
<
typename
Index
,
typename
IndexGrid
,
unsigned
NDim
>
struct
CreateConvIndicePairFunctor
<
tv
::
CPU
,
Index
,
IndexGrid
,
NDim
>
{
...
...
src/spconv/reordering.cc
View file @
48b9a86a
...
...
@@ -41,15 +41,13 @@ struct SparseScatterAddFunctor<tv::CPU, T, Index> {
int
numPlanes
=
outFeatures
.
dim
(
1
);
const
T
*
buf
=
buffer
.
data
();
T
*
out
=
outFeatures
.
data
();
at
::
parallel_for
(
0
,
size
,
0
,
[
&
](
int64_t
begin
,
int64_t
end
){
for
(
int
i
=
begin
;
i
<
end
;
++
i
)
{
buf
=
buffer
.
data
()
+
i
*
numPlanes
;
out
=
outFeatures
.
data
()
+
indices
[
i
]
*
numPlanes
;
for
(
int
j
=
0
;
j
<
numPlanes
;
++
j
){
out
[
j
]
+=
buf
[
j
];
}
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
buf
=
buffer
.
data
()
+
i
*
numPlanes
;
out
=
outFeatures
.
data
()
+
indices
[
i
]
*
numPlanes
;
for
(
int
j
=
0
;
j
<
numPlanes
;
++
j
){
out
[
j
]
+=
buf
[
j
];
}
}
);
}
}
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment