Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
3517290c
Commit
3517290c
authored
Jul 09, 2020
by
yanyan
Browse files
format code, add benchmark per layer
parent
540a2209
Changes
29
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
548 additions
and
330 deletions
+548
-330
include/spconv/fused_conv.cu.h
include/spconv/fused_conv.cu.h
+3
-3
include/spconv/indice.cu.h
include/spconv/indice.cu.h
+8
-7
include/spconv/minkowski.cu.h
include/spconv/minkowski.cu.h
+0
-1
include/spconv/point2voxel.cu.h
include/spconv/point2voxel.cu.h
+19
-21
include/spconv/point2voxel_ops.h
include/spconv/point2voxel_ops.h
+5
-10
include/spconv/points2voxels.h
include/spconv/points2voxels.h
+12
-16
include/spconv/spconv_ops.h
include/spconv/spconv_ops.h
+9
-3
include/tensorview/cc17.h
include/tensorview/cc17.h
+93
-63
include/tensorview/tensor.h
include/tensorview/tensor.h
+26
-21
include/tensorview/tools.h
include/tensorview/tools.h
+3
-4
include/tensorview/torch_utils.h
include/tensorview/torch_utils.h
+2
-2
spconv/__init__.py
spconv/__init__.py
+1
-79
spconv/conv.py
spconv/conv.py
+107
-39
spconv/core.py
spconv/core.py
+111
-0
spconv/modules.py
spconv/modules.py
+3
-1
spconv/ops.py
spconv/ops.py
+2
-1
spconv/pool.py
spconv/pool.py
+83
-12
spconv/spatial.py
spconv/spatial.py
+5
-3
spconv/utils/__init__.py
spconv/utils/__init__.py
+38
-27
src/spconv/fused_conv.cu
src/spconv/fused_conv.cu
+18
-17
No files found.
include/spconv/fused_conv.cu.h
View file @
3517290c
include/spconv/indice.cu.h
View file @
3517290c
...
...
@@ -21,15 +21,16 @@
namespace
spconv
{
template
<
bool
UseDeconv
,
typename
Index
,
unsigned
NDim
>
struct
ConvIndiceDispatch
;
template
<
bool
UseDeconv
,
typename
Index
,
unsigned
NDim
>
struct
ConvIndiceDispatch
;
template
<
typename
Index
,
unsigned
NDim
>
struct
ConvIndiceDispatch
<
true
,
Index
,
NDim
>
{
constexpr
static
auto
*
func
=
getValidOutPosTranspose
<
Index
,
NDim
>
;
struct
ConvIndiceDispatch
<
true
,
Index
,
NDim
>
{
constexpr
static
auto
*
func
=
getValidOutPosTranspose
<
Index
,
NDim
>
;
};
template
<
typename
Index
,
unsigned
NDim
>
struct
ConvIndiceDispatch
<
false
,
Index
,
NDim
>
{
constexpr
static
auto
*
func
=
getValidOutPos
<
Index
,
NDim
>
;
struct
ConvIndiceDispatch
<
false
,
Index
,
NDim
>
{
constexpr
static
auto
*
func
=
getValidOutPos
<
Index
,
NDim
>
;
};
template
<
typename
Index
,
unsigned
NDim
,
bool
UseDeconv
,
...
...
@@ -61,8 +62,8 @@ __global__ void prepareIndicePairsKernel(
for
(
int
ix
:
tv
::
KernelLoopX
<
int
>
(
numActIn
))
{
numValidPoints
=
ConvIndiceDispatch
<
UseDeconv
,
Index
,
NDim
>::
func
(
indicesIn
.
data
()
+
ix
*
(
NDim
+
1
)
+
1
,
kernelSize
.
data
(),
stride
.
data
(),
padding
.
data
(),
dilation
.
data
(),
outSpatialShape
.
data
(),
validPoints
);
stride
.
data
(),
padding
.
data
(),
dilation
.
data
(),
outSpatialShape
.
data
(),
validPoints
);
for
(
Index
i
=
0
;
i
<
numValidPoints
;
++
i
)
{
pointPtr
=
validPoints
+
i
*
(
NDim
+
1
);
auto
offset
=
pointPtr
[
NDim
];
...
...
include/spconv/minkowski.cu.h
View file @
3517290c
...
...
@@ -89,7 +89,6 @@ __global__ void matmul(const Dtype *A, const int wA, const int hA,
// C[wB * out_row + x] += Csub;
}
template
<
typename
Dtype
,
typename
Itype
,
int
BLOCK_SIZE
>
__global__
void
matmul2
(
const
Dtype
*
A
,
const
int
wA
,
const
int
hA
,
const
Dtype
*
B
,
const
int
wB
,
const
int
hB
,
...
...
include/spconv/point2voxel.cu.h
View file @
3517290c
...
...
@@ -7,10 +7,8 @@
namespace
spconv
{
template
<
typename
Index
,
unsigned
NDim
>
__global__
void
scatterPointToGridKernel
(
tv
::
TensorView
<
const
float
>
points
,
tv
::
TensorView
<
const
Index
>
indexes
,
tv
::
TensorView
<
float
>
grids
,
tv
::
TensorView
<
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
const
float
>
points
,
tv
::
TensorView
<
const
Index
>
indexes
,
tv
::
TensorView
<
float
>
grids
,
tv
::
TensorView
<
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
Index
>
pointIndex
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
gridShape
)
{
Index
index
;
...
...
@@ -24,14 +22,15 @@ __global__ void scatterPointToGridKernel(
atomicAdd
(
numPointsPerGrid
.
data
()
+
index
,
Index
(
1
));
#pragma unroll
for
(
int
k
=
0
;
k
!=
numFeatures
;
++
k
)
{
atomicAdd
(
grids
.
data
()
+
index
*
numFeatures
+
k
,
*
(
points
.
data
()
+
ix
*
numFeatures
+
k
));
atomicAdd
(
grids
.
data
()
+
index
*
numFeatures
+
k
,
*
(
points
.
data
()
+
ix
*
numFeatures
+
k
));
}
}
}
template
<
typename
Index
,
unsigned
NDim
>
__global__
void
gatherPointFromGridKernel
(
tv
::
TensorView
<
const
float
>
grids
,
__global__
void
gatherPointFromGridKernel
(
tv
::
TensorView
<
const
float
>
grids
,
tv
::
TensorView
<
const
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
const
Index
>
pointIndexUnique
,
tv
::
TensorView
<
float
>
voxels
,
...
...
@@ -47,14 +46,13 @@ __global__ void gatherPointFromGridKernel(
for
(
int
k
=
0
;
k
!=
numFeatures
;
++
k
)
{
voxels
(
ix
,
k
)
=
grids
(
index
,
k
)
/
numPointsPerGrid
(
index
);
}
index
=
tv
::
rowArrayIdxInv
<
Index
,
NDim
>
(
index
,
coors
.
data
()
+
ix
*
NDim
,
gridShape
.
data
());
index
=
tv
::
rowArrayIdxInv
<
Index
,
NDim
>
(
index
,
coors
.
data
()
+
ix
*
NDim
,
gridShape
.
data
());
}
}
template
<
typename
Index
>
__global__
void
resetGridKernel
(
tv
::
TensorView
<
float
>
grids
,
__global__
void
resetGridKernel
(
tv
::
TensorView
<
float
>
grids
,
tv
::
TensorView
<
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
Index
>
pointIndexUnique
)
{
Index
index
;
...
...
@@ -72,8 +70,8 @@ __global__ void resetGridKernel(
}
template
<
typename
Index
>
__global__
void
resetPointIndexKernel
(
tv
::
TensorView
<
Index
>
pointIndex
,
const
Index
gridVolume
)
{
__global__
void
resetPointIndexKernel
(
tv
::
TensorView
<
Index
>
pointIndex
,
const
Index
gridVolume
)
{
int
num_max_points
=
pointIndex
.
dim
(
0
)
-
1
;
for
(
int
ix
:
tv
::
KernelLoopX
<
int
>
(
num_max_points
))
{
...
...
include/spconv/point2voxel_ops.h
View file @
3517290c
...
...
@@ -21,15 +21,10 @@
namespace
spconv
{
int64_t
pointsToVoxel
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
torch
::
Tensor
pointIndex
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
std
::
vector
<
int64_t
>
gridShape
,
int64_t
pointsToVoxel
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
torch
::
Tensor
pointIndex
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
std
::
vector
<
int64_t
>
gridShape
,
const
int64_t
ndim
);
}
// namespace spconv
include/spconv/points2voxels.h
View file @
3517290c
...
...
@@ -3,20 +3,16 @@
#include <tensorview/tensorview.h>
#include <torch/script.h>
namespace
spconv
{
void
scatter_point_to_grid_cuda
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
void
scatter_point_to_grid_cuda
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
pointIndex
,
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
);
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
);
void
gather_point_from_grid_cuda
(
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
void
gather_point_from_grid_cuda
(
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
pointIndex
,
torch
::
Tensor
pointIndexUnique
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
...
...
include/spconv/spconv_ops.h
View file @
3517290c
...
...
@@ -23,9 +23,15 @@
namespace
spconv
{
enum
ConvAlgo
{
kNative
=
0
,
kBatch
,
kBatchGemmGather
,
kSparseConvNet
,
kMinkowskiEngine
};
using
all_conv_algos_t
=
tv
::
mp_list_c
<
int
,
kNative
,
kBatch
,
kBatchGemmGather
,
kSparseConvNet
,
kMinkowskiEngine
>
;
enum
ConvAlgo
{
kNative
=
0
,
kBatch
,
kBatchGemmGather
,
kSparseConvNet
,
kMinkowskiEngine
};
using
all_conv_algos_t
=
tv
::
mp_list_c
<
int
,
kNative
,
kBatch
,
kBatchGemmGather
,
kSparseConvNet
,
kMinkowskiEngine
>
;
// torch.jit's doc says only support int64, so we need to convert to int32.
std
::
vector
<
torch
::
Tensor
>
...
...
include/tensorview/cc17.h
View file @
3517290c
...
...
@@ -7,9 +7,10 @@ Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
Copyright (c) 2011-2013 NYU (Clement Farabet)
Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou, Iain Melvin, Jason Weston)
Copyright (c) 2006 Idiap Research Institute (Samy Bengio)
Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert, Samy Bengio, Johnny Mariethoz)
Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
Samy Bengio, Johnny Mariethoz)
From Caffe2:
...
...
@@ -53,8 +54,8 @@ modification, are permitted provided that the following conditions are met:
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
America
and IDIAP Research Institute nor the names of its contributors may be
3. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
America
and IDIAP Research Institute nor the names of its contributors may be
used to endorse or promote products derived from this software without
specific prior written permission.
...
...
@@ -97,47 +98,67 @@ struct _identity final {
return
std
::
forward
<
T
>
(
arg
);
}
};
template
<
class
Func
,
class
Enable
=
void
>
template
<
class
Func
,
class
Enable
=
void
>
struct
function_takes_identity_argument
:
std
::
false_type
{};
#if defined(_MSC_VER)
// For some weird reason, MSVC shows a compiler error when using guts::void_t instead of std::void_t.
// But we're only building on MSVC versions that have std::void_t, so let's just use that one.
template
<
class
Func
>
struct
function_takes_identity_argument
<
Func
,
std
::
void_t
<
decltype
(
std
::
declval
<
Func
>
()(
_identity
()))
>>
:
std
::
true_type
{};
// For some weird reason, MSVC shows a compiler error when using guts::void_t
// instead of std::void_t. But we're only building on MSVC versions that have
// std::void_t, so let's just use that one.
template
<
class
Func
>
struct
function_takes_identity_argument
<
Func
,
std
::
void_t
<
decltype
(
std
::
declval
<
Func
>
()(
_identity
()))
>>
:
std
::
true_type
{};
#else
template
<
class
Func
>
struct
function_takes_identity_argument
<
Func
,
void_t
<
decltype
(
std
::
declval
<
Func
>
()(
_identity
()))
>>
:
std
::
true_type
{};
template
<
class
Func
>
struct
function_takes_identity_argument
<
Func
,
void_t
<
decltype
(
std
::
declval
<
Func
>
()(
_identity
()))
>>
:
std
::
true_type
{};
#endif
template
<
bool
Condition
>
struct
_if_constexpr
;
template
<
>
struct
_if_constexpr
<
true
>
final
{
template
<
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<
function_takes_identity_argument
<
ThenCallback
>
::
value
,
void
*>
=
nullptr
>
static
decltype
(
auto
)
call
(
ThenCallback
&&
thenCallback
,
ElseCallback
&&
/* elseCallback */
)
{
// The _identity instance passed in can be used to delay evaluation of an expression,
// because the compiler can't know that it's just the identity we're passing in.
template
<
bool
Condition
>
struct
_if_constexpr
;
template
<
>
struct
_if_constexpr
<
true
>
final
{
template
<
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<
function_takes_identity_argument
<
ThenCallback
>
::
value
,
void
*>
=
nullptr
>
static
decltype
(
auto
)
call
(
ThenCallback
&&
thenCallback
,
ElseCallback
&&
/* elseCallback */
)
{
// The _identity instance passed in can be used to delay evaluation of an
// expression, because the compiler can't know that it's just the identity
// we're passing in.
return
thenCallback
(
_identity
());
}
template
<
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<!
function_takes_identity_argument
<
ThenCallback
>
::
value
,
void
*>
=
nullptr
>
static
decltype
(
auto
)
call
(
ThenCallback
&&
thenCallback
,
ElseCallback
&&
/* elseCallback */
)
{
template
<
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<!
function_takes_identity_argument
<
ThenCallback
>
::
value
,
void
*>
=
nullptr
>
static
decltype
(
auto
)
call
(
ThenCallback
&&
thenCallback
,
ElseCallback
&&
/* elseCallback */
)
{
return
thenCallback
();
}
};
template
<
>
struct
_if_constexpr
<
false
>
final
{
template
<
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<
function_takes_identity_argument
<
ElseCallback
>
::
value
,
void
*>
=
nullptr
>
static
decltype
(
auto
)
call
(
ThenCallback
&&
/* thenCallback */
,
ElseCallback
&&
elseCallback
)
{
// The _identity instance passed in can be used to delay evaluation of an expression,
// because the compiler can't know that it's just the identity we're passing in.
template
<
>
struct
_if_constexpr
<
false
>
final
{
template
<
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<
function_takes_identity_argument
<
ElseCallback
>
::
value
,
void
*>
=
nullptr
>
static
decltype
(
auto
)
call
(
ThenCallback
&&
/* thenCallback */
,
ElseCallback
&&
elseCallback
)
{
// The _identity instance passed in can be used to delay evaluation of an
// expression, because the compiler can't know that it's just the identity
// we're passing in.
return
elseCallback
(
_identity
());
}
template
<
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<!
function_takes_identity_argument
<
ElseCallback
>
::
value
,
void
*>
=
nullptr
>
static
decltype
(
auto
)
call
(
ThenCallback
&&
/* thenCallback */
,
ElseCallback
&&
elseCallback
)
{
template
<
class
ThenCallback
,
class
ElseCallback
,
std
::
enable_if_t
<!
function_takes_identity_argument
<
ElseCallback
>
::
value
,
void
*>
=
nullptr
>
static
decltype
(
auto
)
call
(
ThenCallback
&&
/* thenCallback */
,
ElseCallback
&&
elseCallback
)
{
return
elseCallback
();
}
};
...
...
@@ -173,33 +194,40 @@ struct _if_constexpr<false> final {
* template <class T>
* int func(T t) {
* return if_constexpr<std::is_same<T, MyClass1>::value>(
* [&](auto _) { return _(t).value; }, // this code is invalid for T == MyClass2, so a regular non-constexpr if statement wouldn't compile
* [&](auto _) { return _(t).val; } // this code is invalid for T == MyClass1
* [&](auto _) { return _(t).value; }, // this code is invalid for T ==
* MyClass2, so a regular non-constexpr if statement wouldn't compile
* [&](auto _) { return _(t).val; } // this code is invalid for T ==
* MyClass1
* );
* }
*
* Note: The _ argument passed in Example 3 is the identity function, i.e. it does nothing.
* It is used to force the compiler to delay type checking, because the compiler
* doesn't know what kind of _ is passed in. Without it, the compiler would fail
* when you try to access t.value but the member doesn't exist.
* Note: The _ argument passed in Example 3 is the identity function, i.e. it
* does nothing. It is used to force the compiler to delay type checking,
* because the compiler doesn't know what kind of _ is passed in. Without it,
* the compiler would fail when you try to access t.value but the member doesn't
* exist.
*
* Note: In Example 3, both branches return int, so func() returns int. This is not necessary.
* If func() had a return type of "auto", then both branches could return different
* types, say func<MyClass1>() could return int and func<MyClass2>() could return string.
* Note: In Example 3, both branches return int, so func() returns int. This is
* not necessary. If func() had a return type of "auto", then both branches
* could return different types, say func<MyClass1>() could return int and
* func<MyClass2>() could return string.
*/
template
<
bool
Condition
,
class
ThenCallback
,
class
ElseCallback
>
decltype
(
auto
)
if_constexpr
(
ThenCallback
&&
thenCallback
,
ElseCallback
&&
elseCallback
)
{
template
<
bool
Condition
,
class
ThenCallback
,
class
ElseCallback
>
decltype
(
auto
)
if_constexpr
(
ThenCallback
&&
thenCallback
,
ElseCallback
&&
elseCallback
)
{
#if defined(__cpp_if_constexpr)
// If we have C++17, just use it's "if constexpr" feature instead of wrapping it.
// This will give us better error messages.
if
constexpr
(
Condition
)
{
if
constexpr
(
detail
::
function_takes_identity_argument
<
ThenCallback
>::
value
)
{
// If we have C++17, just use it's "if constexpr" feature instead of wrapping
// it. This will give us better error messages.
if
constexpr
(
Condition
)
{
if
constexpr
(
detail
::
function_takes_identity_argument
<
ThenCallback
>::
value
)
{
return
std
::
forward
<
ThenCallback
>
(
thenCallback
)(
detail
::
_identity
());
}
else
{
return
std
::
forward
<
ThenCallback
>
(
thenCallback
)();
}
}
else
{
if
constexpr
(
detail
::
function_takes_identity_argument
<
ElseCallback
>::
value
)
{
if
constexpr
(
detail
::
function_takes_identity_argument
<
ElseCallback
>::
value
)
{
return
std
::
forward
<
ElseCallback
>
(
elseCallback
)(
detail
::
_identity
());
}
else
{
return
std
::
forward
<
ElseCallback
>
(
elseCallback
)();
...
...
@@ -207,18 +235,20 @@ decltype(auto) if_constexpr(ThenCallback&& thenCallback, ElseCallback&& elseCall
}
#else
// C++14 implementation of if constexpr
return
detail
::
_if_constexpr
<
Condition
>::
call
(
std
::
forward
<
ThenCallback
>
(
thenCallback
),
return
detail
::
_if_constexpr
<
Condition
>::
call
(
std
::
forward
<
ThenCallback
>
(
thenCallback
),
std
::
forward
<
ElseCallback
>
(
elseCallback
));
#endif
}
template
<
bool
Condition
,
class
ThenCallback
>
decltype
(
auto
)
if_constexpr
(
ThenCallback
&&
thenCallback
)
{
template
<
bool
Condition
,
class
ThenCallback
>
decltype
(
auto
)
if_constexpr
(
ThenCallback
&&
thenCallback
)
{
#if defined(__cpp_if_constexpr)
// If we have C++17, just use it's "if constexpr" feature instead of wrapping it.
// This will give us better error messages.
if
constexpr
(
Condition
)
{
if
constexpr
(
detail
::
function_takes_identity_argument
<
ThenCallback
>::
value
)
{
// If we have C++17, just use it's "if constexpr" feature instead of wrapping
// it. This will give us better error messages.
if
constexpr
(
Condition
)
{
if
constexpr
(
detail
::
function_takes_identity_argument
<
ThenCallback
>::
value
)
{
return
std
::
forward
<
ThenCallback
>
(
thenCallback
)(
detail
::
_identity
());
}
else
{
return
std
::
forward
<
ThenCallback
>
(
thenCallback
)();
...
...
@@ -226,9 +256,9 @@ decltype(auto) if_constexpr(ThenCallback&& thenCallback) {
}
#else
// C++14 implementation of if constexpr
return
if_constexpr
<
Condition
>
(
std
::
forward
<
ThenCallback
>
(
thenCallback
),
[]
(
auto
)
{});
return
if_constexpr
<
Condition
>
(
std
::
forward
<
ThenCallback
>
(
thenCallback
),
[](
auto
)
{});
#endif
}
}
}
// namespace tv
include/tensorview/tensor.h
View file @
3517290c
...
...
@@ -22,13 +22,13 @@ If you can use libtorch, dont use tv::Tensor.
*/
#pragma once
#include "cc17.h"
#include "mp_helper.h"
#include "tensorview.h"
#include <cstring>
#include <iomanip>
#include <memory>
#include <type_traits>
#include "cc17.h"
#ifdef TV_CUDA
#include <cuda_fp16.h>
#include <cuda_runtime.h>
...
...
@@ -632,25 +632,30 @@ struct Tensor {
tview
()
const
{
static_assert
(
Rank
==
-
1
||
Rank
>
0
,
"error"
);
TV_ASSERT_RT_ERR
(
dtype_
==
type_v
<
T
>
,
"error"
);
return
if_constexpr
<
(
Rank
>
0
)
>
([
&
](
auto
_
){
return
if_constexpr
<
(
Rank
>
0
)
>
(
[
&
](
auto
_
)
{
TV_ASSERT_RT_ERR
(
Rank
==
ndim
(),
"error"
);
ShapeBase
<
_
(
Rank
)
==
-
1
?
TV_MAX_DIM
:
Rank
,
Tindex
>
shape
(
Rank
),
stride
(
Rank
);
ShapeBase
<
_
(
Rank
)
==
-
1
?
TV_MAX_DIM
:
Rank
,
Tindex
>
shape
(
Rank
),
stride
(
Rank
);
for
(
int
i
=
0
;
i
<
Rank
;
++
i
)
{
shape
[
i
]
=
shape_
[
i
];
stride
[
i
]
=
stride_
[
i
];
}
return
TensorView
<
const
std
::
remove_const_t
<
T
>
,
Rank
,
PtrTraits
,
Tindex
>
(
reinterpret_cast
<
const
std
::
remove_const_t
<
T
>
*>
(
data
<
T
>
()),
shape
,
stride
);
},
[
&
](
auto
_
){
return
TensorView
<
const
std
::
remove_const_t
<
T
>
,
Rank
,
PtrTraits
,
Tindex
>
(
reinterpret_cast
<
const
std
::
remove_const_t
<
T
>
*>
(
data
<
T
>
()),
shape
,
stride
);
},
[
&
](
auto
_
)
{
ShapeBase
<
TV_MAX_DIM
,
Tindex
>
shape
(
_
(
ndim
())),
stride
(
ndim
());
for
(
int
i
=
0
;
i
<
int
(
ndim
());
++
i
)
{
shape
[
i
]
=
shape_
[
i
];
stride
[
i
]
=
stride_
[
i
];
}
return
TensorView
<
const
std
::
remove_const_t
<
T
>
,
Rank
,
PtrTraits
,
Tindex
>
(
reinterpret_cast
<
const
std
::
remove_const_t
<
T
>
*>
(
data
<
T
>
()),
shape
,
stride
);
return
TensorView
<
const
std
::
remove_const_t
<
T
>
,
Rank
,
PtrTraits
,
Tindex
>
(
reinterpret_cast
<
const
std
::
remove_const_t
<
T
>
*>
(
data
<
T
>
()),
shape
,
stride
);
});
}
template
<
class
...
Inds
>
Tensor
view
(
Inds
...
newShapes
)
const
{
...
...
include/tensorview/tools.h
View file @
3517290c
...
...
@@ -36,22 +36,21 @@ template <typename TimeT = std::chrono::microseconds> struct CudaContextTimer {
return
res
;
}
template
<
int
Count
,
typename
F
>
double
benchmark
(
F
&&
f
,
int
start
=
int
(
Count
)
*
0.3
){
double
benchmark
(
F
&&
f
,
int
start
=
int
(
Count
)
*
0.3
)
{
// std::vector<TimeT::rep> times;
auto
res
=
typename
TimeT
::
rep
();
int
count
=
0
;
cudaDeviceSynchronize
();
for
(
int
i
=
0
;
i
<
Count
;
++
i
){
for
(
int
i
=
0
;
i
<
Count
;
++
i
)
{
std
::
forward
<
F
>
(
f
)();
auto
time
=
report
();
if
(
i
>=
start
){
if
(
i
>=
start
)
{
// times.push_back(time)
res
+=
time
;
count
+=
1
;
}
}
return
res
/
double
(
count
);
}
private:
...
...
include/tensorview/torch_utils.h
View file @
3517290c
...
...
@@ -14,9 +14,9 @@
#pragma once
#include "mp_helper.h"
#include <tensorview/tensorview.h>
#include <tensorview/tensor.h>
#include <ATen/ATen.h>
#include <tensorview/tensor.h>
#include <tensorview/tensorview.h>
#include <torch/script.h>
#ifdef TV_CUDA
#include <ATen/cuda/CUDAContext.h>
...
...
spconv/__init__.py
View file @
3517290c
...
...
@@ -22,6 +22,7 @@ from spconv import ops, utils
from
spconv.conv
import
(
SparseConv2d
,
SparseConv3d
,
SparseConvTranspose2d
,
SparseConvTranspose3d
,
SparseInverseConv2d
,
SparseInverseConv3d
,
SubMConv2d
,
SubMConv3d
)
from
spconv.core
import
SparseConvTensor
from
spconv.identity
import
Identity
from
spconv.modules
import
SparseModule
,
SparseSequential
from
spconv.ops
import
ConvAlgo
...
...
@@ -35,85 +36,6 @@ _LIB_PATH = str(Path(__file__).parent / _LIB_FILE_NAME)
torch
.
ops
.
load_library
(
_LIB_PATH
)
def
scatter_nd
(
indices
,
updates
,
shape
):
"""pytorch edition of tensorflow scatter_nd.
this function don't contain except handle code. so use this carefully
when indice repeats, don't support repeat add which is supported
in tensorflow.
"""
ret
=
torch
.
zeros
(
*
shape
,
dtype
=
updates
.
dtype
,
device
=
updates
.
device
)
ndim
=
indices
.
shape
[
-
1
]
output_shape
=
list
(
indices
.
shape
[:
-
1
])
+
shape
[
indices
.
shape
[
-
1
]:]
flatted_indices
=
indices
.
view
(
-
1
,
ndim
)
slices
=
[
flatted_indices
[:,
i
]
for
i
in
range
(
ndim
)]
slices
+=
[
Ellipsis
]
ret
[
slices
]
=
updates
.
view
(
*
output_shape
)
return
ret
class
SparseConvTensor
(
object
):
def
__init__
(
self
,
features
,
indices
,
spatial_shape
,
batch_size
,
grid
=
None
):
"""
Args:
features: [num_points, num_features] feature tensor
indices: [num_points, ndim + 1] indice tensor. batch index saved in indices[:, 0]
spatial_shape: spatial shape of your sparse data
batch_size: batch size of your sparse data
grid: pre-allocated grid tensor. should be used when the volume of spatial shape
is very large.
"""
self
.
features
=
features
self
.
indices
=
indices
self
.
spatial_shape
=
spatial_shape
self
.
batch_size
=
batch_size
self
.
indice_dict
=
{}
if
grid
is
None
:
grid
=
torch
.
Tensor
()
# empty tensor
self
.
grid
=
grid
@
classmethod
def
from_dense
(
cls
,
x
:
torch
.
Tensor
):
"""create sparse tensor fron channel last dense tensor by to_sparse
x must be NHWC tensor, channel last
"""
x
=
x
.
to_sparse
(
x
.
ndim
-
1
)
spatial_shape
=
x
.
shape
[
1
:
-
1
]
batch_size
=
x
.
shape
[
0
]
indices_th
=
x
.
indices
().
permute
(
1
,
0
).
contiguous
().
int
()
features_th
=
x
.
values
()
return
cls
(
features_th
,
indices_th
,
spatial_shape
,
batch_size
)
@
property
def
spatial_size
(
self
):
return
np
.
prod
(
self
.
spatial_shape
)
def
find_indice_pair
(
self
,
key
):
if
key
is
None
:
return
None
if
key
in
self
.
indice_dict
:
return
self
.
indice_dict
[
key
]
return
None
def
dense
(
self
,
channels_first
=
True
):
output_shape
=
[
self
.
batch_size
]
+
list
(
self
.
spatial_shape
)
+
[
self
.
features
.
shape
[
1
]]
res
=
scatter_nd
(
self
.
indices
.
to
(
self
.
features
.
device
).
long
(),
self
.
features
,
output_shape
)
if
not
channels_first
:
return
res
ndim
=
len
(
self
.
spatial_shape
)
trans_params
=
list
(
range
(
0
,
ndim
+
1
))
trans_params
.
insert
(
1
,
ndim
+
1
)
return
res
.
permute
(
*
trans_params
).
contiguous
()
@
property
def
sparity
(
self
):
return
self
.
indices
.
shape
[
0
]
/
np
.
prod
(
self
.
spatial_shape
)
/
self
.
batch_size
class
ToDense
(
SparseModule
):
"""convert SparseConvTensor to NCHW dense tensor.
"""
...
...
spconv/conv.py
View file @
3517290c
...
...
@@ -24,6 +24,7 @@ from torch.nn.parameter import Parameter
import
spconv
import
spconv.functional
as
Fsp
from
spconv
import
ops
from
spconv.core
import
IndiceData
,
SparseConvTensor
from
spconv.modules
import
SparseModule
...
...
@@ -72,8 +73,9 @@ class SparseConvolution(SparseModule):
indice_key
=
None
,
fused_bn
=
False
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
super
(
SparseConvolution
,
self
).
__init__
()
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseConvolution
,
self
).
__init__
(
name
=
name
)
assert
groups
==
1
if
not
isinstance
(
kernel_size
,
(
list
,
tuple
)):
kernel_size
=
[
kernel_size
]
*
ndim
...
...
@@ -123,8 +125,8 @@ class SparseConvolution(SparseModule):
bound
=
1
/
math
.
sqrt
(
fan_in
)
init
.
uniform_
(
self
.
bias
,
-
bound
,
bound
)
def
forward
(
self
,
input
):
assert
isinstance
(
input
,
spconv
.
SparseConvTensor
)
def
forward
(
self
,
input
:
SparseConvTensor
):
assert
isinstance
(
input
,
SparseConvTensor
)
features
=
input
.
features
device
=
features
.
device
indices
=
input
.
indices
...
...
@@ -143,29 +145,58 @@ class SparseConvolution(SparseModule):
out_spatial_shape
=
spatial_shape
# input.update_grid(out_spatial_shape)
# t = time.time()
out_tensor
=
input
.
shadow_copy
()
if
input
.
benchmark
:
if
self
.
name
is
None
:
raise
ValueError
(
"you need to assign name to spmodules before benchmark (spconv.utils.bench.assign_name_to_spmod)"
)
if
self
.
name
not
in
input
.
benchmark_record
:
input
.
benchmark_record
[
self
.
name
]
=
{
"type"
:
"SparseConvolution"
,
"indice_gen_time"
:
[],
"time"
:
[],
"num_points"
:
[],
"num_out_points"
:
[],
"params"
:
{
"kernel_size"
:
self
.
kernel_size
,
"stride"
:
self
.
stride
,
"padding"
:
self
.
padding
,
"dilation"
:
self
.
dilation
,
"output_padding"
:
self
.
output_padding
,
"subm"
:
self
.
subm
,
"transposed"
:
self
.
transposed
,
"input_channels"
:
self
.
in_channels
,
"out_channels"
:
self
.
out_channels
,
}
}
if
self
.
conv1x1
:
features
=
torch
.
mm
(
input
.
features
,
self
.
weight
.
view
(
self
.
in_channels
,
self
.
out_channels
))
if
self
.
bias
is
not
None
:
features
+=
self
.
bias
out_tensor
=
spconv
.
SparseConvTensor
(
features
,
input
.
indices
,
input
.
spatial_shape
,
input
.
batch_size
)
out_tensor
.
indice_dict
=
input
.
indice_dict
out_tensor
.
grid
=
input
.
grid
out_tensor
.
features
=
features
return
out_tensor
datas
=
input
.
find_indice_pair
(
self
.
indice_key
)
if
self
.
inverse
:
assert
datas
is
not
None
and
self
.
indice_key
is
not
None
_
,
outids
,
indice_pairs
,
indice_pair_num
,
out_spatial_shape
=
datas
outids
=
datas
.
indices
indice_pairs
=
datas
.
indice_pairs
indice_pair_num
=
datas
.
indice_pair_num
out_spatial_shape
=
datas
.
out_spatial_shape
assert
indice_pair_num
.
shape
[
0
]
==
np
.
prod
(
self
.
kernel_size
),
"inverse conv must have same kernel size as its couple conv"
else
:
if
self
.
indice_key
is
not
None
and
datas
is
not
None
:
outids
,
_
,
indice_pairs
,
indice_pair_num
,
_
=
datas
outids
=
datas
.
out_indices
indice_pairs
=
datas
.
indice_pairs
indice_pair_num
=
datas
.
indice_pair_num
else
:
if
input
.
benchmark
:
torch
.
cuda
.
synchronize
()
t
=
time
.
time
()
outids
,
indice_pairs
,
indice_pair_num
=
ops
.
get_indice_pairs
(
indices
,
batch_size
,
...
...
@@ -179,10 +210,19 @@ class SparseConvolution(SparseModule):
self
.
transposed
,
grid
=
input
.
grid
,
use_hash
=
self
.
use_hash
)
input
.
indice_dict
[
self
.
indice_key
]
=
(
outids
,
indices
,
indice_pairs
,
indice_pair_num
,
spatial_shape
)
if
input
.
benchmark
:
torch
.
cuda
.
synchronize
()
interval
=
time
.
time
()
-
t
out_tensor
.
benchmark_record
[
self
.
name
][
"indice_gen_time"
].
append
(
interval
)
indice_data
=
IndiceData
(
outids
,
indices
,
indice_pairs
,
indice_pair_num
,
spatial_shape
)
input
.
indice_dict
[
self
.
indice_key
]
=
indice_data
if
input
.
benchmark
:
torch
.
cuda
.
synchronize
()
t
=
time
.
time
()
if
self
.
fused_bn
:
assert
self
.
bias
is
not
None
out_features
=
ops
.
fused_indice_conv
(
features
,
self
.
weight
,
...
...
@@ -210,10 +250,18 @@ class SparseConvolution(SparseModule):
if
self
.
bias
is
not
None
:
out_features
+=
self
.
bias
out_tensor
=
spconv
.
SparseConvTensor
(
out_features
,
outids
,
out_spatial_shape
,
batch_size
)
out_tensor
.
indice_dict
=
input
.
indice_dict
out_tensor
.
grid
=
input
.
grid
if
input
.
benchmark
:
torch
.
cuda
.
synchronize
()
interval
=
time
.
time
()
-
t
out_tensor
.
benchmark_record
[
self
.
name
][
"time"
].
append
(
interval
)
out_tensor
.
benchmark_record
[
self
.
name
][
"num_points"
].
append
(
features
.
shape
[
0
])
out_tensor
.
benchmark_record
[
self
.
name
][
"num_out_points"
].
append
(
out_features
.
shape
[
0
])
out_tensor
.
features
=
out_features
out_tensor
.
indices
=
outids
out_tensor
.
spatial_shape
=
out_spatial_shape
return
out_tensor
...
...
@@ -229,7 +277,8 @@ class SparseConv2d(SparseConvolution):
bias
=
True
,
indice_key
=
None
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseConv2d
,
self
).
__init__
(
2
,
in_channels
,
out_channels
,
...
...
@@ -241,7 +290,8 @@ class SparseConv2d(SparseConvolution):
bias
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SparseConv3d
(
SparseConvolution
):
...
...
@@ -256,7 +306,8 @@ class SparseConv3d(SparseConvolution):
bias
=
True
,
indice_key
=
None
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseConv3d
,
self
).
__init__
(
3
,
in_channels
,
out_channels
,
...
...
@@ -268,7 +319,8 @@ class SparseConv3d(SparseConvolution):
bias
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SparseConv4d
(
SparseConvolution
):
...
...
@@ -283,7 +335,8 @@ class SparseConv4d(SparseConvolution):
bias
=
True
,
indice_key
=
None
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseConv4d
,
self
).
__init__
(
4
,
in_channels
,
out_channels
,
...
...
@@ -295,7 +348,8 @@ class SparseConv4d(SparseConvolution):
bias
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SparseConvTranspose2d
(
SparseConvolution
):
...
...
@@ -310,7 +364,8 @@ class SparseConvTranspose2d(SparseConvolution):
bias
=
True
,
indice_key
=
None
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseConvTranspose2d
,
self
).
__init__
(
2
,
in_channels
,
out_channels
,
...
...
@@ -323,7 +378,8 @@ class SparseConvTranspose2d(SparseConvolution):
transposed
=
True
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SparseConvTranspose3d
(
SparseConvolution
):
...
...
@@ -338,7 +394,8 @@ class SparseConvTranspose3d(SparseConvolution):
bias
=
True
,
indice_key
=
None
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseConvTranspose3d
,
self
).
__init__
(
3
,
in_channels
,
out_channels
,
...
...
@@ -351,7 +408,8 @@ class SparseConvTranspose3d(SparseConvolution):
transposed
=
True
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SparseInverseConv2d
(
SparseConvolution
):
...
...
@@ -361,7 +419,8 @@ class SparseInverseConv2d(SparseConvolution):
kernel_size
,
indice_key
,
bias
=
True
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseInverseConv2d
,
self
).
__init__
(
2
,
in_channels
,
out_channels
,
...
...
@@ -369,7 +428,8 @@ class SparseInverseConv2d(SparseConvolution):
bias
=
bias
,
inverse
=
True
,
indice_key
=
indice_key
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SparseInverseConv3d
(
SparseConvolution
):
...
...
@@ -379,7 +439,8 @@ class SparseInverseConv3d(SparseConvolution):
kernel_size
,
indice_key
,
bias
=
True
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SparseInverseConv3d
,
self
).
__init__
(
3
,
in_channels
,
out_channels
,
...
...
@@ -387,7 +448,8 @@ class SparseInverseConv3d(SparseConvolution):
bias
=
bias
,
inverse
=
True
,
indice_key
=
indice_key
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SubMConv2d
(
SparseConvolution
):
...
...
@@ -402,7 +464,8 @@ class SubMConv2d(SparseConvolution):
bias
=
True
,
indice_key
=
None
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SubMConv2d
,
self
).
__init__
(
2
,
in_channels
,
out_channels
,
...
...
@@ -415,7 +478,8 @@ class SubMConv2d(SparseConvolution):
True
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SubMConv3d
(
SparseConvolution
):
...
...
@@ -430,7 +494,8 @@ class SubMConv3d(SparseConvolution):
bias
=
True
,
indice_key
=
None
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SubMConv3d
,
self
).
__init__
(
3
,
in_channels
,
out_channels
,
...
...
@@ -443,7 +508,8 @@ class SubMConv3d(SparseConvolution):
True
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
class
SubMConv4d
(
SparseConvolution
):
...
...
@@ -458,7 +524,8 @@ class SubMConv4d(SparseConvolution):
bias
=
True
,
indice_key
=
None
,
use_hash
=
False
,
algo
=
ops
.
ConvAlgo
.
Native
):
algo
=
ops
.
ConvAlgo
.
Native
,
name
=
None
):
super
(
SubMConv4d
,
self
).
__init__
(
4
,
in_channels
,
out_channels
,
...
...
@@ -471,4 +538,5 @@ class SubMConv4d(SparseConvolution):
True
,
indice_key
=
indice_key
,
use_hash
=
use_hash
,
algo
=
algo
)
algo
=
algo
,
name
=
name
)
spconv/core.py
0 → 100644
View file @
3517290c
from
typing
import
Optional
import
numpy
as
np
import
torch
class IndiceData(object):
    """Plain record of the index data produced for one sparse operation.

    The constructor stores its five arguments unchanged under attributes of
    the same names; nothing is validated, copied or converted.
    NOTE(review): presumably the values are tensors plus a spatial-shape
    sequence as produced by ``ops.get_indice_pairs`` -- confirm with callers.
    """

    def __init__(self, out_indices, indices, indice_pairs, indice_pair_num,
                 out_spatial_shape):
        # Output-side and input-side indices.
        self.out_indices = out_indices
        self.indices = indices
        # Pair table and its per-entry counts.
        self.indice_pairs = indice_pairs
        self.indice_pair_num = indice_pair_num
        # Spatial shape recorded alongside the pairs.
        self.out_spatial_shape = out_spatial_shape
def scatter_nd(indices, updates, shape):
    """PyTorch edition of TensorFlow's ``scatter_nd``.

    Builds a zero tensor of ``shape`` (same dtype/device as ``updates``) and
    writes ``updates`` into the positions addressed by ``indices``.

    Args:
        indices: integer tensor whose last dimension (size ``ndim``) holds
            the coordinates into the first ``ndim`` dimensions of ``shape``.
        updates: values to write; must contain exactly one slice of shape
            ``shape[ndim:]`` per index row.
        shape: output shape. Any sequence is accepted (list, tuple,
            ``torch.Size``).

    Returns:
        The newly allocated tensor of shape ``shape``.

    Note:
        There is no error handling here, so use this carefully. Unlike
        TensorFlow, repeated indices are NOT accumulated; the value kept for
        a repeated index is whatever the indexed assignment leaves there.
    """
    # Normalise so tuples / torch.Size can be concatenated with lists below.
    shape = list(shape)
    ret = torch.zeros(*shape, dtype=updates.dtype, device=updates.device)
    ndim = indices.shape[-1]
    flatted_indices = indices.view(-1, ndim)
    # The indexed target has one row per flattened index, so the updates are
    # viewed with a single leading dimension of that exact size.
    flat_update_shape = [flatted_indices.shape[0]] + shape[ndim:]
    # Multidimensional indexing must use a tuple; a list is the deprecated
    # "non-tuple sequence" form.
    slices = tuple(flatted_indices[:, i] for i in range(ndim)) + (Ellipsis,)
    ret[slices] = updates.view(*flat_update_shape)
    return ret
class SparseConvTensor(object):
    """Sparse tensor container: features plus their indices and bookkeeping."""

    def __init__(self,
                 features,
                 indices,
                 spatial_shape,
                 batch_size,
                 grid=None,
                 benchmark=False):
        """
        Args:
            features: [num_points, num_features] feature tensor
            indices: [num_points, ndim + 1] indice tensor. batch index saved
                in indices[:, 0]
            spatial_shape: spatial shape of your sparse data
            batch_size: batch size of your sparse data
            grid: pre-allocated grid tensor. should be used when the volume
                of spatial shape is very large.
            benchmark: whether to enable benchmark. if enabled, all sparse
                operators will be recorded to SparseConvTensor.
        """
        self.features = features
        self.indices = indices
        self.spatial_shape = spatial_shape
        self.batch_size = batch_size
        # Indice data keyed by indice_key; see find_indice_pair.
        self.indice_dict = {}
        # An empty tensor stands in when no grid was pre-allocated.
        self.grid = torch.Tensor() if grid is None else grid
        self.benchmark = benchmark
        self.benchmark_record = {}

    @classmethod
    def from_dense(cls, x: torch.Tensor):
        """Create a sparse tensor from a channel-last dense tensor.

        ``x`` must be an NHWC-style tensor (channel last); it is converted
        with ``to_sparse`` over every dimension except the last one.
        """
        sparse = x.to_sparse(x.ndim - 1)
        # indices() is [sparse_dims, nnz]; transpose to one row per point.
        coords = sparse.indices().permute(1, 0).contiguous().int()
        return cls(sparse.values(), coords, sparse.shape[1:-1],
                   sparse.shape[0])

    @property
    def spatial_size(self):
        """Product of the entries of ``spatial_shape``."""
        return np.prod(self.spatial_shape)

    def find_indice_pair(self, key) -> Optional[IndiceData]:
        """Return the indice data stored under ``key``, or None.

        None is returned both for a ``None`` key and for an unknown key.
        """
        if key is None:
            return None
        return self.indice_dict.get(key)

    def dense(self, channels_first=True):
        """Scatter the features into a dense tensor.

        The result is laid out as [batch, *spatial_shape, channels]; with
        ``channels_first`` (the default) the channel dimension is moved to
        position 1 and the result made contiguous.
        """
        num_channels = self.features.shape[1]
        full_shape = [self.batch_size, *self.spatial_shape, num_channels]
        coords = self.indices.to(self.features.device).long()
        dense_out = scatter_nd(coords, self.features, full_shape)
        if channels_first:
            ndim = len(self.spatial_shape)
            # [0, C, 1..ndim]: batch first, then channels, then spatial dims.
            order = [0, ndim + 1, *range(1, ndim + 1)]
            return dense_out.permute(*order).contiguous()
        return dense_out

    @property
    def sparity(self):
        """Number of points divided by spatial volume and by batch size."""
        num_points = self.indices.shape[0]
        return num_points / np.prod(self.spatial_shape) / self.batch_size

    def shadow_copy(self) -> "SparseConvTensor":
        """create a new spconv tensor with all member unchanged"""
        clone = SparseConvTensor(self.features, self.indices,
                                 self.spatial_shape, self.batch_size,
                                 self.grid, self.benchmark)
        # These two dicts are shared with the source tensor, not copied.
        clone.benchmark_record = self.benchmark_record
        clone.indice_dict = self.indice_dict
        return clone
spconv/modules.py
View file @
3517290c
...
...
@@ -49,7 +49,9 @@ def _mean_update(vals, m_vals, t):
class SparseModule(nn.Module):
    """Placeholder base class: every module derived from it is handed the
    sparse tensor itself inside SparseSequential.

    Args:
        name: optional identifier stored on the instance as ``self.name``.
    """

    def __init__(self, name=None):
        super(SparseModule, self).__init__()
        self.name = name
class
SparseSequential
(
SparseModule
):
...
...
spconv/ops.py
View file @
3517290c
...
...
@@ -26,6 +26,7 @@ class ConvAlgo(Enum):
SparseConvNet
=
3
Minkowski
=
4
# https://github.com/StanfordVL/MinkowskiEngine/blob/master/src/convolution.cu
def
get_conv_output_size
(
input_size
,
kernel_size
,
stride
,
padding
,
dilation
):
ndim
=
len
(
input_size
)
output_size
=
[]
...
...
spconv/pool.py
View file @
3517290c
...
...
@@ -24,6 +24,7 @@ from torch.nn.parameter import Parameter
import
spconv
import
spconv.functional
as
Fsp
from
spconv
import
ops
from
spconv.core
import
IndiceData
from
spconv.modules
import
SparseModule
...
...
@@ -34,8 +35,10 @@ class SparseMaxPool(SparseModule):
stride
=
None
,
padding
=
0
,
dilation
=
1
,
subm
=
False
):
super
(
SparseMaxPool
,
self
).
__init__
()
indice_key
=
None
,
subm
=
False
,
name
=
None
):
super
(
SparseMaxPool
,
self
).
__init__
(
name
=
name
)
if
not
isinstance
(
kernel_size
,
(
list
,
tuple
)):
kernel_size
=
[
kernel_size
]
*
ndim
if
stride
is
None
:
...
...
@@ -52,6 +55,7 @@ class SparseMaxPool(SparseModule):
self
.
padding
=
padding
self
.
subm
=
subm
self
.
dilation
=
dilation
self
.
indice_key
=
indice_key
def
forward
(
self
,
input
):
assert
isinstance
(
input
,
spconv
.
SparseConvTensor
)
...
...
@@ -66,6 +70,32 @@ class SparseMaxPool(SparseModule):
self
.
dilation
)
else
:
out_spatial_shape
=
spatial_shape
out_tensor
=
input
.
shadow_copy
()
if
input
.
benchmark
:
if
self
.
name
is
None
:
raise
ValueError
(
"you need to assign name to spmodules before benchmark (spconv.utils.bench.assign_name_to_spmod)"
)
if
self
.
name
not
in
input
.
benchmark_record
:
input
.
benchmark_record
[
self
.
name
]
=
{
"type"
:
"SparseMaxPool"
,
"indice_gen_time"
:
[],
"time"
:
[],
"num_points"
:
[],
"num_out_points"
:
[],
"params"
:
{
"kernel_size"
:
self
.
kernel_size
,
"stride"
:
self
.
stride
,
"padding"
:
self
.
padding
,
"dilation"
:
self
.
dilation
,
"channels"
:
features
.
shape
[
1
],
}
}
if
input
.
benchmark
:
torch
.
cuda
.
synchronize
()
t
=
time
.
time
()
outids
,
indice_pairs
,
indice_pairs_num
=
ops
.
get_indice_pairs
(
indices
,
batch_size
,
...
...
@@ -77,24 +107,65 @@ class SparseMaxPool(SparseModule):
0
,
self
.
subm
,
grid
=
input
.
grid
)
if
input
.
benchmark
:
torch
.
cuda
.
synchronize
()
interval
=
time
.
time
()
-
t
out_tensor
.
benchmark_record
[
self
.
name
][
"indice_gen_time"
].
append
(
interval
)
t
=
time
.
time
()
if
self
.
indice_key
is
not
None
:
datas
=
input
.
find_indice_pair
(
self
.
indice_key
)
if
datas
is
None
:
indice_data
=
IndiceData
(
outids
,
indices
,
indice_pairs
,
indice_pairs_num
,
spatial_shape
)
input
.
indice_dict
[
self
.
indice_key
]
=
indice_data
else
:
raise
ValueError
(
"indice data exists"
)
out_features
=
Fsp
.
indice_maxpool
(
features
,
indice_pairs
.
to
(
device
),
indice_pairs_num
.
to
(
device
),
outids
.
shape
[
0
])
out_tensor
=
spconv
.
SparseConvTensor
(
out_features
,
outids
,
out_spatial_shape
,
batch_size
)
out_tensor
.
indice_dict
=
input
.
indice_dict
out_tensor
.
grid
=
input
.
grid
if
input
.
benchmark
:
torch
.
cuda
.
synchronize
()
interval
=
time
.
time
()
-
t
out_tensor
.
benchmark_record
[
self
.
name
][
"time"
].
append
(
interval
)
out_tensor
.
benchmark_record
[
self
.
name
][
"num_points"
].
append
(
features
.
shape
[
0
])
out_tensor
.
benchmark_record
[
self
.
name
][
"num_out_points"
].
append
(
out_features
.
shape
[
0
])
out_tensor
.
features
=
out_features
out_tensor
.
indices
=
outids
out_tensor
.
spatial_shape
=
out_spatial_shape
return
out_tensor
class SparseMaxPool2d(SparseMaxPool):
    """SparseMaxPool with the number of dimensions fixed to 2."""

    # The earlier __init__ without ``name`` was shadowed by this definition
    # and has been removed as dead code.
    def __init__(self, kernel_size, stride=None, padding=0, dilation=1,
                 name=None):
        """Forward all arguments to SparseMaxPool with ndim=2.

        Args:
            kernel_size, stride, padding, dilation: passed through unchanged.
            name: optional module name, passed through as a keyword.
        """
        super(SparseMaxPool2d, self).__init__(2,
                                              kernel_size,
                                              stride,
                                              padding,
                                              dilation,
                                              name=name)
class SparseMaxPool3d(SparseMaxPool):
    """SparseMaxPool with the number of dimensions fixed to 3."""

    # The earlier __init__ without ``name`` was shadowed by this definition
    # and has been removed as dead code.
    def __init__(self, kernel_size, stride=None, padding=0, dilation=1,
                 name=None):
        """Forward all arguments to SparseMaxPool with ndim=3.

        Args:
            kernel_size, stride, padding, dilation: passed through unchanged.
            name: optional module name, passed through as a keyword.
        """
        super(SparseMaxPool3d, self).__init__(3,
                                              kernel_size,
                                              stride,
                                              padding,
                                              dilation,
                                              name=name)
spconv/spatial.py
View file @
3517290c
...
...
@@ -24,6 +24,7 @@ from torch.nn.parameter import Parameter
import
spconv
from
spconv.modules
import
SparseModule
class
RemoveDuplicate
(
SparseModule
):
def
forward
(
self
,
x
:
spconv
.
SparseConvTensor
):
inds
=
x
.
indices
...
...
@@ -39,5 +40,6 @@ class RemoveDuplicate(SparseModule):
_
,
unique_inds
=
torch
.
unique
(
indices_index
)
new_inds
=
inds
[
unique_inds
]
new_features
=
x
.
features
[
unique_inds
]
res
=
spconv
.
SparseConvTensor
(
new_features
,
new_inds
,
x
.
spatial_shape
,
x
.
batch_size
,
x
.
grid
)
res
=
spconv
.
SparseConvTensor
(
new_features
,
new_inds
,
x
.
spatial_shape
,
x
.
batch_size
,
x
.
grid
)
return
res
spconv/utils/__init__.py
View file @
3517290c
...
...
@@ -294,20 +294,18 @@ class VoxelGeneratorV2:
def
grid_size
(
self
):
return
self
.
_grid_size
class
VoxelGeneratorV3
:
def
__init__
(
self
,
voxel_size
,
point_cloud_range
,
max_points
,
num_features
,
dtype
,
device
):
def
__init__
(
self
,
voxel_size
,
point_cloud_range
,
max_points
,
num_features
,
dtype
,
device
):
self
.
_max_points
=
max_points
self
.
_point_cloud_range
=
point_cloud_range
self
.
_voxel_size
=
voxel_size
self
.
_grid_size
=
torch
.
round
((
self
.
_point_cloud_range
[
3
:]
-
self
.
_point_cloud_range
[:
3
])
/
self
.
_voxel_size
).
to
(
torch
.
int32
)
self
.
_grid_size
=
torch
.
round
(
(
self
.
_point_cloud_range
[
3
:]
-
self
.
_point_cloud_range
[:
3
])
/
self
.
_voxel_size
).
to
(
torch
.
int32
)
grid_volume
=
self
.
_grid_size
.
prod
()
self
.
_grid_size
=
self
.
_grid_size
.
cpu
().
numpy
().
tolist
()
self
.
_ndim
=
len
(
self
.
_grid_size
)
...
...
@@ -315,19 +313,34 @@ class VoxelGeneratorV3:
self
.
_dtype
=
dtype
self
.
_device
=
device
self
.
_point_index
=
torch
.
full
([
max_points
+
1
],
grid_volume
,
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
self
.
_grids
=
torch
.
zeros
([
grid_volume
,
num_features
],
dtype
=
self
.
_dtype
,
device
=
self
.
_device
)
self
.
_num_points_per_grid
=
torch
.
zeros
([
grid_volume
],
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
self
.
_voxels
=
torch
.
zeros
([
max_points
,
num_features
],
dtype
=
self
.
_dtype
,
device
=
self
.
_device
)
self
.
_coors
=
torch
.
zeros
([
max_points
,
self
.
_ndim
],
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
self
.
_point_index
=
torch
.
full
([
max_points
+
1
],
grid_volume
,
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
self
.
_grids
=
torch
.
zeros
([
grid_volume
,
num_features
],
dtype
=
self
.
_dtype
,
device
=
self
.
_device
)
self
.
_num_points_per_grid
=
torch
.
zeros
([
grid_volume
],
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
self
.
_voxels
=
torch
.
zeros
([
max_points
,
num_features
],
dtype
=
self
.
_dtype
,
device
=
self
.
_device
)
self
.
_coors
=
torch
.
zeros
([
max_points
,
self
.
_ndim
],
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
def generate(self, points):
    """Convert ``points`` to the generator's dtype/device and voxelize them.

    Args:
        points: tensor whose first dimension is the number of points; it
            must not exceed ``self._max_points``.

    Returns:
        Whatever ``self.points_to_voxel`` returns for the converted points.

    Raises:
        AssertionError: if there are more points than ``self._max_points``.
    """
    assert points.shape[0] <= self._max_points, \
        'please enlarge max_points to not smaller than ' + str(
            points.shape[0])
    # Tensor.to() returns the converted tensor rather than modifying in
    # place; the result must be kept or the conversion is silently lost.
    points = points.to(self._dtype).to(self._device)
    return self.points_to_voxel(points)
def generate_multi_gpu(self, points):
    """Convert ``points`` to the generator's dtype/device and voxelize them.

    The visible implementation performs the same steps as ``generate``.

    Args:
        points: tensor whose first dimension is the number of points; it
            must not exceed ``self._max_points``.

    Returns:
        Whatever ``self.points_to_voxel`` returns for the converted points.

    Raises:
        AssertionError: if there are more points than ``self._max_points``.
    """
    assert points.shape[0] <= self._max_points, \
        'please enlarge max_points to not smaller than ' + str(
            points.shape[0])
    # Tensor.to() returns the converted tensor rather than modifying in
    # place; the result must be kept or the conversion is silently lost.
    points = points.to(self._dtype).to(self._device)
    return self.points_to_voxel(points)
...
...
@@ -351,23 +364,21 @@ class VoxelGeneratorV3:
coors_range: [6] list/tuple or array or tensor, float. indicate voxel range.
format: xyzxyz, minmax
"""
indexes
=
torch
.
floor
((
points
[:,
:
3
]
-
self
.
_point_cloud_range
[:
3
])
/
self
.
_voxel_size
).
to
(
torch
.
int32
)
num_voxel
=
torch
.
ops
.
spconv
.
points_to_voxel
(
points
,
indexes
,
self
.
_point_index
,
self
.
_grids
,
self
.
_num_points_per_grid
,
self
.
_voxels
,
self
.
_coors
,
self
.
_grid_size
,
self
.
_ndim
)
indexes
=
torch
.
floor
((
points
[:,
:
3
]
-
self
.
_point_cloud_range
[:
3
])
/
self
.
_voxel_size
).
to
(
torch
.
int32
)
num_voxel
=
torch
.
ops
.
spconv
.
points_to_voxel
(
points
,
indexes
,
self
.
_point_index
,
self
.
_grids
,
self
.
_num_points_per_grid
,
self
.
_voxels
,
self
.
_coors
,
self
.
_grid_size
,
self
.
_ndim
)
voxels
=
self
.
_voxels
[:
num_voxel
,
:]
coors
=
self
.
_coors
[:
num_voxel
,
:]
# xyz --> zyx
#coors = coors[::-1]
x
,
y
,
z
=
coors
[:,
0
].
reshape
([
-
1
,
1
]),
coors
[:,
1
].
reshape
([
-
1
,
1
]),
coors
[:,
2
].
reshape
([
-
1
,
1
])
x
,
y
,
z
=
coors
[:,
0
].
reshape
([
-
1
,
1
]),
coors
[:,
1
].
reshape
(
[
-
1
,
1
]),
coors
[:,
2
].
reshape
([
-
1
,
1
])
coors
=
torch
.
cat
([
z
,
y
,
x
],
dim
=
1
)
# can be skipped
# x, y, z, f = voxels[:, 0].reshape([-1, 1]), voxels[:, 1].reshape([-1, 1]), voxels[:, 2].reshape([-1, 1]), voxels[:, 3:]
# voxels = torch.cat([z, y, x, f], dim=1)
# x, y, z, f = voxels[:, 0].reshape([-1, 1]), voxels[:, 1].reshape([-1, 1]), voxels[:, 2].reshape([-1, 1]), voxels[:, 3:]
# voxels = torch.cat([z, y, x, f], dim=1)
return
voxels
,
coors
src/spconv/fused_conv.cu
View file @
3517290c
...
...
@@ -15,8 +15,8 @@
#include <ATen/ATen.h>
#include <spconv/fused_conv.cu.h>
#include <spconv/fused_conv.h>
#include <tensorview/torch_utils.h>
#include <spconv/minkowski.cu.h>
#include <tensorview/torch_utils.h>
namespace
spconv
{
void
fused_conv_cuda
(
torch
::
Tensor
output
,
torch
::
Tensor
features
,
...
...
@@ -81,10 +81,9 @@ void fused_conv_cuda_minkowski(torch::Tensor output, torch::Tensor features,
int
step
=
(
nHot
+
num_div
-
1
)
/
num_div
;
dim3
threads
(
shared_mem_size
,
shared_mem_size
);
tv
::
dispatch_torch
<
float
>
(
dtype
,
[
&
](
auto
I
)
{
using
T
=
decltype
(
I
);
tv
::
DispatchInt
<
shmem_sizes_t
>
()(
shared_mem_size
,
[
&
](
auto
ShSizeValue
){
tv
::
DispatchInt
<
shmem_sizes_t
>
()(
shared_mem_size
,
[
&
](
auto
ShSizeValue
)
{
constexpr
int
ShmemSize
=
decltype
(
ShSizeValue
)
::
value
;
for
(
int
s
=
0
;
s
<
num_div
;
s
++
)
{
int
remainder
=
nHot
-
step
*
s
;
...
...
@@ -93,16 +92,18 @@ void fused_conv_cuda_minkowski(torch::Tensor output, torch::Tensor features,
(
curr_num_active
+
threads
.
y
-
1
)
/
threads
.
y
);
matmul
<
T
,
int32_t
,
ShmemSize
><<<
grid
,
threads
,
0
,
stream
>>>
(
features
.
data_ptr
<
T
>
(),
in_nchannel
,
curr_num_active
,
filters
.
data_ptr
<
T
>
(),
out_nchannel
,
in_nchannel
,
output
.
data_ptr
<
T
>
(),
indicesIn
.
data_ptr
<
int32_t
>
(),
filters
.
data_ptr
<
T
>
(),
out_nchannel
,
in_nchannel
,
output
.
data_ptr
<
T
>
(),
indicesIn
.
data_ptr
<
int32_t
>
(),
indicesOut
.
data_ptr
<
int32_t
>
());
}
});
});
}
void
fused_conv_backward_cuda_minkowski
(
torch
::
Tensor
features
,
torch
::
Tensor
din
,
torch
::
Tensor
dout
,
torch
::
Tensor
filters
,
torch
::
Tensor
dfilters
,
torch
::
Tensor
indicesIn
,
void
fused_conv_backward_cuda_minkowski
(
torch
::
Tensor
features
,
torch
::
Tensor
din
,
torch
::
Tensor
dout
,
torch
::
Tensor
filters
,
torch
::
Tensor
dfilters
,
torch
::
Tensor
indicesIn
,
torch
::
Tensor
indicesOut
,
int
nHot
)
{
auto
dtype
=
features
.
scalar_type
();
auto
in_nchannel
=
features
.
size
(
1
);
...
...
@@ -131,7 +132,7 @@ void fused_conv_backward_cuda_minkowski(torch::Tensor features, torch::Tensor di
tv
::
dispatch_torch
<
float
>
(
dtype
,
[
&
](
auto
I
)
{
using
T
=
decltype
(
I
);
tv
::
DispatchInt
<
shmem_sizes_t
>
()(
shared_mem_size
,
[
&
](
auto
ShSizeValue
){
tv
::
DispatchInt
<
shmem_sizes_t
>
()(
shared_mem_size
,
[
&
](
auto
ShSizeValue
)
{
constexpr
int
ShmemSize
=
decltype
(
ShSizeValue
)
::
value
;
for
(
int
s
=
0
;
s
<
num_div
;
s
++
)
{
int
remainder
=
nHot
-
step
*
s
;
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment