Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
3517290c
Commit
3517290c
authored
Jul 09, 2020
by
yanyan
Browse files
format code, add benchmark per layer
parent
540a2209
Changes
29
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
470 additions
and
137 deletions
+470
-137
src/spconv/point2voxel.cu
src/spconv/point2voxel.cu
+63
-67
src/spconv/point2voxel_ops.cc
src/spconv/point2voxel_ops.cc
+10
-14
src/spconv/spconv_ops.cc
src/spconv/spconv_ops.cc
+11
-12
test/benchmark.py
test/benchmark.py
+80
-14
test/benchmark_detail.py
test/benchmark_detail.py
+199
-0
test/benchmark_points_to_voxel.py
test/benchmark_points_to_voxel.py
+15
-9
test/benchmark_points_to_voxel_gpu.py
test/benchmark_points_to_voxel_gpu.py
+90
-19
third_party/cutlass
third_party/cutlass
+1
-1
third_party/mp11
third_party/mp11
+1
-1
No files found.
src/spconv/point2voxel.cu
View file @
3517290c
...
...
@@ -10,14 +10,12 @@
namespace
spconv
{
void
scatter_point_to_grid_cuda
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
pointIndex
,
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
)
{
void
scatter_point_to_grid_cuda
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
pointIndex
,
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
)
{
auto
stream
=
at
::
cuda
::
getCurrentCUDAStream
();
auto
num_points
=
points
.
size
(
0
);
auto
num_features
=
points
.
size
(
1
);
...
...
@@ -27,86 +25,84 @@ void scatter_point_to_grid_cuda(
constexpr
int
NDim
=
decltype
(
I
)
::
value
;
tv
::
SimpleVector
<
Index
,
NDim
>
gs
(
gridShape
.
begin
(),
gridShape
.
end
());
scatterPointToGridKernel
<
Index
,
NDim
>
<<<
tv
::
cuda
::
getBlocks
(
num_points
),
tv
::
cuda
::
CUDA_NUM_THREADS
,
0
,
stream
>>>
(
tv
::
torch2tv
<
float
>
(
points
),
tv
::
torch2tv
<
Index
>
(
indexes
),
tv
::
torch2tv
<
float
>
(
grids
),
tv
::
torch2tv
<
Index
>
(
numPointsPerGrid
),
tv
::
torch2tv
<
Index
>
(
pointIndex
),
gs
);
<<<
tv
::
cuda
::
getBlocks
(
num_points
),
tv
::
cuda
::
CUDA_NUM_THREADS
,
0
,
stream
>>>
(
tv
::
torch2tv
<
float
>
(
points
),
tv
::
torch2tv
<
Index
>
(
indexes
),
tv
::
torch2tv
<
float
>
(
grids
),
tv
::
torch2tv
<
Index
>
(
numPointsPerGrid
),
tv
::
torch2tv
<
Index
>
(
pointIndex
),
gs
);
TV_CHECK_CUDA_ERR_V2
(
"scatterPointToGridKernel failed"
);
#ifdef TV_LOG_KERNEL_INFO
cudaFuncAttributes
attr
;
checkCudaErrors
(
cudaFuncGetAttributes
(
&
attr
,
scatterPointToGridKernel
<
Index
,
NDim
>
));
tv
::
ssprint
(
"scatterPointToGridKernel<"
,
tv
::
type_s
<
Index
>
,
NDim
,
">"
,
attr
.
numRegs
);
checkCudaErrors
(
cudaFuncGetAttributes
(
&
attr
,
scatterPointToGridKernel
<
Index
,
NDim
>
));
tv
::
ssprint
(
"scatterPointToGridKernel<"
,
tv
::
type_s
<
Index
>
,
NDim
,
">"
,
attr
.
numRegs
);
#endif
});
});
}
// Host-side launcher for the "gather" half of point-to-voxel conversion.
//
// On the current CUDA stream it launches, in this order:
//   1. resetPointIndexKernel     - sized by pointIndex.size(0) - 1
//   2. gatherPointFromGridKernel - sized by voxels.size(0)
//   3. resetGridKernel           - sized by voxels.size(0)
// Each launch is followed by TV_CHECK_CUDA_ERR_V2. The kernel bodies are not
// defined in this function; judging by their names, step 2 fills `voxels` and
// `coors` from `grids`, while steps 1 and 3 restore the scratch buffers
// (NOTE(review): confirm against the kernel definitions).
//
// The index type is dispatched from pointIndexUnique's dtype (int32_t is the
// only type listed) and `ndim` is dispatched over {2, 3, 4}; how the dispatch
// helpers react to any other value is not visible here.
//
// Parameters:
//   grids, numPointsPerGrid - passed to the gather and reset-grid kernels.
//   pointIndex              - passed to resetPointIndexKernel only.
//   pointIndexUnique        - selects the index dtype; passed to the gather
//                             and reset-grid kernels.
//   voxels, coors           - passed to the gather kernel.
//   gridShape               - copied into a tv::SimpleVector<Index, NDim>.
//   ndim                    - runtime dimensionality, turned into NDim.
void gather_point_from_grid_cuda(
    torch::Tensor grids, torch::Tensor numPointsPerGrid,
    torch::Tensor pointIndex, torch::Tensor pointIndexUnique,
    torch::Tensor voxels, torch::Tensor coors, std::vector<int64_t> gridShape,
    const int ndim) {
  auto stream = at::cuda::getCurrentCUDAStream();
  auto num_voxel = voxels.size(0);
  auto num_max_points = pointIndex.size(0) - 1;
  auto grid_volume = grids.size(0);
  tv::dispatch_torch<int32_t>(
      pointIndexUnique.scalar_type(), [&](auto IndexValue) {
        using Index = decltype(IndexValue);
        tv::dispatch_int<2, 3, 4>(ndim, [&](auto I) {
          constexpr int NDim = decltype(I)::value;
          tv::SimpleVector<Index, NDim> gs(gridShape.begin(), gridShape.end());

          // Step 1: reset pointIndex.
          resetPointIndexKernel<Index>
              <<<tv::cuda::getBlocks(num_max_points),
                 tv::cuda::CUDA_NUM_THREADS, 0, stream>>>(
                  tv::torch2tv<Index>(pointIndex), grid_volume);
          TV_CHECK_CUDA_ERR_V2("resetPointIndexKernel failed");
#ifdef TV_LOG_KERNEL_INFO
          // Query the same instantiation that was launched above. The launch
          // uses <Index> only, so <Index, NDim> would name a different
          // (most likely non-existent) specialization.
          cudaFuncAttributes attr0;
          checkCudaErrors(
              cudaFuncGetAttributes(&attr0, resetPointIndexKernel<Index>));
          tv::ssprint("resetPointIndexKernel<", tv::type_s<Index>, NDim, ">",
                      attr0.numRegs);
#endif

          // Step 2: gather from the grid into voxels / coors.
          gatherPointFromGridKernel<Index, NDim>
              <<<tv::cuda::getBlocks(num_voxel), tv::cuda::CUDA_NUM_THREADS, 0,
                 stream>>>(tv::torch2tv<float>(grids),
                           tv::torch2tv<Index>(numPointsPerGrid),
                           tv::torch2tv<Index>(pointIndexUnique),
                           tv::torch2tv<float>(voxels),
                           tv::torch2tv<Index>(coors), gs);
          TV_CHECK_CUDA_ERR_V2("gatherPointFromGridKernel failed");
#ifdef TV_LOG_KERNEL_INFO
          cudaFuncAttributes attr1;
          checkCudaErrors(cudaFuncGetAttributes(
              &attr1, gatherPointFromGridKernel<Index, NDim>));
          tv::ssprint("gatherPointFromGridKernel<", tv::type_s<Index>, NDim,
                      ">", attr1.numRegs);
#endif

          // Step 3: reset the grid buffers.
          resetGridKernel<Index>
              <<<tv::cuda::getBlocks(num_voxel), tv::cuda::CUDA_NUM_THREADS, 0,
                 stream>>>(tv::torch2tv<float>(grids),
                           tv::torch2tv<Index>(numPointsPerGrid),
                           tv::torch2tv<Index>(pointIndexUnique));
          TV_CHECK_CUDA_ERR_V2("resetGridKernel failed");
#ifdef TV_LOG_KERNEL_INFO
          // As in step 1: the launch uses <Index>, so query <Index>.
          cudaFuncAttributes attr2;
          checkCudaErrors(
              cudaFuncGetAttributes(&attr2, resetGridKernel<Index>));
          tv::ssprint("resetGridKernel<", tv::type_s<Index>, NDim, ">",
                      attr2.numRegs);
#endif
        });
      });
}
}
// namespace spconv
src/spconv/point2voxel_ops.cc
View file @
3517290c
...
...
@@ -3,23 +3,18 @@
namespace
spconv
{
int64_t
pointsToVoxel
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
torch
::
Tensor
pointIndex
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
std
::
vector
<
int64_t
>
gridShape
,
const
int64_t
ndim
)
{
int64_t
pointsToVoxel
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
torch
::
Tensor
pointIndex
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
std
::
vector
<
int64_t
>
gridShape
,
const
int64_t
ndim
)
{
if
(
points
.
device
().
type
()
==
torch
::
kCPU
)
{
TV_THROW_INVALID_ARG
(
"not support cpu currently"
);
}
#ifdef TV_CUDA
else
if
(
points
.
device
().
type
()
==
torch
::
kCUDA
)
{
scatter_point_to_grid_cuda
(
points
,
indexes
,
grids
,
numPointsPerGrid
,
pointIndex
,
gridShape
,
ndim
);
scatter_point_to_grid_cuda
(
points
,
indexes
,
grids
,
numPointsPerGrid
,
pointIndex
,
gridShape
,
ndim
);
}
#endif
else
{
...
...
@@ -33,8 +28,9 @@ pointsToVoxel(torch::Tensor points,
}
#ifdef TV_CUDA
else
if
(
points
.
device
().
type
()
==
torch
::
kCUDA
)
{
gather_point_from_grid_cuda
(
grids
,
numPointsPerGrid
,
pointIndex
,
pointIndexUnique
,
voxels
,
coors
,
gridShape
,
ndim
);
gather_point_from_grid_cuda
(
grids
,
numPointsPerGrid
,
pointIndex
,
pointIndexUnique
,
voxels
,
coors
,
gridShape
,
ndim
);
}
#endif
else
{
...
...
src/spconv/spconv_ops.cc
View file @
3517290c
...
...
@@ -247,10 +247,10 @@ torch::Tensor indiceConvNative(torch::Tensor features, torch::Tensor filters,
}
template
<
int
Algo
>
torch
::
Tensor
indiceConvFused
(
torch
::
Tensor
features
,
torch
::
Tensor
filte
rs
,
torch
::
Tensor
indice
Pairs
,
torch
::
Tensor
indiceNum
,
int64_t
numActOut
,
int64_t
_inverse
,
int64_t
_subM
)
{
torch
::
Tensor
indiceConvFused
(
torch
::
Tensor
features
,
torch
::
Tensor
filters
,
torch
::
Tensor
indicePai
rs
,
torch
::
Tensor
indice
Num
,
int64_t
numActOut
,
int64_t
_inverse
,
int64_t
_subM
)
{
auto
kernelVolume
=
indiceNum
.
size
(
0
);
// auto timer = spconv::CudaContextTimer<>();
bool
subM
=
_subM
!=
0
;
...
...
@@ -282,8 +282,9 @@ indiceConvFused(torch::Tensor features, torch::Tensor filters,
}
#ifdef TV_CUDA
else
if
(
device
==
torch
::
kCUDA
)
{
FusedConvDispatch
<
Algo
>::
fwd
(
output
,
features
,
filters
[
i
],
indicePairs
[
inverse
][
i
],
indicePairs
[
!
inverse
][
i
],
nHot
);
FusedConvDispatch
<
Algo
>::
fwd
(
output
,
features
,
filters
[
i
],
indicePairs
[
inverse
][
i
],
indicePairs
[
!
inverse
][
i
],
nHot
);
}
#endif
else
{
...
...
@@ -517,9 +518,8 @@ indiceConvBwNative(torch::Tensor features, torch::Tensor filters,
template
<
int
Algo
>
std
::
vector
<
torch
::
Tensor
>
indiceConvBwFused
(
torch
::
Tensor
features
,
torch
::
Tensor
filters
,
torch
::
Tensor
outGrad
,
torch
::
Tensor
indicePairs
,
torch
::
Tensor
indiceNum
,
int64_t
_inverse
,
int64_t
_subM
)
{
torch
::
Tensor
outGrad
,
torch
::
Tensor
indicePairs
,
torch
::
Tensor
indiceNum
,
int64_t
_inverse
,
int64_t
_subM
)
{
auto
kernelVolume
=
indiceNum
.
size
(
0
);
bool
subM
=
_subM
!=
0
;
bool
inverse
=
_inverse
!=
0
;
...
...
@@ -557,8 +557,8 @@ indiceConvBwFused(torch::Tensor features, torch::Tensor filters,
#ifdef TV_CUDA
else
if
(
device
==
torch
::
kCUDA
)
{
FusedConvDispatch
<
Algo
>::
bwd
(
features
,
inputGrad
,
outGrad
,
filters
[
i
],
filtersGrad
[
i
],
indicePairs
[
inverse
][
i
],
indicePairs
[
!
inverse
][
i
],
nHot
);
filtersGrad
[
i
],
indicePairs
[
inverse
][
i
],
indicePairs
[
!
inverse
][
i
],
nHot
);
}
#endif
else
{
...
...
@@ -723,7 +723,6 @@ template <> struct ConvDispatch<kMinkowskiEngine> {
constexpr
static
auto
*
bwd
=
indiceConvBwFused
<
kFMinkowskiEngine
>
;
};
torch
::
Tensor
indiceConv
(
torch
::
Tensor
features
,
torch
::
Tensor
filters
,
torch
::
Tensor
indicePairs
,
torch
::
Tensor
indiceNum
,
int64_t
numActOut
,
int64_t
_inverse
,
int64_t
_subM
,
...
...
test/benchmark.py
View file @
3517290c
...
...
@@ -26,38 +26,104 @@ class Net(nn.Module):
def
__init__
(
self
,
shape
,
algo
):
super
().
__init__
()
self
.
net
=
spconv
.
SparseSequential
(
spconv
.
SubMConv3d
(
3
,
64
,
3
,
bias
=
False
,
indice_key
=
"c0"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
64
,
64
,
3
,
bias
=
False
,
indice_key
=
"c0"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
3
,
64
,
3
,
bias
=
False
,
indice_key
=
"c0"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
64
,
64
,
3
,
bias
=
False
,
indice_key
=
"c0"
,
algo
=
algo
),
# nn.BatchNorm1d(32),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
64
,
96
,
3
,
bias
=
False
,
indice_key
=
"c1"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
96
,
96
,
3
,
bias
=
False
,
indice_key
=
"c1"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
64
,
96
,
3
,
bias
=
False
,
indice_key
=
"c1"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
96
,
96
,
3
,
bias
=
False
,
indice_key
=
"c1"
,
algo
=
algo
),
# nn.BatchNorm1d(64),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
96
,
128
,
3
,
bias
=
False
,
indice_key
=
"c2"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
128
,
128
,
3
,
bias
=
False
,
indice_key
=
"c2"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
96
,
128
,
3
,
bias
=
False
,
indice_key
=
"c2"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
128
,
128
,
3
,
bias
=
False
,
indice_key
=
"c2"
,
algo
=
algo
),
# nn.BatchNorm1d(128),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
128
,
160
,
3
,
bias
=
False
,
indice_key
=
"c3"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
160
,
160
,
3
,
bias
=
False
,
indice_key
=
"c3"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
128
,
160
,
3
,
bias
=
False
,
indice_key
=
"c3"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
160
,
160
,
3
,
bias
=
False
,
indice_key
=
"c3"
,
algo
=
algo
),
# nn.BatchNorm1d(128),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
160
,
192
,
3
,
bias
=
False
,
indice_key
=
"c4"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
192
,
192
,
3
,
bias
=
False
,
indice_key
=
"c4"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
160
,
192
,
3
,
bias
=
False
,
indice_key
=
"c4"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
192
,
192
,
3
,
bias
=
False
,
indice_key
=
"c4"
,
algo
=
algo
),
# nn.BatchNorm1d(128),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
192
,
224
,
3
,
bias
=
False
,
indice_key
=
"c5"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
224
,
224
,
3
,
bias
=
False
,
indice_key
=
"c5"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
192
,
224
,
3
,
bias
=
False
,
indice_key
=
"c5"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
224
,
224
,
3
,
bias
=
False
,
indice_key
=
"c5"
,
algo
=
algo
),
# nn.BatchNorm1d(128),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
224
,
256
,
3
,
bias
=
False
,
indice_key
=
"c6"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
256
,
256
,
3
,
bias
=
False
,
indice_key
=
"c6"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
224
,
256
,
3
,
bias
=
False
,
indice_key
=
"c6"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
256
,
256
,
3
,
bias
=
False
,
indice_key
=
"c6"
,
algo
=
algo
),
)
max_batch_size
=
1
# grid (dense map) is used for indice generation. use pre-allocated grid can run faster.
...
...
test/benchmark_detail.py
0 → 100644
View file @
3517290c
import
time
from
pathlib
import
Path
import
numpy
as
np
import
torch
from
torch
import
nn
import
spconv
from
spconv.utils
import
VoxelGeneratorV2
def waymo_data(batch_size=1):
    """Voxelize the bundled benchmark point cloud with ``VoxelGeneratorV2``.

    Loads the ``pc`` array from ``data/benchmark-pc.npz`` (located next to
    this file) and runs it through the generator.

    Args:
        batch_size: accepted for signature compatibility; not used.

    Returns:
        A ``(voxels, coors, grid_size)`` tuple: ``voxels`` is the generator's
        ``"voxels"`` output reshaped to ``(-1, 3)``; ``coors`` is its
        ``"coordinates"`` output with one column of zeros prepended;
        ``grid_size`` is ``gen.grid_size``.
    """
    # Positional arguments 3 and 4 are presumably "max points per voxel" and
    # "max voxels" -- TODO confirm against VoxelGeneratorV2's signature.
    gen = VoxelGeneratorV2([0.1, 0.1, 0.1], [-80, -80, -2, 80, 80, 6], 1,
                           150000)
    pc_path = Path(__file__).parent / "data" / "benchmark-pc.npz"
    # np.load on an .npz returns an NpzFile that keeps the archive open; use
    # it as a context manager so the file handle is released right away.
    with np.load(pc_path) as archive:
        pc = archive["pc"]
    generated = gen.generate(pc)
    voxels = generated["voxels"].reshape(-1, 3)
    coors = generated["coordinates"]
    num_voxels = coors.shape[0]
    # Leading all-zero column (presumably the batch index for a single sample).
    zero_col = np.full([num_voxels, 1], 0, coors.dtype)
    coors = np.concatenate([zero_col, coors], axis=1)
    return voxels, coors, gen.grid_size
class Net(nn.Module):
    """Stack of named submanifold conv / max-pool layers for benchmarking.

    The network has seven stages. Every stage is two ``SubMConv3d`` layers
    that share one ``indice_key`` ("c0" .. "c6"), and consecutive stages are
    separated by a ``SparseMaxPool3d(2, 2)``. Each layer receives a ``name``
    ("subm-<stage>-<0|1>" / "pool-<stage>").
    """

    def __init__(self, shape, algo):
        """Build the layer stack and pre-allocate the index grid.

        Args:
            shape: spatial shape; stored on the module and used to size
                the pre-allocated grid.
            algo: forwarded to every ``SubMConv3d`` as ``algo``.
        """
        super().__init__()
        # Channel width entering stage i is widths[i]; leaving it, widths[i+1].
        widths = [3, 64, 96, 128, 160, 192, 224, 256]
        layers = []
        for stage in range(len(widths) - 1):
            c_in = widths[stage]
            c_out = widths[stage + 1]
            if stage > 0:
                # A pooling layer sits between consecutive stages.
                layers.append(
                    spconv.SparseMaxPool3d(2, 2, name="pool-%d" % (stage - 1)))
            key = "c%d" % stage
            layers.append(
                spconv.SubMConv3d(c_in,
                                  c_out,
                                  3,
                                  bias=False,
                                  indice_key=key,
                                  algo=algo,
                                  name="subm-%d-0" % stage))
            layers.append(
                spconv.SubMConv3d(c_out,
                                  c_out,
                                  3,
                                  bias=False,
                                  indice_key=key,
                                  algo=algo,
                                  name="subm-%d-1" % stage))
        self.net = spconv.SparseSequential(*layers)
        max_batch_size = 1
        # The grid (dense map) is used for indice generation; a pre-allocated
        # grid runs faster.
        self.grid = torch.full([max_batch_size, *shape], -1,
                               dtype=torch.int32).cuda()
        # self.grid = None
        self.shape = shape

    def forward(self, features, coors, batch_size):
        """Wrap the inputs in a benchmark-enabled SparseConvTensor and run the stack."""
        sparse_input = spconv.SparseConvTensor(features,
                                               coors,
                                               self.shape,
                                               batch_size,
                                               self.grid,
                                               benchmark=True)
        return self.net(sparse_input)
def main():
    """Run the network 20 times on the benchmark data and print timings.

    Prints the coordinate tensor shape, the output spatial shape of one
    initial forward pass, the mean duration of the last 10 of 20 timed
    passes, and the accumulated per-layer "time" / "indice_gen_time" records
    for the layer named "subm-6-0".
    """
    dtype = torch.float32
    voxels, coors, spatial_shape = waymo_data()
    voxels_th = torch.from_numpy(voxels).cuda().to(dtype)
    coors_th = torch.from_numpy(coors).cuda().int()
    algo = spconv.ConvAlgo.Minkowski
    net = Net(spatial_shape[::-1], algo).cuda().eval().to(dtype)
    print(coors_th.shape)
    out = net(voxels_th, coors_th, 1)
    print(out.spatial_shape)
    times = []
    detail_bench = {}
    detail_ind_gen_bench = {}
    with torch.no_grad():
        for _ in range(20):
            torch.cuda.synchronize()
            # perf_counter is monotonic and high-resolution, so it is the
            # appropriate clock for measuring an elapsed interval.
            start = time.perf_counter()
            out = net(voxels_th, coors_th, 1)
            for layer_name, record in out.benchmark_record.items():
                detail_bench.setdefault(layer_name, []).extend(record["time"])
                detail_ind_gen_bench.setdefault(layer_name, []).extend(
                    record["indice_gen_time"])
            torch.cuda.synchronize()
            times.append(time.perf_counter() - start)
            # print((net.grid == -1).float().sum(), net.grid.numel())
            # print("spconv time", time.perf_counter() - start)
    # Only the last 10 iterations are averaged (the first 10 presumably serve
    # as warm-up).
    print("spconv time", np.mean(times[10:]))
    print(detail_bench["subm-6-0"])
    print(detail_ind_gen_bench["subm-6-0"])


if __name__ == "__main__":
    main()
test/benchmark_points_to_voxel.py
View file @
3517290c
...
...
@@ -13,13 +13,17 @@ def waymo_data_gpu(batch_size=1):
print
(
'gpu with total points available per voxel'
)
data
=
np
.
load
(
Path
(
__file__
).
parent
/
"data"
/
"benchmark-pc.npz"
)
points
=
torch
.
from_numpy
(
data
[
'pc'
]).
cuda
().
float
()
voxel_size
=
torch
.
Tensor
([
0.1
,
0.1
,
0.1
]).
to
(
points
.
dtype
).
to
(
points
.
device
)
coors_range
=
torch
.
Tensor
([
-
80
,
-
80
,
-
2
,
80
,
80
,
6
]).
to
(
points
.
dtype
).
to
(
points
.
device
)
gen
=
VoxelGeneratorV3
(
voxel_size
,
coors_range
,
max_points
=
200000
,
num_features
=
points
.
shape
[
1
],
dtype
=
points
.
dtype
,
device
=
points
.
device
)
voxel_size
=
torch
.
Tensor
([
0.1
,
0.1
,
0.1
]).
to
(
points
.
dtype
).
to
(
points
.
device
)
coors_range
=
torch
.
Tensor
([
-
80
,
-
80
,
-
2
,
80
,
80
,
6
]).
to
(
points
.
dtype
).
to
(
points
.
device
)
gen
=
VoxelGeneratorV3
(
voxel_size
,
coors_range
,
max_points
=
200000
,
num_features
=
points
.
shape
[
1
],
dtype
=
points
.
dtype
,
device
=
points
.
device
)
voxels
,
coors
=
gen
.
generate
(
points
)
times
=
[]
...
...
@@ -40,8 +44,8 @@ def waymo_data_gpu(batch_size=1):
def
waymo_data_cpu
(
max_points_per_voxel
=
1
,
batch_size
=
1
):
print
(
'cpu with %d max points per voxel'
%
max_points_per_voxel
)
gen
=
VoxelGeneratorV2
([
0.1
,
0.1
,
0.1
],
[
-
80
,
-
80
,
-
2
,
80
,
80
,
6
],
max_points_per_voxel
,
150000
)
gen
=
VoxelGeneratorV2
([
0.1
,
0.1
,
0.1
],
[
-
80
,
-
80
,
-
2
,
80
,
80
,
6
],
max_points_per_voxel
,
150000
)
data
=
np
.
load
(
Path
(
__file__
).
parent
/
"data"
/
"benchmark-pc.npz"
)
pc
=
data
[
"pc"
]
data
=
gen
.
generate
(
pc
)
...
...
@@ -62,6 +66,7 @@ def waymo_data_cpu(max_points_per_voxel=1, batch_size=1):
coors
=
np
.
concatenate
([
np
.
full
([
N
,
1
],
0
,
coors
.
dtype
),
coors
],
axis
=
1
)
return
voxels
,
coors
,
gen
.
grid_size
def
get_index
(
coor
,
grid_size
):
index
=
coor
[
0
]
for
c
,
g
in
zip
(
coor
[
1
:],
grid_size
):
...
...
@@ -100,5 +105,6 @@ def main():
print
(
'Perfect GPU Voxelization!!!'
)
if
__name__
==
"__main__"
:
main
()
test/benchmark_points_to_voxel_gpu.py
View file @
3517290c
...
...
@@ -12,8 +12,10 @@ from spconv.utils import VoxelGeneratorV3
def
waymo_data
(
batch_size
=
1
):
data
=
np
.
load
(
Path
(
__file__
).
parent
/
"data"
/
"benchmark-pc.npz"
)
points
=
torch
.
from_numpy
(
data
[
'pc'
]).
cuda
().
float
()
voxel_size
=
torch
.
Tensor
([
0.1
,
0.1
,
0.1
]).
to
(
points
.
dtype
).
to
(
points
.
device
)
coors_range
=
torch
.
Tensor
([
-
80
,
-
80
,
-
2
,
80
,
80
,
6
]).
to
(
points
.
dtype
).
to
(
points
.
device
)
voxel_size
=
torch
.
Tensor
([
0.1
,
0.1
,
0.1
]).
to
(
points
.
dtype
).
to
(
points
.
device
)
coors_range
=
torch
.
Tensor
([
-
80
,
-
80
,
-
2
,
80
,
80
,
6
]).
to
(
points
.
dtype
).
to
(
points
.
device
)
gen
=
VoxelGeneratorV3
(
voxel_size
,
coors_range
)
voxels
,
coors
=
gen
.
generate
(
points
)
...
...
@@ -28,43 +30,111 @@ class Net(nn.Module):
super
().
__init__
()
self
.
device
=
device
self
.
net
=
spconv
.
SparseSequential
(
spconv
.
SubMConv3d
(
3
,
64
,
3
,
bias
=
False
,
indice_key
=
"c0"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
64
,
64
,
3
,
bias
=
False
,
indice_key
=
"c0"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
3
,
64
,
3
,
bias
=
False
,
indice_key
=
"c0"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
64
,
64
,
3
,
bias
=
False
,
indice_key
=
"c0"
,
algo
=
algo
),
# nn.BatchNorm1d(32),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
64
,
96
,
3
,
bias
=
False
,
indice_key
=
"c1"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
96
,
96
,
3
,
bias
=
False
,
indice_key
=
"c1"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
64
,
96
,
3
,
bias
=
False
,
indice_key
=
"c1"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
96
,
96
,
3
,
bias
=
False
,
indice_key
=
"c1"
,
algo
=
algo
),
# nn.BatchNorm1d(64),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
96
,
128
,
3
,
bias
=
False
,
indice_key
=
"c2"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
128
,
128
,
3
,
bias
=
False
,
indice_key
=
"c2"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
96
,
128
,
3
,
bias
=
False
,
indice_key
=
"c2"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
128
,
128
,
3
,
bias
=
False
,
indice_key
=
"c2"
,
algo
=
algo
),
# nn.BatchNorm1d(128),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
128
,
160
,
3
,
bias
=
False
,
indice_key
=
"c3"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
160
,
160
,
3
,
bias
=
False
,
indice_key
=
"c3"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
128
,
160
,
3
,
bias
=
False
,
indice_key
=
"c3"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
160
,
160
,
3
,
bias
=
False
,
indice_key
=
"c3"
,
algo
=
algo
),
# nn.BatchNorm1d(128),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
160
,
192
,
3
,
bias
=
False
,
indice_key
=
"c4"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
192
,
192
,
3
,
bias
=
False
,
indice_key
=
"c4"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
160
,
192
,
3
,
bias
=
False
,
indice_key
=
"c4"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
192
,
192
,
3
,
bias
=
False
,
indice_key
=
"c4"
,
algo
=
algo
),
# nn.BatchNorm1d(128),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
192
,
224
,
3
,
bias
=
False
,
indice_key
=
"c5"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
224
,
224
,
3
,
bias
=
False
,
indice_key
=
"c5"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
192
,
224
,
3
,
bias
=
False
,
indice_key
=
"c5"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
224
,
224
,
3
,
bias
=
False
,
indice_key
=
"c5"
,
algo
=
algo
),
# nn.BatchNorm1d(128),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
224
,
256
,
3
,
bias
=
False
,
indice_key
=
"c6"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
256
,
256
,
3
,
bias
=
False
,
indice_key
=
"c6"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
224
,
256
,
3
,
bias
=
False
,
indice_key
=
"c6"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
256
,
256
,
3
,
bias
=
False
,
indice_key
=
"c6"
,
algo
=
algo
),
)
max_batch_size
=
1
# grid (dense map) is used for indice generation. use pre-allocated grid can run faster.
self
.
grid
=
torch
.
full
([
max_batch_size
,
*
shape
],
-
1
,
dtype
=
torch
.
int32
,
device
=
self
.
device
)
self
.
grid
=
torch
.
full
([
max_batch_size
,
*
shape
],
-
1
,
dtype
=
torch
.
int32
,
device
=
self
.
device
)
# self.grid = None
self
.
shape
=
shape
...
...
@@ -78,7 +148,8 @@ def main():
voxels
,
coors
,
spatial_shape
=
waymo_data
()
voxels_th
,
coors_th
=
voxels
,
coors
algo
=
spconv
.
ConvAlgo
.
Native
net
=
Net
(
spatial_shape
[::
-
1
],
algo
,
voxels_th
.
device
).
cuda
(
device
=
voxels_th
.
device
).
eval
().
float
()
net
=
Net
(
spatial_shape
[::
-
1
],
algo
,
voxels_th
.
device
).
cuda
(
device
=
voxels_th
.
device
).
eval
().
float
()
print
(
coors_th
.
shape
)
out
=
net
(
voxels_th
,
coors_th
,
1
)
print
(
out
.
spatial_shape
)
...
...
cutlass
@
fd7e058d
Compare
86931fef
...
fd7e058d
Subproject commit
86931fef8538008a1a92036732b3eb7fe47b25d0
Subproject commit
fd7e058d0cb3e4bf743edc530c7778a210cb168b
mp11
@
29764aad
Compare
10ba80ac
...
29764aad
Subproject commit
10ba80acb91f138170b7a22bb86523cb07d6f942
Subproject commit
29764aad4881fde809af6a025c12012e47a55515
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment