Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
c328c870
Commit
c328c870
authored
Jul 03, 2020
by
xmyqsh
Browse files
init GPU Voxelization
parent
cfaa1a3a
Changes
10
Show whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
418 additions
and
2 deletions
+418
-2
include/spconv/point2voxel.cu.h
include/spconv/point2voxel.cu.h
+58
-0
include/spconv/point2voxel_ops.h
include/spconv/point2voxel_ops.h
+30
-0
include/spconv/points2voxels.h
include/spconv/points2voxels.h
+25
-0
spconv/ops.py
spconv/ops.py
+33
-0
spconv/utils/__init__.py
spconv/utils/__init__.py
+32
-0
src/spconv/CMakeLists.txt
src/spconv/CMakeLists.txt
+2
-2
src/spconv/all.cc
src/spconv/all.cc
+2
-0
src/spconv/point2voxel.cu
src/spconv/point2voxel.cu
+82
-0
src/spconv/point2voxel_ops.cc
src/spconv/point2voxel_ops.cc
+55
-0
test/benchmark_points_to_voxel_gpu.py
test/benchmark_points_to_voxel_gpu.py
+99
-0
No files found.
include/spconv/point2voxel.cu.h
0 → 100644
View file @
c328c870
#pragma once
#include <tensorview/kernel_utils.h>
#include <tensorview/tensorview.h>
#include <torch/script.h>
namespace
spconv
{
template
<
typename
Index
,
unsigned
NDim
>
__global__
void
scatterPointToGridKernel
(
tv
::
TensorView
<
const
float
>
points
,
tv
::
TensorView
<
const
Index
>
indexes
,
tv
::
TensorView
<
float
>
grids
,
tv
::
TensorView
<
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
Index
>
pointIndexUnique
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
gridShape
)
{
Index
index
;
int
numPoints
=
points
.
dim
(
0
);
int
numFeatures
=
points
.
dim
(
1
);
for
(
int
ix
:
tv
::
KernelLoopX
<
int
>
(
numPoints
))
{
// slow here, atomic Add + random access
// Use ILP to speed up it
index
=
tv
::
ArrayIndexRowMajor
<
NDim
,
NDim
>::
runPtrs
(
indexes
.
data
()
+
ix
*
NDim
,
gridShape
.
data
(),
0
);
pointIndexUnique
(
ix
)
=
index
;
atomicAdd
(
numPointsPerGrid
.
data
()
+
index
,
Index
(
1
));
#pragma unroll
for
(
int
k
=
0
;
k
!=
numFeatures
;
++
k
)
{
atomicAdd
(
grids
.
data
()
+
index
*
numFeatures
+
k
,
*
(
points
.
data
()
+
ix
*
numFeatures
+
k
));
}
}
}
template
<
typename
Index
,
unsigned
NDim
>
__global__
void
gatherPointFromGridKernel
(
tv
::
TensorView
<
const
float
>
grids
,
tv
::
TensorView
<
const
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
const
Index
>
pointIndexUnique
,
tv
::
TensorView
<
float
>
voxels
,
tv
::
TensorView
<
Index
>
coors
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
gridShape
)
{
Index
index
;
int
numVoxels
=
voxels
.
dim
(
0
);
int
numFeatures
=
grids
.
dim
(
1
);
for
(
int
ix
:
tv
::
KernelLoopX
<
int
>
(
numVoxels
))
{
// slow here, random access
// Use ILP to speed up it
index
=
pointIndexUnique
(
ix
);
#pragma unroll
for
(
int
k
=
0
;
k
!=
numFeatures
;
++
k
)
{
voxels
(
ix
,
k
)
=
grids
(
index
,
k
)
/
numPointsPerGrid
(
index
);
}
index
=
tv
::
rowArrayIdxInv
<
Index
,
NDim
>
(
index
,
coors
.
data
()
+
ix
*
NDim
,
gridShape
.
data
());
}
}
}
// namespace spconv
include/spconv/point2voxel_ops.h
0 → 100644
View file @
c328c870
// Copyright 2020 xmyqsh
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <spconv/points2voxels.h>
#include <tensorview/torch_utils.h>
#include <torch/script.h>
#include <utility/timer.h>
namespace
spconv
{
std
::
vector
<
torch
::
Tensor
>
pointsToVoxel
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
std
::
vector
<
int64_t
>
gridShape
,
const
int64_t
ndim
,
const
int64_t
gridVolume
);
}
// namespace spconv
include/spconv/points2voxels.h
0 → 100644
View file @
c328c870
#pragma once
#include <tensorview/tensorview.h>
#include <torch/script.h>
namespace
spconv
{
void
scatter_point_to_grid_cuda
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
pointIndexUnique
,
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
);
void
gather_point_from_grid_cuda
(
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
pointIndexUnique
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
);
}
// namespace spconv
spconv/ops.py
View file @
c328c870
...
...
@@ -157,3 +157,36 @@ def pillar_scatter(features, coors, shape):
return
torch
.
ops
.
spconv
.
pillar_scatter_half
(
features
,
coors
,
shape
)
else
:
raise
NotImplementedError
def
points_to_voxel
(
points
,
voxel_size
,
coors_range
):
"""
points: [N, ndim] float tensor. points[:, :3] contain xyz points and
points[:, 3:] contain other information such as reflectivity.
voxel_size: [3] list/tuple or array or tensor, float. xyz, indicate voxel size
coors_range: [6] list/tuple or array or tensor, float. indicate voxel range.
format: xyzxyz, minmax
"""
if
not
isinstance
(
voxel_size
,
torch
.
Tensor
):
if
not
isinstance
(
voxel_size
,
np
.
ndarray
):
voxel_size
=
np
.
array
(
voxel_size
,
dtype
=
points
.
dtype
)
voxel_size
=
torch
.
from_numpy
(
voxel_size
).
to
(
points
.
device
)
if
not
isinstance
(
voxel_size
,
torch
.
Tensor
):
if
not
isinstance
(
coors_range
,
np
.
ndarray
):
coors_range
=
np
.
array
(
coors_range
,
dtype
=
points
.
dtype
)
coors_range
=
torch
.
from_numpy
(
coors_range
).
to
(
points
.
device
)
grid_shape
=
torch
.
round
((
coors_range
[
3
:]
-
coors_range
[:
3
])
/
voxel_size
).
to
(
torch
.
int32
)
grid_volume
=
grid_shape
.
prod
()
ndim
=
grid_shape
.
shape
[
0
]
# indexes = torch.round((points[:, :3] - coors_range[:3]) / voxel_size).to(torch.int32)
indexes
=
torch
.
floor
((
points
[:,
:
3
]
-
coors_range
[:
3
])
/
voxel_size
).
to
(
torch
.
int32
)
voxels
,
coors
=
torch
.
ops
.
spconv
.
points_to_voxel
(
points
,
indexes
,
grid_shape
.
cpu
().
numpy
().
tolist
(),
ndim
,
grid_volume
.
item
())
# xyz --> zyx
#coors = coors[::-1]
x
,
y
,
z
=
coors
[:,
0
].
reshape
([
-
1
,
1
]),
coors
[:,
1
].
reshape
([
-
1
,
1
]),
coors
[:,
2
].
reshape
([
-
1
,
1
])
coors
=
torch
.
cat
([
z
,
y
,
x
],
dim
=
1
)
# can be skipped
x
,
y
,
z
,
f
=
voxels
[:,
0
].
reshape
([
-
1
,
1
]),
voxels
[:,
1
].
reshape
([
-
1
,
1
]),
voxels
[:,
2
].
reshape
([
-
1
,
1
]),
voxels
[:,
3
:]
voxels
=
torch
.
cat
([
z
,
y
,
x
,
f
],
dim
=
1
)
return
voxels
,
coors
spconv/utils/__init__.py
View file @
c328c870
...
...
@@ -13,8 +13,10 @@
# limitations under the License.
import
numpy
as
np
import
torch
from
spconv
import
spconv_utils
from
spconv.ops
import
points_to_voxel
as
points_to_voxel3
from
spconv.spconv_utils
import
(
non_max_suppression_cpu
,
points_to_voxel_3d_np
,
points_to_voxel_3d_np_mean
,
...
...
@@ -292,3 +294,33 @@ class VoxelGeneratorV2:
@
property
def
grid_size
(
self
):
return
self
.
_grid_size
class
VoxelGeneratorV3
:
def
__init__
(
self
,
voxel_size
,
point_cloud_range
):
self
.
_point_cloud_range
=
point_cloud_range
self
.
_voxel_size
=
voxel_size
self
.
_grid_size
=
torch
.
round
((
self
.
_point_cloud_range
[
3
:]
-
self
.
_point_cloud_range
[:
3
])
/
self
.
_voxel_size
).
to
(
torch
.
int32
)
self
.
_grid_size
=
self
.
_grid_size
.
cpu
().
numpy
().
tolist
()
def
generate
(
self
,
points
):
res
=
points_to_voxel3
(
points
,
self
.
_voxel_size
,
self
.
_point_cloud_range
)
return
res
def
generate_multi_gpu
(
self
,
points
,
max_voxels
=
None
):
res
=
points_to_voxel3
(
points
,
self
.
_voxel_size
,
self
.
_point_cloud_range
)
return
res
@
property
def
voxel_size
(
self
):
return
self
.
_voxel_size
@
property
def
point_cloud_range
(
self
):
return
self
.
_point_cloud_range
@
property
def
grid_size
(
self
):
return
self
.
_grid_size
src/spconv/CMakeLists.txt
View file @
c328c870
set
(
ALL_FILES all.cc indice.cc reordering.cc maxpool.cc nms.cc spconv_ops.cc pool_ops.cc
)
set
(
ALL_FILES all.cc indice.cc reordering.cc maxpool.cc nms.cc spconv_ops.cc pool_ops.cc
point2voxel_ops.cc
)
if
(
SPCONV_BuildCUDA
)
set
(
ALL_FILES
${
ALL_FILES
}
indice.cu reordering.cu maxpool.cu pillar_scatter.cu cublas_gemm.cc fused_conv.cu
)
set
(
ALL_FILES
${
ALL_FILES
}
indice.cu reordering.cu maxpool.cu pillar_scatter.cu cublas_gemm.cc
point2voxel.cu
fused_conv.cu
)
endif
()
add_library
(
spconv SHARED
${
ALL_FILES
}
)
...
...
src/spconv/all.cc
View file @
c328c870
...
...
@@ -15,12 +15,14 @@
#include <spconv/fused_spconv_ops.h>
#include <spconv/nms_ops.h>
#include <spconv/pillar_scatter_ops.h>
#include <spconv/point2voxel_ops.h>
#include <spconv/pool_ops.h>
#include <spconv/spconv_ops.h>
#include <torch/script.h>
static
auto
registry
=
torch
::
RegisterOperators
()
.
op
(
"spconv::points_to_voxel"
,
&
spconv
::
pointsToVoxel
)
.
op
(
"spconv::get_indice_pairs"
,
&
spconv
::
getIndicePairs
)
.
op
(
"spconv::indice_conv"
,
&
spconv
::
indiceConv
)
.
op
(
"spconv::indice_conv_backward"
,
&
spconv
::
indiceConvBackward
)
...
...
src/spconv/point2voxel.cu
0 → 100644
View file @
c328c870
#include <ATen/ATen.h>
#include <spconv/point2voxel.cu.h>
//#include <spconv/point2voxel.h>
#include <tensorview/cuda_utils.h>
#include <tensorview/mp_helper.h>
#include <tensorview/tensor.h>
#include <tensorview/tensorview.h>
#include <tensorview/torch_utils.h>
namespace
spconv
{
void
scatter_point_to_grid_cuda
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
pointIndexUnique
,
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
)
{
auto
stream
=
at
::
cuda
::
getCurrentCUDAStream
();
auto
num_points
=
points
.
size
(
0
);
auto
num_features
=
points
.
size
(
1
);
tv
::
dispatch_torch
<
int32_t
>
(
pointIndexUnique
.
scalar_type
(),
[
&
](
auto
IndexValue
)
{
using
Index
=
decltype
(
IndexValue
);
tv
::
dispatch_int
<
2
,
3
,
4
>
(
ndim
,
[
&
](
auto
I
)
{
constexpr
int
NDim
=
decltype
(
I
)
::
value
;
tv
::
SimpleVector
<
Index
,
NDim
>
gs
(
gridShape
.
begin
(),
gridShape
.
end
());
scatterPointToGridKernel
<
Index
,
NDim
>
<<<
tv
::
cuda
::
getBlocks
(
num_points
),
tv
::
cuda
::
CUDA_NUM_THREADS
,
0
,
stream
>>>
(
tv
::
torch2tv
<
float
>
(
points
),
tv
::
torch2tv
<
Index
>
(
indexes
),
tv
::
torch2tv
<
float
>
(
grids
),
tv
::
torch2tv
<
Index
>
(
numPointsPerGrid
),
tv
::
torch2tv
<
Index
>
(
pointIndexUnique
),
gs
);
TV_CHECK_CUDA_ERR_V2
(
"scatterPointToGridKernel failed"
);
#ifdef TV_LOG_KERNEL_INFO
cudaFuncAttributes
attr
;
checkCudaErrors
(
cudaFuncGetAttributes
(
&
attr
,
scatterPointToGridKernel
<
Index
,
NDim
>
));
tv
::
ssprint
(
"scatterPointToGridKernel<"
,
tv
::
type_s
<
Index
>
,
NDim
,
">"
,
attr
.
numRegs
);
#endif
});
});
}
void
gather_point_from_grid_cuda
(
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
pointIndexUnique
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
)
{
auto
stream
=
at
::
cuda
::
getCurrentCUDAStream
();
auto
num_voxel
=
voxels
.
size
(
0
);
tv
::
dispatch_torch
<
int32_t
>
(
pointIndexUnique
.
scalar_type
(),
[
&
](
auto
IndexValue
)
{
using
Index
=
decltype
(
IndexValue
);
tv
::
dispatch_int
<
2
,
3
,
4
>
(
ndim
,
[
&
](
auto
I
)
{
constexpr
int
NDim
=
decltype
(
I
)
::
value
;
tv
::
SimpleVector
<
Index
,
NDim
>
gs
(
gridShape
.
begin
(),
gridShape
.
end
());
gatherPointFromGridKernel
<
Index
,
NDim
>
<<<
tv
::
cuda
::
getBlocks
(
num_voxel
),
tv
::
cuda
::
CUDA_NUM_THREADS
,
0
,
stream
>>>
(
tv
::
torch2tv
<
float
>
(
grids
),
tv
::
torch2tv
<
Index
>
(
numPointsPerGrid
),
tv
::
torch2tv
<
Index
>
(
pointIndexUnique
),
tv
::
torch2tv
<
float
>
(
voxels
),
tv
::
torch2tv
<
Index
>
(
coors
),
gs
);
TV_CHECK_CUDA_ERR_V2
(
"gatherPointFromGridKernel failed"
);
cudaFuncAttributes
attr2
;
#ifdef TV_LOG_KERNEL_INFO
checkCudaErrors
(
cudaFuncGetAttributes
(
&
attr2
,
gatherPointFromGridKernel
<
Index
,
NDim
>
));
tv
::
ssprint
(
"gatherPointFromGridKernel<"
,
tv
::
type_s
<
Index
>
,
NDim
,
">"
,
attr2
.
numRegs
);
#endif
});
});
}
}
// namespace spconv
src/spconv/point2voxel_ops.cc
0 → 100644
View file @
c328c870
#include <spconv/point2voxel_ops.h>
//#include <spconv/point2voxel.cu.h>
namespace
spconv
{
std
::
vector
<
torch
::
Tensor
>
pointsToVoxel
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
std
::
vector
<
int64_t
>
gridShape
,
const
int64_t
ndim
,
const
int64_t
gridVolume
)
{
auto
device
=
points
.
device
().
type
();
auto
num_points
=
points
.
size
(
0
);
auto
num_features
=
points
.
size
(
1
);
auto
pointIndexUnique
=
torch
::
full
(
{
num_points
+
1
},
std
::
numeric_limits
<
int
>::
max
(),
torch
::
dtype
(
torch
::
kInt32
).
device
(
points
.
device
()));
auto
grids
=
torch
::
zeros
({
gridVolume
,
num_features
},
torch
::
dtype
(
points
.
dtype
()).
device
(
points
.
device
()));
auto
numPointsPerGrid
=
torch
::
zeros
({
gridVolume
},
torch
::
dtype
(
torch
::
kInt32
).
device
(
points
.
device
()));
if
(
points
.
device
().
type
()
==
torch
::
kCPU
)
{
TV_THROW_INVALID_ARG
(
"not support cpu currently"
);
}
#ifdef TV_CUDA
else
if
(
points
.
device
().
type
()
==
torch
::
kCUDA
)
{
scatter_point_to_grid_cuda
(
points
,
indexes
,
grids
,
numPointsPerGrid
,
pointIndexUnique
,
gridShape
,
ndim
);
}
#endif
else
{
TV_THROW_INVALID_ARG
(
"unknown device type"
);
}
auto
res
=
torch
::
_unique
(
pointIndexUnique
);
pointIndexUnique
=
std
::
get
<
0
>
(
res
);
auto
num_voxel
=
pointIndexUnique
.
size
(
0
)
-
1
;
auto
voxels
=
torch
::
zeros
({
num_voxel
,
num_features
},
torch
::
dtype
(
points
.
dtype
()).
device
(
points
.
device
()));
auto
coors
=
torch
::
zeros
({
num_voxel
,
ndim
},
torch
::
dtype
(
torch
::
kInt32
).
device
(
points
.
device
()));
if
(
points
.
device
().
type
()
==
torch
::
kCPU
)
{
TV_THROW_INVALID_ARG
(
"not support cpu currently"
);
}
#ifdef TV_CUDA
else
if
(
points
.
device
().
type
()
==
torch
::
kCUDA
)
{
gather_point_from_grid_cuda
(
grids
,
numPointsPerGrid
,
pointIndexUnique
,
voxels
,
coors
,
gridShape
,
ndim
);
}
#endif
else
{
TV_THROW_INVALID_ARG
(
"unknown device type"
);
}
return
{
voxels
,
coors
};
}
}
// namespace spconv
test/benchmark_points_to_voxel_gpu.py
0 → 100644
View file @
c328c870
import
time
from
pathlib
import
Path
import
numpy
as
np
import
torch
from
torch
import
nn
import
spconv
from
spconv.utils
import
VoxelGeneratorV3
def
waymo_data
(
batch_size
=
1
):
data
=
np
.
load
(
Path
(
__file__
).
parent
/
"data"
/
"benchmark-pc.npz"
)
points
=
torch
.
from_numpy
(
data
[
'pc'
]).
cuda
().
float
()
voxel_size
=
torch
.
Tensor
([
0.1
,
0.1
,
0.1
]).
to
(
points
.
dtype
).
to
(
points
.
device
)
coors_range
=
torch
.
Tensor
([
-
80
,
-
80
,
-
2
,
80
,
80
,
6
]).
to
(
points
.
dtype
).
to
(
points
.
device
)
gen
=
VoxelGeneratorV3
(
voxel_size
,
coors_range
)
voxels
,
coors
=
gen
.
generate
(
points
)
N
=
coors
.
shape
[
0
]
batch_id
=
torch
.
zeros
([
N
,
1
],
dtype
=
coors
.
dtype
,
device
=
coors
.
device
)
coors
=
torch
.
cat
([
batch_id
,
coors
],
dim
=
1
)
return
voxels
,
coors
,
gen
.
grid_size
class
Net
(
nn
.
Module
):
def
__init__
(
self
,
shape
,
algo
,
device
):
super
().
__init__
()
self
.
device
=
device
self
.
net
=
spconv
.
SparseSequential
(
spconv
.
SubMConv3d
(
3
,
64
,
3
,
bias
=
False
,
indice_key
=
"c0"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
64
,
64
,
3
,
bias
=
False
,
indice_key
=
"c0"
,
algo
=
algo
),
# nn.BatchNorm1d(32),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
64
,
96
,
3
,
bias
=
False
,
indice_key
=
"c1"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
96
,
96
,
3
,
bias
=
False
,
indice_key
=
"c1"
,
algo
=
algo
),
# nn.BatchNorm1d(64),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
96
,
128
,
3
,
bias
=
False
,
indice_key
=
"c2"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
128
,
128
,
3
,
bias
=
False
,
indice_key
=
"c2"
,
algo
=
algo
),
# nn.BatchNorm1d(128),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
128
,
160
,
3
,
bias
=
False
,
indice_key
=
"c3"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
160
,
160
,
3
,
bias
=
False
,
indice_key
=
"c3"
,
algo
=
algo
),
# nn.BatchNorm1d(128),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
160
,
192
,
3
,
bias
=
False
,
indice_key
=
"c4"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
192
,
192
,
3
,
bias
=
False
,
indice_key
=
"c4"
,
algo
=
algo
),
# nn.BatchNorm1d(128),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
192
,
224
,
3
,
bias
=
False
,
indice_key
=
"c5"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
224
,
224
,
3
,
bias
=
False
,
indice_key
=
"c5"
,
algo
=
algo
),
# nn.BatchNorm1d(128),
# nn.ReLU(),
spconv
.
SparseMaxPool3d
(
2
,
2
),
spconv
.
SubMConv3d
(
224
,
256
,
3
,
bias
=
False
,
indice_key
=
"c6"
,
algo
=
algo
),
spconv
.
SubMConv3d
(
256
,
256
,
3
,
bias
=
False
,
indice_key
=
"c6"
,
algo
=
algo
),
)
max_batch_size
=
1
# grid (dense map) is used for indice generation. use pre-allocated grid can run faster.
self
.
grid
=
torch
.
full
([
max_batch_size
,
*
shape
],
-
1
,
dtype
=
torch
.
int32
,
device
=
self
.
device
)
# self.grid = None
self
.
shape
=
shape
def
forward
(
self
,
features
,
coors
,
batch_size
):
x
=
spconv
.
SparseConvTensor
(
features
,
coors
,
self
.
shape
,
batch_size
,
self
.
grid
)
return
self
.
net
(
x
)
def
main
():
voxels
,
coors
,
spatial_shape
=
waymo_data
()
voxels_th
,
coors_th
=
voxels
,
coors
algo
=
spconv
.
ConvAlgo
.
Native
net
=
Net
(
spatial_shape
[::
-
1
],
algo
,
voxels_th
.
device
).
cuda
(
device
=
voxels_th
.
device
).
eval
().
float
()
print
(
coors_th
.
shape
)
out
=
net
(
voxels_th
,
coors_th
,
1
)
print
(
out
.
spatial_shape
)
times
=
[]
with
torch
.
no_grad
():
for
i
in
range
(
20
):
torch
.
cuda
.
synchronize
()
t
=
time
.
time
()
out
=
net
(
voxels_th
,
coors_th
,
1
)
torch
.
cuda
.
synchronize
()
times
.
append
(
time
.
time
()
-
t
)
# print((net.grid == -1).float().sum(), net.grid.numel())
# print("spconv time", time.time() - t)
print
(
"spconv time"
,
np
.
mean
(
times
[
10
:]))
if
__name__
==
"__main__"
:
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment