Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
c88efea2
Commit
c88efea2
authored
Jul 04, 2020
by
xmyqsh
Browse files
pre-alloc and re-use gpu memory
9ms --> 1ms on waymo open dataset
parent
c328c870
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
223 additions
and
77 deletions
+223
-77
include/spconv/point2voxel.cu.h
include/spconv/point2voxel.cu.h
+33
-2
include/spconv/point2voxel_ops.h
include/spconv/point2voxel_ops.h
+10
-5
include/spconv/points2voxels.h
include/spconv/points2voxels.h
+2
-1
spconv/ops.py
spconv/ops.py
+0
-33
spconv/utils/__init__.py
spconv/utils/__init__.py
+54
-7
src/spconv/point2voxel.cu
src/spconv/point2voxel.cu
+35
-5
src/spconv/point2voxel_ops.cc
src/spconv/point2voxel_ops.cc
+15
-24
test/benchmark_points_to_voxel.py
test/benchmark_points_to_voxel.py
+74
-0
No files found.
include/spconv/point2voxel.cu.h
View file @
c88efea2
...
@@ -11,7 +11,7 @@ __global__ void scatterPointToGridKernel(
...
@@ -11,7 +11,7 @@ __global__ void scatterPointToGridKernel(
tv
::
TensorView
<
const
Index
>
indexes
,
tv
::
TensorView
<
const
Index
>
indexes
,
tv
::
TensorView
<
float
>
grids
,
tv
::
TensorView
<
float
>
grids
,
tv
::
TensorView
<
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
Index
>
pointIndex
Unique
,
tv
::
TensorView
<
Index
>
pointIndex
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
gridShape
)
{
const
tv
::
SimpleVector
<
Index
,
NDim
>
gridShape
)
{
Index
index
;
Index
index
;
int
numPoints
=
points
.
dim
(
0
);
int
numPoints
=
points
.
dim
(
0
);
...
@@ -22,7 +22,7 @@ __global__ void scatterPointToGridKernel(
...
@@ -22,7 +22,7 @@ __global__ void scatterPointToGridKernel(
// Use ILP to speed up it
// Use ILP to speed up it
index
=
tv
::
ArrayIndexRowMajor
<
NDim
,
NDim
>::
runPtrs
(
index
=
tv
::
ArrayIndexRowMajor
<
NDim
,
NDim
>::
runPtrs
(
indexes
.
data
()
+
ix
*
NDim
,
gridShape
.
data
(),
0
);
indexes
.
data
()
+
ix
*
NDim
,
gridShape
.
data
(),
0
);
pointIndex
Unique
(
ix
)
=
index
;
pointIndex
(
ix
)
=
index
;
atomicAdd
(
numPointsPerGrid
.
data
()
+
index
,
Index
(
1
));
atomicAdd
(
numPointsPerGrid
.
data
()
+
index
,
Index
(
1
));
#pragma unroll
#pragma unroll
for
(
int
k
=
0
;
k
!=
numFeatures
;
++
k
)
{
for
(
int
k
=
0
;
k
!=
numFeatures
;
++
k
)
{
...
@@ -55,4 +55,35 @@ __global__ void gatherPointFromGridKernel(
...
@@ -55,4 +55,35 @@ __global__ void gatherPointFromGridKernel(
index
,
coors
.
data
()
+
ix
*
NDim
,
gridShape
.
data
());
index
,
coors
.
data
()
+
ix
*
NDim
,
gridShape
.
data
());
}
}
}
}
template
<
typename
Index
>
__global__
void
resetGridKernel
(
tv
::
TensorView
<
float
>
grids
,
tv
::
TensorView
<
Index
>
numPointsPerGrid
,
tv
::
TensorView
<
Index
>
pointIndexUnique
)
{
Index
index
;
int
numVoxels
=
pointIndexUnique
.
dim
(
0
)
-
1
;
int
numFeatures
=
grids
.
dim
(
1
);
for
(
int
ix
:
tv
::
KernelLoopX
<
int
>
(
numVoxels
))
{
// slow here, random access
// Use ILP to speed up it
index
=
pointIndexUnique
(
ix
);
#pragma unroll
for
(
int
k
=
0
;
k
!=
numFeatures
;
++
k
)
{
grids
(
index
,
k
)
=
0
;
numPointsPerGrid
(
index
)
=
0
;
}
}
}
template
<
typename
Index
>
__global__
void
resetPointIndexKernel
(
tv
::
TensorView
<
Index
>
pointIndex
,
const
Index
gridVolume
)
{
int
num_max_points
=
pointIndex
.
dim
(
0
)
-
1
;
for
(
int
ix
:
tv
::
KernelLoopX
<
int
>
(
num_max_points
))
{
pointIndex
(
ix
)
=
gridVolume
;
}
}
}
// namespace spconv
}
// namespace spconv
include/spconv/point2voxel_ops.h
View file @
c88efea2
...
@@ -21,10 +21,15 @@
...
@@ -21,10 +21,15 @@
namespace
spconv
{
namespace
spconv
{
std
::
vector
<
torch
::
Tensor
>
int64_t
pointsToVoxel
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
pointsToVoxel
(
torch
::
Tensor
points
,
std
::
vector
<
int64_t
>
gridShape
,
torch
::
Tensor
indexes
,
const
int64_t
ndim
,
torch
::
Tensor
pointIndex
,
const
int64_t
gridVolume
);
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
std
::
vector
<
int64_t
>
gridShape
,
const
int64_t
ndim
);
}
// namespace spconv
}
// namespace spconv
include/spconv/points2voxels.h
View file @
c88efea2
...
@@ -11,12 +11,13 @@ void scatter_point_to_grid_cuda(
...
@@ -11,12 +11,13 @@ void scatter_point_to_grid_cuda(
torch
::
Tensor
indexes
,
torch
::
Tensor
indexes
,
torch
::
Tensor
grids
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
pointIndex
Unique
,
torch
::
Tensor
pointIndex
,
std
::
vector
<
int64_t
>
gridShape
,
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
);
const
int
ndim
);
void
gather_point_from_grid_cuda
(
void
gather_point_from_grid_cuda
(
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
pointIndex
,
torch
::
Tensor
pointIndexUnique
,
torch
::
Tensor
pointIndexUnique
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
std
::
vector
<
int64_t
>
gridShape
,
std
::
vector
<
int64_t
>
gridShape
,
...
...
spconv/ops.py
View file @
c88efea2
...
@@ -157,36 +157,3 @@ def pillar_scatter(features, coors, shape):
...
@@ -157,36 +157,3 @@ def pillar_scatter(features, coors, shape):
return
torch
.
ops
.
spconv
.
pillar_scatter_half
(
features
,
coors
,
shape
)
return
torch
.
ops
.
spconv
.
pillar_scatter_half
(
features
,
coors
,
shape
)
else
:
else
:
raise
NotImplementedError
raise
NotImplementedError
def
points_to_voxel
(
points
,
voxel_size
,
coors_range
):
"""
points: [N, ndim] float tensor. points[:, :3] contain xyz points and
points[:, 3:] contain other information such as reflectivity.
voxel_size: [3] list/tuple or array or tensor, float. xyz, indicate voxel size
coors_range: [6] list/tuple or array or tensor, float. indicate voxel range.
format: xyzxyz, minmax
"""
if
not
isinstance
(
voxel_size
,
torch
.
Tensor
):
if
not
isinstance
(
voxel_size
,
np
.
ndarray
):
voxel_size
=
np
.
array
(
voxel_size
,
dtype
=
points
.
dtype
)
voxel_size
=
torch
.
from_numpy
(
voxel_size
).
to
(
points
.
device
)
if
not
isinstance
(
voxel_size
,
torch
.
Tensor
):
if
not
isinstance
(
coors_range
,
np
.
ndarray
):
coors_range
=
np
.
array
(
coors_range
,
dtype
=
points
.
dtype
)
coors_range
=
torch
.
from_numpy
(
coors_range
).
to
(
points
.
device
)
grid_shape
=
torch
.
round
((
coors_range
[
3
:]
-
coors_range
[:
3
])
/
voxel_size
).
to
(
torch
.
int32
)
grid_volume
=
grid_shape
.
prod
()
ndim
=
grid_shape
.
shape
[
0
]
# indexes = torch.round((points[:, :3] - coors_range[:3]) / voxel_size).to(torch.int32)
indexes
=
torch
.
floor
((
points
[:,
:
3
]
-
coors_range
[:
3
])
/
voxel_size
).
to
(
torch
.
int32
)
voxels
,
coors
=
torch
.
ops
.
spconv
.
points_to_voxel
(
points
,
indexes
,
grid_shape
.
cpu
().
numpy
().
tolist
(),
ndim
,
grid_volume
.
item
())
# xyz --> zyx
#coors = coors[::-1]
x
,
y
,
z
=
coors
[:,
0
].
reshape
([
-
1
,
1
]),
coors
[:,
1
].
reshape
([
-
1
,
1
]),
coors
[:,
2
].
reshape
([
-
1
,
1
])
coors
=
torch
.
cat
([
z
,
y
,
x
],
dim
=
1
)
# can be skipped
x
,
y
,
z
,
f
=
voxels
[:,
0
].
reshape
([
-
1
,
1
]),
voxels
[:,
1
].
reshape
([
-
1
,
1
]),
voxels
[:,
2
].
reshape
([
-
1
,
1
]),
voxels
[:,
3
:]
voxels
=
torch
.
cat
([
z
,
y
,
x
,
f
],
dim
=
1
)
return
voxels
,
coors
spconv/utils/__init__.py
View file @
c88efea2
...
@@ -16,7 +16,6 @@ import numpy as np
...
@@ -16,7 +16,6 @@ import numpy as np
import
torch
import
torch
from
spconv
import
spconv_utils
from
spconv
import
spconv_utils
from
spconv.ops
import
points_to_voxel
as
points_to_voxel3
from
spconv.spconv_utils
import
(
non_max_suppression_cpu
,
from
spconv.spconv_utils
import
(
non_max_suppression_cpu
,
points_to_voxel_3d_np
,
points_to_voxel_3d_np
,
points_to_voxel_3d_np_mean
,
points_to_voxel_3d_np_mean
,
...
@@ -298,19 +297,39 @@ class VoxelGeneratorV2:
...
@@ -298,19 +297,39 @@ class VoxelGeneratorV2:
class
VoxelGeneratorV3
:
class
VoxelGeneratorV3
:
def
__init__
(
self
,
def
__init__
(
self
,
voxel_size
,
voxel_size
,
point_cloud_range
):
point_cloud_range
,
max_points
,
num_features
,
dtype
,
device
):
self
.
_max_points
=
max_points
self
.
_point_cloud_range
=
point_cloud_range
self
.
_point_cloud_range
=
point_cloud_range
self
.
_voxel_size
=
voxel_size
self
.
_voxel_size
=
voxel_size
self
.
_grid_size
=
torch
.
round
((
self
.
_point_cloud_range
[
3
:]
-
self
.
_point_cloud_range
[:
3
])
/
self
.
_voxel_size
).
to
(
torch
.
int32
)
self
.
_grid_size
=
torch
.
round
((
self
.
_point_cloud_range
[
3
:]
-
self
.
_point_cloud_range
[:
3
])
/
self
.
_voxel_size
).
to
(
torch
.
int32
)
grid_volume
=
self
.
_grid_size
.
prod
()
self
.
_grid_size
=
self
.
_grid_size
.
cpu
().
numpy
().
tolist
()
self
.
_grid_size
=
self
.
_grid_size
.
cpu
().
numpy
().
tolist
()
self
.
_ndim
=
len
(
self
.
_grid_size
)
self
.
_dtype
=
dtype
self
.
_device
=
device
self
.
_point_index
=
torch
.
full
([
max_points
+
1
],
grid_volume
,
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
self
.
_grids
=
torch
.
zeros
([
grid_volume
,
num_features
],
dtype
=
self
.
_dtype
,
device
=
self
.
_device
)
self
.
_num_points_per_grid
=
torch
.
zeros
([
grid_volume
],
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
self
.
_voxels
=
torch
.
zeros
([
max_points
,
num_features
],
dtype
=
self
.
_dtype
,
device
=
self
.
_device
)
self
.
_coors
=
torch
.
zeros
([
max_points
,
self
.
_ndim
],
dtype
=
torch
.
int32
,
device
=
self
.
_device
)
def
generate
(
self
,
points
):
def
generate
(
self
,
points
):
res
=
points_to_voxel3
(
points
,
self
.
_voxel_size
,
self
.
_point_cloud_range
)
assert
points
.
shape
[
0
]
<=
self
.
_max_points
,
'please enlarge max_points to not smaller than '
+
str
(
points
.
shape
[
0
])
return
res
points
.
to
(
self
.
_dtype
).
to
(
self
.
_device
)
return
self
.
points_to_voxel
(
points
)
def
generate_multi_gpu
(
self
,
points
,
max_voxels
=
None
):
def
generate_multi_gpu
(
self
,
points
):
res
=
points_to_voxel3
(
points
,
self
.
_voxel_size
,
self
.
_point_cloud_range
)
assert
points
.
shape
[
0
]
<=
self
.
_max_points
,
'please enlarge max_points to not smaller than '
+
str
(
points
.
shape
[
0
])
return
res
points
.
to
(
self
.
_dtype
).
to
(
self
.
_device
)
return
self
.
points_to_voxel
(
points
)
@
property
@
property
def
voxel_size
(
self
):
def
voxel_size
(
self
):
...
@@ -324,3 +343,31 @@ class VoxelGeneratorV3:
...
@@ -324,3 +343,31 @@ class VoxelGeneratorV3:
def
grid_size
(
self
):
def
grid_size
(
self
):
return
self
.
_grid_size
return
self
.
_grid_size
def
points_to_voxel
(
self
,
points
):
"""
points: [N, ndim] float tensor. points[:, :3] contain xyz points and
points[:, 3:] contain other information such as reflectivity.
voxel_size: [3] list/tuple or array or tensor, float. xyz, indicate voxel size
coors_range: [6] list/tuple or array or tensor, float. indicate voxel range.
format: xyzxyz, minmax
"""
indexes
=
torch
.
floor
((
points
[:,
:
3
]
-
self
.
_point_cloud_range
[:
3
])
/
self
.
_voxel_size
).
to
(
torch
.
int32
)
num_voxel
=
torch
.
ops
.
spconv
.
points_to_voxel
(
points
,
indexes
,
self
.
_point_index
,
self
.
_grids
,
self
.
_num_points_per_grid
,
self
.
_voxels
,
self
.
_coors
,
self
.
_grid_size
,
self
.
_ndim
)
voxels
=
self
.
_voxels
[:
num_voxel
,
:]
coors
=
self
.
_coors
[:
num_voxel
,
:]
# xyz --> zyx
#coors = coors[::-1]
x
,
y
,
z
=
coors
[:,
0
].
reshape
([
-
1
,
1
]),
coors
[:,
1
].
reshape
([
-
1
,
1
]),
coors
[:,
2
].
reshape
([
-
1
,
1
])
coors
=
torch
.
cat
([
z
,
y
,
x
],
dim
=
1
)
# can be skipped
# x, y, z, f = voxels[:, 0].reshape([-1, 1]), voxels[:, 1].reshape([-1, 1]), voxels[:, 2].reshape([-1, 1]), voxels[:, 3:]
# voxels = torch.cat([z, y, x, f], dim=1)
return
voxels
,
coors
src/spconv/point2voxel.cu
View file @
c88efea2
...
@@ -15,13 +15,13 @@ void scatter_point_to_grid_cuda(
...
@@ -15,13 +15,13 @@ void scatter_point_to_grid_cuda(
torch
::
Tensor
indexes
,
torch
::
Tensor
indexes
,
torch
::
Tensor
grids
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
pointIndex
Unique
,
torch
::
Tensor
pointIndex
,
std
::
vector
<
int64_t
>
gridShape
,
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
)
{
const
int
ndim
)
{
auto
stream
=
at
::
cuda
::
getCurrentCUDAStream
();
auto
stream
=
at
::
cuda
::
getCurrentCUDAStream
();
auto
num_points
=
points
.
size
(
0
);
auto
num_points
=
points
.
size
(
0
);
auto
num_features
=
points
.
size
(
1
);
auto
num_features
=
points
.
size
(
1
);
tv
::
dispatch_torch
<
int32_t
>
(
pointIndex
Unique
.
scalar_type
(),
[
&
](
auto
IndexValue
)
{
tv
::
dispatch_torch
<
int32_t
>
(
pointIndex
.
scalar_type
(),
[
&
](
auto
IndexValue
)
{
using
Index
=
decltype
(
IndexValue
);
using
Index
=
decltype
(
IndexValue
);
tv
::
dispatch_int
<
2
,
3
,
4
>
(
ndim
,
[
&
](
auto
I
)
{
tv
::
dispatch_int
<
2
,
3
,
4
>
(
ndim
,
[
&
](
auto
I
)
{
constexpr
int
NDim
=
decltype
(
I
)
::
value
;
constexpr
int
NDim
=
decltype
(
I
)
::
value
;
...
@@ -32,7 +32,7 @@ void scatter_point_to_grid_cuda(
...
@@ -32,7 +32,7 @@ void scatter_point_to_grid_cuda(
tv
::
torch2tv
<
Index
>
(
indexes
),
tv
::
torch2tv
<
Index
>
(
indexes
),
tv
::
torch2tv
<
float
>
(
grids
),
tv
::
torch2tv
<
float
>
(
grids
),
tv
::
torch2tv
<
Index
>
(
numPointsPerGrid
),
tv
::
torch2tv
<
Index
>
(
numPointsPerGrid
),
tv
::
torch2tv
<
Index
>
(
pointIndex
Unique
),
tv
::
torch2tv
<
Index
>
(
pointIndex
),
gs
);
gs
);
TV_CHECK_CUDA_ERR_V2
(
"scatterPointToGridKernel failed"
);
TV_CHECK_CUDA_ERR_V2
(
"scatterPointToGridKernel failed"
);
#ifdef TV_LOG_KERNEL_INFO
#ifdef TV_LOG_KERNEL_INFO
...
@@ -48,17 +48,33 @@ void scatter_point_to_grid_cuda(
...
@@ -48,17 +48,33 @@ void scatter_point_to_grid_cuda(
void
gather_point_from_grid_cuda
(
void
gather_point_from_grid_cuda
(
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
grids
,
torch
::
Tensor
numPointsPerGrid
,
torch
::
Tensor
pointIndex
,
torch
::
Tensor
pointIndexUnique
,
torch
::
Tensor
pointIndexUnique
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
torch
::
Tensor
voxels
,
torch
::
Tensor
coors
,
std
::
vector
<
int64_t
>
gridShape
,
std
::
vector
<
int64_t
>
gridShape
,
const
int
ndim
)
{
const
int
ndim
)
{
auto
stream
=
at
::
cuda
::
getCurrentCUDAStream
();
auto
stream
=
at
::
cuda
::
getCurrentCUDAStream
();
auto
num_voxel
=
voxels
.
size
(
0
);
auto
num_voxel
=
voxels
.
size
(
0
);
auto
num_max_points
=
pointIndex
.
size
(
0
)
-
1
;
auto
grid_volume
=
grids
.
size
(
0
);
tv
::
dispatch_torch
<
int32_t
>
(
pointIndexUnique
.
scalar_type
(),
[
&
](
auto
IndexValue
)
{
tv
::
dispatch_torch
<
int32_t
>
(
pointIndexUnique
.
scalar_type
(),
[
&
](
auto
IndexValue
)
{
using
Index
=
decltype
(
IndexValue
);
using
Index
=
decltype
(
IndexValue
);
tv
::
dispatch_int
<
2
,
3
,
4
>
(
ndim
,
[
&
](
auto
I
)
{
tv
::
dispatch_int
<
2
,
3
,
4
>
(
ndim
,
[
&
](
auto
I
)
{
constexpr
int
NDim
=
decltype
(
I
)
::
value
;
constexpr
int
NDim
=
decltype
(
I
)
::
value
;
tv
::
SimpleVector
<
Index
,
NDim
>
gs
(
gridShape
.
begin
(),
gridShape
.
end
());
tv
::
SimpleVector
<
Index
,
NDim
>
gs
(
gridShape
.
begin
(),
gridShape
.
end
());
resetPointIndexKernel
<
Index
>
<<<
tv
::
cuda
::
getBlocks
(
num_max_points
),
tv
::
cuda
::
CUDA_NUM_THREADS
,
0
,
stream
>>>
(
tv
::
torch2tv
<
Index
>
(
pointIndex
),
grid_volume
);
TV_CHECK_CUDA_ERR_V2
(
"resetPointIndexKernel failed"
);
#ifdef TV_LOG_KERNEL_INFO
cudaFuncAttributes
attr0
;
checkCudaErrors
(
cudaFuncGetAttributes
(
&
attr0
,
resetPointIndexKernel
<
Index
,
NDim
>
));
tv
::
ssprint
(
"resetPointIndexKernel<"
,
tv
::
type_s
<
Index
>
,
NDim
,
">"
,
attr0
.
numRegs
);
#endif
gatherPointFromGridKernel
<
Index
,
NDim
>
gatherPointFromGridKernel
<
Index
,
NDim
>
<<<
tv
::
cuda
::
getBlocks
(
num_voxel
),
tv
::
cuda
::
CUDA_NUM_THREADS
,
<<<
tv
::
cuda
::
getBlocks
(
num_voxel
),
tv
::
cuda
::
CUDA_NUM_THREADS
,
0
,
stream
>>>
(
tv
::
torch2tv
<
float
>
(
grids
),
0
,
stream
>>>
(
tv
::
torch2tv
<
float
>
(
grids
),
...
@@ -68,11 +84,25 @@ void gather_point_from_grid_cuda(
...
@@ -68,11 +84,25 @@ void gather_point_from_grid_cuda(
tv
::
torch2tv
<
Index
>
(
coors
),
tv
::
torch2tv
<
Index
>
(
coors
),
gs
);
gs
);
TV_CHECK_CUDA_ERR_V2
(
"gatherPointFromGridKernel failed"
);
TV_CHECK_CUDA_ERR_V2
(
"gatherPointFromGridKernel failed"
);
cudaFuncAttributes
attr2
;
#ifdef TV_LOG_KERNEL_INFO
#ifdef TV_LOG_KERNEL_INFO
cudaFuncAttributes
attr1
;
checkCudaErrors
(
cudaFuncGetAttributes
(
checkCudaErrors
(
cudaFuncGetAttributes
(
&
attr
2
,
gatherPointFromGridKernel
<
Index
,
NDim
>
));
&
attr
1
,
gatherPointFromGridKernel
<
Index
,
NDim
>
));
tv
::
ssprint
(
"gatherPointFromGridKernel<"
,
tv
::
type_s
<
Index
>
,
NDim
,
">"
,
tv
::
ssprint
(
"gatherPointFromGridKernel<"
,
tv
::
type_s
<
Index
>
,
NDim
,
">"
,
attr1
.
numRegs
);
#endif
resetGridKernel
<
Index
>
<<<
tv
::
cuda
::
getBlocks
(
num_voxel
),
tv
::
cuda
::
CUDA_NUM_THREADS
,
0
,
stream
>>>
(
tv
::
torch2tv
<
float
>
(
grids
),
tv
::
torch2tv
<
Index
>
(
numPointsPerGrid
),
tv
::
torch2tv
<
Index
>
(
pointIndexUnique
));
TV_CHECK_CUDA_ERR_V2
(
"resetGridKernel failed"
);
#ifdef TV_LOG_KERNEL_INFO
cudaFuncAttributes
attr2
;
checkCudaErrors
(
cudaFuncGetAttributes
(
&
attr2
,
resetGridKernel
<
Index
,
NDim
>
));
tv
::
ssprint
(
"resetGridKernel<"
,
tv
::
type_s
<
Index
>
,
NDim
,
">"
,
attr2
.
numRegs
);
attr2
.
numRegs
);
#endif
#endif
});
});
...
...
src/spconv/point2voxel_ops.cc
View file @
c88efea2
...
@@ -3,53 +3,44 @@
...
@@ -3,53 +3,44 @@
namespace
spconv
{
namespace
spconv
{
std
::
vector
<
torch
::
Tensor
>
int64_t
pointsToVoxel
(
torch
::
Tensor
points
,
torch
::
Tensor
indexes
,
pointsToVoxel
(
torch
::
Tensor
points
,
std
::
vector
<
int64_t
>
gridShape
,
torch
::
Tensor
indexes
,
const
int64_t
ndim
,
torch
::
Tensor
pointIndex
,
const
int64_t
gridVolume
)
{
torch
::
Tensor
grids
,
auto
device
=
points
.
device
().
type
();
torch
::
Tensor
numPointsPerGrid
,
auto
num_points
=
points
.
size
(
0
);
torch
::
Tensor
voxels
,
auto
num_features
=
points
.
size
(
1
);
torch
::
Tensor
coors
,
auto
pointIndexUnique
=
torch
::
full
(
std
::
vector
<
int64_t
>
gridShape
,
{
num_points
+
1
},
std
::
numeric_limits
<
int
>::
max
(),
const
int64_t
ndim
)
{
torch
::
dtype
(
torch
::
kInt32
).
device
(
points
.
device
()));
auto
grids
=
torch
::
zeros
({
gridVolume
,
num_features
},
torch
::
dtype
(
points
.
dtype
()).
device
(
points
.
device
()));
auto
numPointsPerGrid
=
torch
::
zeros
({
gridVolume
},
torch
::
dtype
(
torch
::
kInt32
).
device
(
points
.
device
()));
if
(
points
.
device
().
type
()
==
torch
::
kCPU
)
{
if
(
points
.
device
().
type
()
==
torch
::
kCPU
)
{
TV_THROW_INVALID_ARG
(
"not support cpu currently"
);
TV_THROW_INVALID_ARG
(
"not support cpu currently"
);
}
}
#ifdef TV_CUDA
#ifdef TV_CUDA
else
if
(
points
.
device
().
type
()
==
torch
::
kCUDA
)
{
else
if
(
points
.
device
().
type
()
==
torch
::
kCUDA
)
{
scatter_point_to_grid_cuda
(
points
,
indexes
,
grids
,
scatter_point_to_grid_cuda
(
points
,
indexes
,
grids
,
numPointsPerGrid
,
pointIndex
Unique
,
gridShape
,
ndim
);
numPointsPerGrid
,
pointIndex
,
gridShape
,
ndim
);
}
}
#endif
#endif
else
{
else
{
TV_THROW_INVALID_ARG
(
"unknown device type"
);
TV_THROW_INVALID_ARG
(
"unknown device type"
);
}
}
auto
res
=
torch
::
_unique
(
pointIndex
Unique
);
auto
res
=
torch
::
_unique
(
pointIndex
);
pointIndexUnique
=
std
::
get
<
0
>
(
res
);
auto
pointIndexUnique
=
std
::
get
<
0
>
(
res
);
auto
num_voxel
=
pointIndexUnique
.
size
(
0
)
-
1
;
auto
num_voxel
=
pointIndexUnique
.
size
(
0
)
-
1
;
auto
voxels
=
torch
::
zeros
({
num_voxel
,
num_features
},
torch
::
dtype
(
points
.
dtype
()).
device
(
points
.
device
()));
auto
coors
=
torch
::
zeros
({
num_voxel
,
ndim
},
torch
::
dtype
(
torch
::
kInt32
).
device
(
points
.
device
()));
if
(
points
.
device
().
type
()
==
torch
::
kCPU
)
{
if
(
points
.
device
().
type
()
==
torch
::
kCPU
)
{
TV_THROW_INVALID_ARG
(
"not support cpu currently"
);
TV_THROW_INVALID_ARG
(
"not support cpu currently"
);
}
}
#ifdef TV_CUDA
#ifdef TV_CUDA
else
if
(
points
.
device
().
type
()
==
torch
::
kCUDA
)
{
else
if
(
points
.
device
().
type
()
==
torch
::
kCUDA
)
{
gather_point_from_grid_cuda
(
grids
,
numPointsPerGrid
,
gather_point_from_grid_cuda
(
grids
,
numPointsPerGrid
,
pointIndexUnique
,
voxels
,
coors
,
gridShape
,
ndim
);
pointIndex
,
pointIndexUnique
,
voxels
,
coors
,
gridShape
,
ndim
);
}
}
#endif
#endif
else
{
else
{
TV_THROW_INVALID_ARG
(
"unknown device type"
);
TV_THROW_INVALID_ARG
(
"unknown device type"
);
}
}
return
{
voxel
s
,
coors
}
;
return
num_
voxel
;
}
}
}
// namespace spconv
}
// namespace spconv
test/benchmark_points_to_voxel.py
0 → 100644
View file @
c88efea2
import
time
from
pathlib
import
Path
import
numpy
as
np
import
torch
from
torch
import
nn
import
spconv
from
spconv.utils
import
VoxelGeneratorV2
,
VoxelGeneratorV3
def
waymo_data_gpu
(
batch_size
=
1
):
print
(
'gpu with total points available per voxel'
)
data
=
np
.
load
(
Path
(
__file__
).
parent
/
"data"
/
"benchmark-pc.npz"
)
points
=
torch
.
from_numpy
(
data
[
'pc'
]).
cuda
().
float
()
voxel_size
=
torch
.
Tensor
([
0.1
,
0.1
,
0.1
]).
to
(
points
.
dtype
).
to
(
points
.
device
)
coors_range
=
torch
.
Tensor
([
-
80
,
-
80
,
-
2
,
80
,
80
,
6
]).
to
(
points
.
dtype
).
to
(
points
.
device
)
gen
=
VoxelGeneratorV3
(
voxel_size
,
coors_range
,
max_points
=
200000
,
num_features
=
points
.
shape
[
1
],
dtype
=
points
.
dtype
,
device
=
points
.
device
)
voxels
,
coors
=
gen
.
generate
(
points
)
times
=
[]
with
torch
.
no_grad
():
for
i
in
range
(
200
):
torch
.
cuda
.
synchronize
()
t
=
time
.
time
()
voxels
,
coors
=
gen
.
generate
(
points
)
torch
.
cuda
.
synchronize
()
times
.
append
(
time
.
time
()
-
t
)
print
(
"voxelization time"
,
np
.
mean
(
times
[
100
:]))
N
=
coors
.
shape
[
0
]
batch_id
=
torch
.
zeros
([
N
,
1
],
dtype
=
coors
.
dtype
,
device
=
coors
.
device
)
coors
=
torch
.
cat
([
batch_id
,
coors
],
dim
=
1
)
return
voxels
,
coors
,
gen
.
grid_size
def
waymo_data_cpu
(
max_points_per_voxel
=
1
,
batch_size
=
1
):
print
(
'cpu with %d max points per voxel'
%
max_points_per_voxel
)
gen
=
VoxelGeneratorV2
([
0.1
,
0.1
,
0.1
],
[
-
80
,
-
80
,
-
2
,
80
,
80
,
6
],
max_points_per_voxel
,
150000
)
data
=
np
.
load
(
Path
(
__file__
).
parent
/
"data"
/
"benchmark-pc.npz"
)
pc
=
data
[
"pc"
]
data
=
gen
.
generate
(
pc
)
times
=
[]
with
torch
.
no_grad
():
for
i
in
range
(
200
):
torch
.
cuda
.
synchronize
()
t
=
time
.
time
()
data
=
gen
.
generate
(
pc
)
torch
.
cuda
.
synchronize
()
times
.
append
(
time
.
time
()
-
t
)
print
(
"voxelization time"
,
np
.
mean
(
times
[
100
:]))
voxels
=
data
[
"voxels"
].
reshape
(
-
1
,
3
)
coors
=
data
[
"coordinates"
]
N
=
coors
.
shape
[
0
]
coors
=
np
.
concatenate
([
np
.
full
([
N
,
1
],
0
,
coors
.
dtype
),
coors
],
axis
=
1
)
return
voxels
,
coors
,
gen
.
grid_size
def
main
():
waymo_data_gpu
()
waymo_data_cpu
(
1
)
waymo_data_cpu
(
10
)
waymo_data_cpu
(
40
)
if
__name__
==
"__main__"
:
main
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment