Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
bf011c76
Commit
bf011c76
authored
Nov 23, 2021
by
yan.yan
Browse files
temp commit
parent
4791f582
Changes
34
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
1397 additions
and
654 deletions
+1397
-654
spconv/pytorch/conv.py
spconv/pytorch/conv.py
+17
-1
spconv/pytorch/core.py
spconv/pytorch/core.py
+8
-1
spconv/pytorch/cppcore.py
spconv/pytorch/cppcore.py
+14
-4
spconv/pytorch/modules.py
spconv/pytorch/modules.py
+1
-0
spconv/pytorch/ops.py
spconv/pytorch/ops.py
+9
-2
spconv/pytorch/utils.py
spconv/pytorch/utils.py
+56
-19
test/benchmark.py
test/benchmark.py
+20
-20
test/test_all_algo.py
test/test_all_algo.py
+663
-0
test/test_conv.py
test/test_conv.py
+274
-574
test/test_implgemm.py
test/test_implgemm.py
+0
-15
test/test_multi_impl.py
test/test_multi_impl.py
+324
-3
test/test_native_kernels.py
test/test_native_kernels.py
+0
-14
test_before_push.sh
test_before_push.sh
+10
-0
version.txt
version.txt
+1
-1
No files found.
spconv/pytorch/conv.py
View file @
bf011c76
...
...
@@ -35,6 +35,20 @@ from spconv.utils import nullcontext
FILTER_HWIO
=
False
def expand_nd(val: Union[int, List[int], Tuple[int, ...]],
              ndim: int) -> List[int]:
    """Expand a scalar or a per-axis sequence into a list of ``ndim`` ints.

    Args:
        val: either a single int, which is repeated for every axis, or a
            list/tuple that already holds exactly ``ndim`` values.
        ndim: number of entries the result must have.

    Returns:
        A newly created list of length ``ndim``. The input sequence is never
        returned as-is, so later mutation of the result cannot affect the
        caller's list (and vice versa).

    Raises:
        AssertionError: if ``val`` is a list/tuple whose length is not
            ``ndim``.
        NotImplementedError: if ``val`` is not an int, list or tuple.
    """
    if isinstance(val, int):
        return [val] * ndim
    if isinstance(val, (list, tuple)):
        assert len(val) == ndim, "expected {} values, got {}".format(
            ndim, len(val))
        # always copy: lists and tuples are handled the same way and the
        # result never aliases the argument.
        return list(val)
    raise NotImplementedError(
        "expand_nd does not support values of type {}".format(
            type(val).__name__))
def
_calculate_fan_in_and_fan_out_hwio
(
tensor
,
algo
:
ConvAlgo
):
dimensions
=
tensor
.
ndimension
()
if
dimensions
<
2
:
...
...
@@ -110,7 +124,9 @@ class SparseConvolution(SparseModule):
self
.
out_channels
=
out_channels
self
.
kernel_size
=
kernel_size
kv
=
int
(
np
.
prod
(
kernel_size
))
self
.
conv1x1
=
kv
==
1
kv_stride
=
int
(
np
.
prod
(
kernel_size
))
self
.
conv1x1
=
kv
==
1
and
kv_stride
==
1
self
.
stride
=
stride
self
.
padding
=
padding
self
.
dilation
=
dilation
...
...
spconv/pytorch/core.py
View file @
bf011c76
...
...
@@ -104,7 +104,8 @@ class SparseConvTensor(metaclass=SpConvTensorMeta):
indice_dict
:
Optional
[
dict
]
=
None
,
benchmark
:
bool
=
False
,
permanent_thrust_allocator
:
bool
=
False
,
enable_timer
:
bool
=
False
):
enable_timer
:
bool
=
False
,
force_algo
:
Optional
[
ConvAlgo
]
=
None
):
"""
Args:
features: [num_points, num_features] feature tensor
...
...
@@ -115,6 +116,8 @@ class SparseConvTensor(metaclass=SpConvTensorMeta):
is very large.
benchmark: whether to enable benchmark. if enabled, all sparse operators will be recorded to
SparseConvTensor.
enable_timer: if enabled, the run time of all spconv internal ops will be recorded in _timer.
force_algo: force conv/pool layers to use this algo; should only be used for debugging.
"""
ndim
=
indices
.
shape
[
1
]
-
1
assert
features
.
ndim
==
2
...
...
@@ -139,6 +142,7 @@ class SparseConvTensor(metaclass=SpConvTensorMeta):
if
permanent_thrust_allocator
:
self
.
thrust_allocator
=
ThrustSortAllocator
(
features
.
device
)
self
.
_timer
=
CUDAKernelTimer
(
enable_timer
)
self
.
force_algo
=
force_algo
def
replace_feature
(
self
,
feature
:
torch
.
Tensor
):
"""we need to replace x.features = F.relu(x.features) with x = x.replace_feature(F.relu(x.features))
...
...
@@ -152,6 +156,8 @@ class SparseConvTensor(metaclass=SpConvTensorMeta):
new_spt
.
benchmark_record
=
self
.
benchmark_record
new_spt
.
thrust_allocator
=
self
.
thrust_allocator
new_spt
.
_timer
=
self
.
_timer
new_spt
.
force_algo
=
self
.
force_algo
return
new_spt
@
property
...
...
@@ -217,4 +223,5 @@ class SparseConvTensor(metaclass=SpConvTensorMeta):
tensor
.
benchmark_record
=
self
.
benchmark_record
tensor
.
thrust_allocator
=
self
.
thrust_allocator
tensor
.
_timer
=
self
.
_timer
tensor
.
force_algo
=
self
.
force_algo
return
tensor
spconv/pytorch/cppcore.py
View file @
bf011c76
...
...
@@ -30,7 +30,8 @@ _TORCH_DTYPE_TO_TV = {
def
torch_tensor_to_tv
(
ten
:
torch
.
Tensor
,
dtype
:
Optional
[
int
]
=
None
,
shape
:
Optional
[
List
[
int
]]
=
None
):
shape
:
Optional
[
List
[
int
]]
=
None
,
stride
:
Optional
[
List
[
int
]]
=
None
):
# assert ten.is_contiguous(), "must be contiguous tensor"
ptr
=
ten
.
data_ptr
()
device
=
ten
.
device
...
...
@@ -40,11 +41,20 @@ def torch_tensor_to_tv(ten: torch.Tensor,
tv_device
=
0
else
:
raise
NotImplementedError
if
shape
is
None
:
shape
=
list
(
ten
.
shape
)
if
dtype
is
None
:
dtype
=
_TORCH_DTYPE_TO_TV
[
ten
.
dtype
]
return
tv
.
from_blob
(
ptr
,
shape
,
list
(
ten
.
stride
()),
dtype
,
tv_device
)
if
stride
is
None
:
stride
=
list
(
ten
.
stride
())
if
shape
is
None
:
shape
=
list
(
ten
.
shape
)
else
:
if
not
ten
.
is_contiguous
():
msg
=
"if you provide custom shape for non-contig tensor, stride must not None"
assert
stride
is
not
None
,
msg
else
:
# custom shape, if tensor is contiguous, we use from_blob and calc strides
return
tv
.
from_blob
(
ptr
,
shape
,
dtype
,
tv_device
)
return
tv
.
from_blob_strided
(
ptr
,
shape
,
stride
,
dtype
,
tv_device
)
def
get_current_stream
():
...
...
spconv/pytorch/modules.py
View file @
bf011c76
...
...
@@ -137,6 +137,7 @@ class SparseSequential(SparseModule):
input
=
module
(
input
)
else
:
if
isinstance
(
input
,
spconv
.
SparseConvTensor
):
print
(
input
.
features
.
shape
)
if
input
.
indices
.
shape
[
0
]
!=
0
:
input
=
input
.
replace_feature
(
module
(
input
.
features
))
else
:
...
...
spconv/pytorch/ops.py
View file @
bf011c76
...
...
@@ -1066,7 +1066,7 @@ def indice_conv_backward(features: torch.Tensor,
alpha
=
1.0
,
beta
=
beta
)
if
not
FILTER_HWIO
:
if
is_KC_not_CK
:
a
=
out_bp_tv
b
=
features_tv
a_inds
=
out_indices
...
...
@@ -1376,6 +1376,9 @@ def implicit_gemm_backward(features: torch.Tensor,
mask_width
=-
1
,
beta
=
beta
,
stream
=
stream
)
# for backward weight, beta = 0 because each split
# handle different kernel locations.
# TODO remove D iterator in backward weight kernel
CONV
.
run_with_tuned_result
(
wgrad_tune_res
,
ConvOpType
.
kBackwardWeight
,
...
...
@@ -1389,7 +1392,7 @@ def implicit_gemm_backward(features: torch.Tensor,
reverse_mask
=
False
,
mask_filter
=
masks
[
j
].
item
(),
mask_width
=
mask_width
,
beta
=
beta
,
beta
=
0
,
workspace
=
workspace_tv
,
stream
=
stream
)
...
...
@@ -1403,6 +1406,8 @@ def indice_maxpool(features: torch.Tensor, indice_pairs: torch.Tensor,
# stream = get_current_stream()
# CONV.stream_synchronize(stream)
# t = time.time()
if
not
features
.
is_contiguous
():
features
=
features
.
contiguous
()
out_channel
=
features
.
shape
[
-
1
]
out_features
=
torch
.
zeros
((
num_activate_out
,
out_channel
),
...
...
@@ -1474,6 +1479,8 @@ def indice_maxpool_implicit_gemm(features: torch.Tensor,
stream
=
get_current_stream
()
# CONV.stream_synchronize(stream)
# t = time.time()
if
not
features
.
is_contiguous
():
features
=
features
.
contiguous
()
out_channel
=
features
.
shape
[
-
1
]
out_features
=
torch
.
empty
((
num_activate_out
,
out_channel
),
...
...
spconv/pytorch/utils.py
View file @
bf011c76
...
...
@@ -71,36 +71,72 @@ class PointToVoxel(object):
pc
:
torch
.
Tensor
,
clear_voxels
:
bool
=
True
,
empty_mean
:
bool
=
False
):
"""generate voxels/indices/num_point_per_voxel/pc_voxel_ids from
point cloud.
This function doesn't return pc_voxel_id, for backward compatibility.
pc_voxel_id will be added in spconv 2.2.
Args:
pc: [N, 3+] point cloud.
clear_voxels: if True, call zero on voxels
empty_mean: if True, fill empty locations of voxels with the mean.
Returns:
voxels: voxels
indices: quantized coords
num_per_voxel: number of points in a voxel
"""
res
=
self
.
generate_voxel_with_id
(
pc
,
clear_voxels
,
empty_mean
)
return
res
[
0
],
res
[
1
],
res
[
2
]
def
generate_voxel_with_id
(
self
,
pc
:
torch
.
Tensor
,
clear_voxels
:
bool
=
True
,
empty_mean
:
bool
=
False
):
"""generate voxels/indices/num_point_per_voxel/pc_voxel_ids from
point cloud.
Args:
pc: [N, 3+] point cloud.
clear_voxels: if True, call zero on voxels
empty_mean: if True, fill empty locations of voxels with the mean.
Returns:
voxels: voxels
indices: quantized coords
num_per_voxel: number of points in a voxel
pc_voxel_id: voxel id for every point. if not exists, -1.
"""
assert
pc
.
device
.
type
==
self
.
device
.
type
,
"your pc device is wrong"
expected_hash_data_num
=
pc
.
shape
[
0
]
*
2
with
torch
.
no_grad
():
pc_voxel_id
=
torch
.
empty
([
pc
.
shape
[
0
]],
dtype
=
torch
.
int64
,
device
=
self
.
device
)
pc_voxel_id_tv
=
torch_tensor_to_tv
(
pc_voxel_id
)
if
self
.
device
.
type
!=
"cpu"
:
if
self
.
hashdata
.
shape
[
0
]
<
expected_hash_data_num
:
self
.
hashdata
=
torch
.
empty
([
expected_hash_data_num
,
2
],
dtype
=
torch
.
int64
,
device
=
self
.
device
)
hashdata
=
torch
.
empty
([
expected_hash_data_num
,
2
],
dtype
=
torch
.
int64
,
device
=
pc
.
device
)
point_indice_data
=
torch
.
empty
([
pc
.
shape
[
0
]],
dtype
=
torch
.
int64
,
device
=
pc
.
device
)
if
self
.
point_indice_data
.
shape
[
0
]
<
pc
.
shape
[
0
]:
self
.
point_indice_data
=
torch
.
empty
([
pc
.
shape
[
0
]],
dtype
=
torch
.
int64
,
device
=
self
.
device
)
pc_tv
=
torch_tensor_to_tv
(
pc
)
stream
=
get_current_stream
()
voxels_tv
=
torch_tensor_to_tv
(
self
.
voxels
)
indices_tv
=
torch_tensor_to_tv
(
self
.
indices
)
num_per_voxel_tv
=
torch_tensor_to_tv
(
self
.
num_per_voxel
)
hashdata_tv
=
torch_tensor_to_tv
(
self
.
hashdata
,
hashdata
,
dtype
=
tv
.
custom128
,
shape
=
[
self
.
hashdata
.
shape
[
0
]])
point_indice_data_tv
=
torch_tensor_to_tv
(
self
.
point_indice_data
)
res
=
SpconvOps
.
point2voxel_cuda
(
pc_tv
,
voxels_tv
,
indices_tv
,
num_per_voxel_tv
,
hashdata_tv
,
point_indice_data_tv
,
self
.
vsize
,
self
.
grid_size
,
self
.
grid_stride
,
self
.
coors_range
,
empty_mean
,
clear_voxels
,
stream
)
shape
=
[
hashdata
.
shape
[
0
]])
point_indice_data_tv
=
torch_tensor_to_tv
(
point_indice_data
)
with
torch
.
cuda
.
device
(
pc
.
device
):
res
=
SpconvOps
.
point2voxel_cuda
(
pc_tv
,
voxels_tv
,
indices_tv
,
num_per_voxel_tv
,
hashdata_tv
,
point_indice_data_tv
,
pc_voxel_id_tv
,
self
.
vsize
,
self
.
grid_size
,
self
.
grid_stride
,
self
.
coors_range
,
empty_mean
,
clear_voxels
,
stream
)
num_voxels
=
res
[
0
].
shape
[
0
]
else
:
pc_tv
=
torch_tensor_to_tv
(
pc
)
...
...
@@ -111,6 +147,7 @@ class PointToVoxel(object):
hashdata_tv
=
torch_tensor_to_tv
(
self
.
hashdata
,
dtype
=
tv
.
int32
)
res
=
SpconvOps
.
point2voxel_cpu
(
pc_tv
,
voxels_tv
,
indices_tv
,
num_per_voxel_tv
,
hashdata_tv
,
pc_voxel_id_tv
,
self
.
vsize
,
self
.
grid_size
,
self
.
grid_stride
,
self
.
coors_range
,
empty_mean
,
...
...
@@ -118,4 +155,4 @@ class PointToVoxel(object):
num_voxels
=
res
[
0
].
shape
[
0
]
return
(
self
.
voxels
[:
num_voxels
],
self
.
indices
[:
num_voxels
],
self
.
num_per_voxel
[:
num_voxels
])
self
.
num_per_voxel
[:
num_voxels
]
,
pc_voxel_id
)
test/benchmark.py
View file @
bf011c76
...
...
@@ -24,7 +24,7 @@ from spconv.core import ConvAlgo
import
spconv.pytorch
as
spconv
from
spconv.utils
import
Point2VoxelCPU3d
# torch.backends.cudnn.enabled = False
def
waymo_data
(
batch_size
=
1
):
gen
=
Point2VoxelCPU3d
([
0.1
,
0.1
,
0.1
],
[
-
80
,
-
80
,
-
2
,
80
,
80
,
6
],
3
,
150000
,
1
)
...
...
@@ -289,7 +289,7 @@ def main():
voxels_th
=
torch
.
from_numpy
(
voxels
).
to
(
device
).
to
(
dtype
)
coors_th
=
torch
.
from_numpy
(
coors
).
to
(
device
).
int
()
voxels_th
.
requires_grad
=
True
algo
=
spconv
.
ConvAlgo
.
Native
algo
=
spconv
.
ConvAlgo
.
MaskImplicitGemm
# 3080 Laptop
# MaskImpGemm: 11.2ms
# MaskSplitImpGemm: 12.2ms
...
...
@@ -324,26 +324,26 @@ def main():
print
(
out
.
spatial_shape
,
out
.
features
.
mean
(),
out
.
features
.
max
(),
out
.
features
.
min
())
#
times = []
#
with torch.no_grad():
#
for i in range(20):
#
print("------------")
#
torch.cuda.synchronize()
#
t = time.time()
#
out_nograd = net(voxels_th, coors_th, 1, False)
#
timer = out_nograd._timer
#
# res = timer.collect_by_name("forward", timer.get_all_pair_time())
#
# res2 = timer.collect_by_name("forward0", timer.get_all_pair_time())
times
=
[]
with
torch
.
no_grad
():
for
i
in
range
(
20
):
print
(
"------------"
)
torch
.
cuda
.
synchronize
()
t
=
time
.
time
()
out_nograd
=
net
(
voxels_th
,
coors_th
,
1
,
False
)
timer
=
out_nograd
.
_timer
# res = timer.collect_by_name("forward", timer.get_all_pair_time())
# res2 = timer.collect_by_name("forward0", timer.get_all_pair_time())
#
# print(sum(res.values()) + sum(res2.values()))
#
# print(timer.get_all_pair_time())
# print(sum(res.values()) + sum(res2.values()))
# print(timer.get_all_pair_time())
#
# print(sum(timer.get_all_pair_time().values()))
#
torch.cuda.synchronize()
#
# sort_bench()
#
times.append(time.time() - t)
#
print("spconv time", np.mean(times[10:]))
#
times = []
# print(sum(timer.get_all_pair_time().values()))
torch
.
cuda
.
synchronize
()
# sort_bench()
times
.
append
(
time
.
time
()
-
t
)
print
(
"spconv time"
,
np
.
mean
(
times
[
10
:]))
times
=
[]
# for i in range(10):
# out = net(voxels_th, coors_th, 1)
...
...
test/test_all_algo.py
0 → 100644
View file @
bf011c76
This diff is collapsed.
Click to expand it.
test/test_conv.py
View file @
bf011c76
This diff is collapsed.
Click to expand it.
test/test_implgemm.py
deleted
100644 → 0
View file @
4791f582
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
spconv.core_cc.csrc.sparse.all
import
SpconvOps
test/test_multi_impl.py
View file @
bf011c76
...
...
@@ -12,9 +12,330 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Compare results between different algo:
CPU: gather-mm-scatter
"""Compare results between different algo
s
:
CPU:
simple
gather-mm-scatter
Native: Fused gather-mm-scatter
ImplicitGemm
ImplicitGemm
: implicit gemm
"""
import
time
from
pathlib
import
Path
import
numpy
as
np
import
torch
from
torch
import
nn
from
cumm
import
tensorview
as
tv
from
spconv.core
import
ConvAlgo
import
spconv.pytorch
as
spconv
import
pickle
from
spconv.test_utils
import
generate_sparse_data
,
params_grid
class Net(nn.Module):
    """Sparse 3D test network used to compare conv/pool implementations.

    Every conv layer is created with the ``algo`` passed to the constructor;
    the max-pool layers use the same value.
    """

    def __init__(self, shape, algo):
        """Build the layer stack.

        Args:
            shape: spatial shape handed to ``SparseConvTensor`` in ``forward``.
            algo: algorithm passed as ``algo=`` to every conv and pool layer.
        """
        super().__init__()
        pool_algo = algo

        def subm(c_in, c_out, key):
            # 3x3x3 SubMConv3d without bias.
            return spconv.SubMConv3d(c_in,
                                     c_out,
                                     3,
                                     bias=False,
                                     indice_key=key,
                                     algo=algo)

        def inverse(c_in, c_out, key):
            # kernel-size-2 inverse conv reusing the indices stored under key.
            return spconv.SparseInverseConv3d(c_in,
                                              c_out,
                                              2,
                                              indice_key=key,
                                              bias=False,
                                              algo=algo)

        layers = [
            # stage 0: four subm convs sharing "c0", then a strided conv.
            subm(3, 32, "c0"),
            subm(32, 32, "c0"),
            subm(32, 64, "c0"),
            subm(64, 64, "c0"),
            spconv.SparseConv3d(64,
                                64,
                                3,
                                2,
                                1,
                                bias=False,
                                indice_key="m0",
                                algo=algo),
            # stage 1
            subm(64, 96, "c1"),
            subm(96, 96, "c1"),
            spconv.SparseConv3d(96,
                                96,
                                2,
                                2,
                                bias=False,
                                indice_key="m1",
                                algo=algo),
            # stage 2, followed by a max pool without indice_key
            subm(96, 128, "c2"),
            subm(128, 128, "c2"),
            spconv.SparseMaxPool3d(2, 2, algo=pool_algo),
            # stage 3, pool indices stored as "m3"
            subm(128, 160, "c3"),
            subm(160, 160, "c3"),
            spconv.SparseMaxPool3d(2, 2, algo=pool_algo, indice_key="m3"),
            # stage 4, pool indices stored as "m4"
            subm(160, 192, "c4"),
            subm(192, 192, "c4"),
            spconv.SparseMaxPool3d(2, 2, indice_key="m4", algo=pool_algo),
            # stage 5
            subm(192, 224, "c5"),
            subm(224, 224, "c5"),
            # inverse convs keyed on "m4" then "m3"
            inverse(224, 128, "m4"),
            inverse(128, 64, "m3"),
        ]
        self.net = spconv.SparseSequential(*layers)
        self.shape = shape

    def forward(self, features, coors, batch_size):
        """Wrap the inputs in a ``SparseConvTensor`` and run ``self.net``."""
        sparse_input = spconv.SparseConvTensor(features, coors, self.shape,
                                               batch_size)
        return self.net(sparse_input)
class NetLight(nn.Module):
    """Smaller variant of the sparse 3D test network.

    Every layer is created with the ``algo`` passed to the constructor.
    """

    def __init__(self, shape, algo):
        """Build the layer stack.

        Args:
            shape: spatial shape handed to ``SparseConvTensor`` in ``forward``.
            algo: algorithm passed as ``algo=`` to every layer.
        """
        super().__init__()

        def subm(c_in, c_out, key):
            # 3x3x3 SubMConv3d without bias.
            return spconv.SubMConv3d(c_in,
                                     c_out,
                                     3,
                                     bias=False,
                                     indice_key=key,
                                     algo=algo)

        layers = [
            # stage 0: four subm convs sharing "c0", then a strided conv.
            subm(3, 32, "c0"),
            subm(32, 32, "c0"),
            subm(32, 64, "c0"),
            subm(64, 64, "c0"),
            spconv.SparseConv3d(64,
                                64,
                                3,
                                2,
                                1,
                                bias=False,
                                indice_key="m0",
                                algo=algo),
            # stage 1
            subm(64, 96, "c1"),
            subm(96, 96, "c1"),
            spconv.SparseConv3d(96,
                                96,
                                2,
                                2,
                                bias=False,
                                indice_key="m1",
                                algo=algo),
            # inverse convs keyed on "m1" then "m0"
            spconv.SparseInverseConv3d(96,
                                       64,
                                       2,
                                       indice_key="m1",
                                       bias=False,
                                       algo=algo),
            spconv.SparseInverseConv3d(64,
                                       32,
                                       3,
                                       indice_key="m0",
                                       bias=False,
                                       algo=algo),
        ]
        self.net = spconv.SparseSequential(*layers)
        self.shape = shape

    def forward(self, features, coors, batch_size):
        """Wrap the inputs in a ``SparseConvTensor`` and run ``self.net``."""
        sparse_input = spconv.SparseConvTensor(features, coors, self.shape,
                                               batch_size)
        return self.net(sparse_input)
def _test_multi_impl(dtype: torch.dtype):
    """Compare forward outputs and weight gradients across implementations.

    Four networks are built, each right after ``torch.manual_seed(50051)``:
    Native on CPU, and Native / MaskImplicitGemm / MaskSplitImplicitGemm on
    "cuda:0". All of them run forward + backward on the same input and the
    same output gradient. The dense GPU outputs are compared against the CPU
    output, and the implicit-gemm weight gradients against the GPU Native
    ones.

    Args:
        dtype: dtype the inputs and networks are converted to. For
            ``torch.float16`` a small generated input and ``NetLight`` are
            used; otherwise the pickled test data and ``Net``.
    """
    # TODO remove or release this when tf32 op is ready
    torch.backends.cuda.matmul.allow_tf32 = False
    torch.backends.cudnn.allow_tf32 = False

    np.random.seed(50051)
    is_half = dtype == torch.float16
    if is_half:
        # CPU fp16 is very slow, so we use a small data here.
        spatial_shape = [19, 18, 17]
        sparse_dict = generate_sparse_data(spatial_shape, [1500] * 1, 3)
        voxels = np.ascontiguousarray(sparse_dict["features"]).astype(
            np.float32)
        reordered = sparse_dict["indices"][:, [3, 0, 1, 2]]
        coors = np.ascontiguousarray(reordered).astype(np.int32)
    else:
        data_path = Path(__file__).parent / "data" / "test_spconv.pkl"
        with open(data_path, "rb") as f:
            voxels, coors, spatial_shape = pickle.load(f)

    cuda_dev = torch.device("cuda:0")
    cpu_dev = torch.device("cpu:0")

    voxels_th = torch.from_numpy(voxels).to(cpu_dev).to(dtype)
    coors_th = torch.from_numpy(coors).to(cpu_dev).int()
    voxels_th_cuda = torch.from_numpy(voxels).to(cuda_dev).to(dtype)
    coors_th_cuda = torch.from_numpy(coors).to(cuda_dev).int()

    # CPU fp16 is very slow, so we use a small network here.
    net_cls = NetLight if is_half else Net

    def make_net(algo, dev):
        # re-seed before every construction, as each network is compared
        # against the others.
        torch.manual_seed(50051)
        return net_cls(spatial_shape, algo).to(dev).to(dtype)

    net_native_cpu = make_net(ConvAlgo.Native, cpu_dev)
    net_native_gpu = make_net(ConvAlgo.Native, cuda_dev)
    net_imp_gpu = make_net(ConvAlgo.MaskImplicitGemm, cuda_dev)
    net_simp_gpu = make_net(ConvAlgo.MaskSplitImplicitGemm, cuda_dev)
    gpu_nets = (net_native_gpu, net_imp_gpu, net_simp_gpu)

    for net in (net_native_cpu, ) + gpu_nets:
        spconv.assign_name_for_sparse_modules(net)

    # a gradient-free CPU pass is only used to learn the dense output shape.
    with torch.no_grad():
        ref_shape = net_native_cpu(voxels_th, coors_th, 1).dense().shape
    dout = np.random.uniform(-0.2, 0.2, ref_shape).astype(np.float32)
    dout_t = torch.from_numpy(dout).to(cpu_dev).to(dtype)
    dout_t_cu = torch.from_numpy(dout).to(cuda_dev).to(dtype)

    out_cpu = net_native_cpu(voxels_th, coors_th, 1).dense()
    out_cpu.backward(dout_t)

    gpu_outs = []
    for net in gpu_nets:
        dense_out = net(voxels_th_cuda, coors_th_cuda, 1).dense()
        dense_out.backward(dout_t_cu)
        gpu_outs.append(dense_out)

    with torch.no_grad():
        dense_cpu = out_cpu.cuda()
        error_native, error_imp, error_simp = [
            torch.linalg.norm(dense_cpu - dense_gpu).cpu().item()
            for dense_gpu in gpu_outs
        ]

    print("error_native", error_native)
    print("error_imp", error_imp)
    print("error_simp", error_simp)
    tolerance = 0.01 if dtype == torch.float32 else 10
    assert error_native < tolerance
    assert error_imp < tolerance
    assert error_simp < tolerance

    cpu_params = dict(net_native_cpu.named_parameters())
    native_params = dict(net_native_gpu.named_parameters())
    imp_params = dict(net_imp_gpu.named_parameters())
    simp_params = dict(net_simp_gpu.named_parameters())
    for k, cpu_w in cpu_params.items():
        native_w = native_params[k]
        imp_w = imp_params[k]
        simp_w = simp_params[k]
        cpu_grad = cpu_w.grad.detach().cuda()
        native_grad = native_w.grad.detach()
        imp_grad = imp_w.grad.detach()
        simp_grad = simp_w.grad.detach()
        # error_native is printed only; the implicit-gemm gradients are
        # checked against the GPU Native gradients.
        error_native = torch.linalg.norm(native_grad - cpu_grad).cpu().item()
        error_imp = torch.linalg.norm(native_grad - imp_grad).cpu().item()
        error_simp = torch.linalg.norm(native_grad - simp_grad).cpu().item()
        print(k, error_native, error_imp, error_simp)
        assert error_imp < 1
        assert error_simp < 1
def test_multi_impl():
    """Run the implementation comparison for float32, then float16."""
    for dtype in (torch.float32, torch.float16):
        _test_multi_impl(dtype)


if __name__ == "__main__":
    test_multi_impl()
test/test_native_kernels.py
deleted
100644 → 0
View file @
4791f582
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
test_before_push.sh
0 → 100644
View file @
bf011c76
# Developers must run this script before pushing or opening a pull request.
# This script contains three parts:
# 1. unit tests for all gemm/conv kernels
# 2. comparison test: compare network fwd/bwd results between CPU, Native, ImplicitGemm
# 3. f32/f16 train/eval test based on mnist and some small datasets
echo
"-------------UNIT TEST START--------------"
pytest ./test
echo
"-------------UNIT TEST END--------------"
python ./example/mnist_sparse.py
--fp16
\ No newline at end of file
version.txt
View file @
bf011c76
2.
1.9
2.
2.0
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment