Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
77f1cf0b
Commit
77f1cf0b
authored
Sep 24, 2022
by
yan.yan
Browse files
fix windows build problem
parent
19a599e1
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
29 additions
and
20 deletions
+29
-20
spconv/algo.py
spconv/algo.py
+2
-0
spconv/csrc/sparse/all.py
spconv/csrc/sparse/all.py
+3
-2
spconv/csrc/sparse/alloc.py
spconv/csrc/sparse/alloc.py
+1
-1
spconv/csrc/utils/pcc.py
spconv/csrc/utils/pcc.py
+8
-9
test/test_all_algo.py
test/test_all_algo.py
+15
-8
No files found.
spconv/algo.py
View file @
77f1cf0b
...
@@ -164,6 +164,7 @@ class GemmTunerSimple(GemmTunerSimpleBase):
...
@@ -164,6 +164,7 @@ class GemmTunerSimple(GemmTunerSimpleBase):
if
key
in
self
.
_nvrtc_caches
:
if
key
in
self
.
_nvrtc_caches
:
return
self
.
_nvrtc_caches
[
key
]
return
self
.
_nvrtc_caches
[
key
]
mod
,
ker
=
self
.
_compile_nvrtc_module
(
desp
)
mod
,
ker
=
self
.
_compile_nvrtc_module
(
desp
)
print
(
f
"Can't find algo
{
desp
}
in prebuilt. compile with nvrtc..."
)
nvrtc_params
=
_get_nvrtc_params
(
mod
,
ker
,
"gemm_kernel"
)
nvrtc_params
=
_get_nvrtc_params
(
mod
,
ker
,
"gemm_kernel"
)
self
.
_nvrtc_caches
[
key
]
=
nvrtc_params
self
.
_nvrtc_caches
[
key
]
=
nvrtc_params
return
nvrtc_params
return
nvrtc_params
...
@@ -288,6 +289,7 @@ class SimpleGemm:
...
@@ -288,6 +289,7 @@ class SimpleGemm:
if
key
in
self
.
_nvrtc_caches
:
if
key
in
self
.
_nvrtc_caches
:
return
self
.
_nvrtc_caches
[
key
]
return
self
.
_nvrtc_caches
[
key
]
mod
,
ker
=
self
.
_compile_nvrtc_module
(
desp
)
mod
,
ker
=
self
.
_compile_nvrtc_module
(
desp
)
print
(
f
"Can't find algo
{
desp
}
in prebuilt. compile with nvrtc..."
)
nvrtc_params
=
_get_nvrtc_params
(
mod
,
ker
,
"gemm_kernel"
)
nvrtc_params
=
_get_nvrtc_params
(
mod
,
ker
,
"gemm_kernel"
)
self
.
_nvrtc_caches
[
key
]
=
nvrtc_params
self
.
_nvrtc_caches
[
key
]
=
nvrtc_params
return
nvrtc_params
return
nvrtc_params
...
...
spconv/csrc/sparse/all.py
View file @
77f1cf0b
...
@@ -126,6 +126,7 @@ class SpconvOps(pccm.Class):
...
@@ -126,6 +126,7 @@ class SpconvOps(pccm.Class):
defines
.
append
(
f
"#define SPCONV_ALLOC_
{
to_snake_case
(
name
).
upper
()
}
{
pccm
.
literal
(
v
)
}
"
)
defines
.
append
(
f
"#define SPCONV_ALLOC_
{
to_snake_case
(
name
).
upper
()
}
{
pccm
.
literal
(
v
)
}
"
)
define_str
=
"
\n
"
.
join
(
defines
)
define_str
=
"
\n
"
.
join
(
defines
)
self
.
add_global_code
(
define_str
)
self
.
add_global_code
(
define_str
)
self
.
build_meta
.
add_global_cflags
(
"cl"
,
"/DNOMINMAX"
)
# for name in dir(AllocKeys):
# for name in dir(AllocKeys):
# if not name.startswith("__"):
# if not name.startswith("__"):
# v = getattr(AllocKeys, name)
# v = getattr(AllocKeys, name)
...
@@ -1580,10 +1581,10 @@ class SpconvOps(pccm.Class):
...
@@ -1580,10 +1581,10 @@ class SpconvOps(pccm.Class):
}}
}}
if (!subm){{
if (!subm){{
size_t pair_single_size = kv * int64_t(num_act_in);
size_t pair_single_size = kv * int64_t(num_act_in);
auto ten = tv::from_blob(workspace, {{pair_single_size + 1}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
auto ten = tv::from_blob(workspace, {{
int64_t(
pair_single_size + 1
)
}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
res.insert({{
{
pccm
.
literal
(
AllocKeys
.
IndicePairsUniq
)
}
, ten}});
res.insert({{
{
pccm
.
literal
(
AllocKeys
.
IndicePairsUniq
)
}
, ten}});
workspace += ten.nbytes();
workspace += ten.nbytes();
auto ten2 = tv::from_blob(workspace, {{pair_single_size + 1}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
auto ten2 = tv::from_blob(workspace, {{
int64_t(
pair_single_size + 1
)
}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
res.insert({{
{
pccm
.
literal
(
AllocKeys
.
IndicePairsUniqBackup
)
}
, ten2}});
res.insert({{
{
pccm
.
literal
(
AllocKeys
.
IndicePairsUniqBackup
)
}
, ten2}});
workspace += ten2.nbytes();
workspace += ten2.nbytes();
}}
}}
...
...
spconv/csrc/sparse/alloc.py
View file @
77f1cf0b
...
@@ -220,7 +220,7 @@ class ThrustAllocator(pccm.Class):
...
@@ -220,7 +220,7 @@ class ThrustAllocator(pccm.Class):
code
.
arg
(
"ptr"
,
"char *"
)
code
.
arg
(
"ptr"
,
"char *"
)
code
.
arg
(
"num_bytes"
,
"size_t"
)
code
.
arg
(
"num_bytes"
,
"size_t"
)
code
.
raw
(
f
"""
code
.
raw
(
f
"""
return allocator_.free_noexcept(tv::from_blob(ptr, {{num_bytes}}, tv::uint8, 0));
return allocator_.free_noexcept(tv::from_blob(ptr, {{
int64_t(
num_bytes
)
}}, tv::uint8, 0));
"""
)
"""
)
return
code
return
code
...
...
spconv/csrc/utils/pcc.py
View file @
77f1cf0b
...
@@ -64,7 +64,6 @@ class PointCloudCompress(pccm.Class):
...
@@ -64,7 +64,6 @@ class PointCloudCompress(pccm.Class):
auto point_stride = points.stride(0);
auto point_stride = points.stride(0);
int64_t final_size = sizeof(int64_t) * 5 + sizeof(float) * 3;
int64_t final_size = sizeof(int64_t) * 5 + sizeof(float) * 3;
tv::Tensor res;
tv::Tensor res;
tv::ssprint(1);
tv::dispatch<float, double>(points.dtype(), [&](auto IP){{
tv::dispatch<float, double>(points.dtype(), [&](auto IP){{
using TPoint = TV_DECLTYPE(IP);
using TPoint = TV_DECLTYPE(IP);
...
@@ -88,13 +87,13 @@ class PointCloudCompress(pccm.Class):
...
@@ -88,13 +87,13 @@ class PointCloudCompress(pccm.Class):
auto pos_int = op::apply(floorf, pos_unit_voxel).cast<int32_t>();
auto pos_int = op::apply(floorf, pos_unit_voxel).cast<int32_t>();
auto pos_enc = (point / errors - pos_int.cast<float>() * float(256)).cast<uint8_t>();
auto pos_enc = (point / errors - pos_int.cast<float>() * float(256)).cast<uint8_t>();
tv::array<uint8_t, kEncodeDim> enc;
tv::array<uint8_t, kEncodeDim> enc;
tv::if_constexpr<(kEncodeDim > 3)>([&](auto _){{
enc[0] = pos_enc[0];
enc[1] = pos_enc[1];
enc[2] = pos_enc[2];
if (kEncodeDim > 3){{
TInten inten = intensity_data[0];
TInten inten = intensity_data[0];
enc = _(tv::array<uint8_t, kEncodeDim>{{pos_enc[0], pos_enc[1], pos_enc[2], uint8_t(inten)}});
enc[3] = uint8_t(inten);
intensity_data += inten_stride;
}}
}}, [&](auto _){{
enc = _(tv::array<uint8_t, kEncodeDim>{{pos_enc[0], pos_enc[1], pos_enc[2]}});
}});
auto pos_uint = pos_int + hash_t::direct_hash_offset();
auto pos_uint = pos_int + hash_t::direct_hash_offset();
uint64_t scalar = hash_t::encode(pos_int[0], pos_int[1], pos_int[2]);
uint64_t scalar = hash_t::encode(pos_int[0], pos_int[1], pos_int[2]);
auto iter = hash.find(scalar);
auto iter = hash.find(scalar);
...
@@ -225,7 +224,7 @@ class PointCloudCompress(pccm.Class):
...
@@ -225,7 +224,7 @@ class PointCloudCompress(pccm.Class):
error[2] = error_header[2];
error[2] = error_header[2];
res_ptr += sizeof(float) * 3;
res_ptr += sizeof(float) * 3;
tv::Tensor points;
tv::Tensor points;
tv::dispatch_int<static_cast<int>(EncodeType::XYZI_8), static_cast<int>(EncodeType::XYZ_8)>(static_cast<int>(type), [&](auto I){{
tv::dispatch_int<static_cast<int>(EncodeType::XYZI_8), static_cast<int>(EncodeType::XYZ_8)>(static_cast<int>(type), [&
, error
](auto I){{
constexpr int kTypeInt = TV_DECLTYPE(I)::value;
constexpr int kTypeInt = TV_DECLTYPE(I)::value;
constexpr int kEncodeDim = kTypeInt == static_cast<int>(EncodeType::XYZI_8) ? 4 : 3;
constexpr int kEncodeDim = kTypeInt == static_cast<int>(EncodeType::XYZI_8) ? 4 : 3;
points = tv::empty({{N, kEncodeDim}}, tv::float32);
points = tv::empty({{N, kEncodeDim}}, tv::float32);
...
@@ -241,7 +240,7 @@ class PointCloudCompress(pccm.Class):
...
@@ -241,7 +240,7 @@ class PointCloudCompress(pccm.Class):
auto point_cur_ptr = points_ptr;
auto point_cur_ptr = points_ptr;
for (int j = 0; j < cluster_size; ++j){{
for (int j = 0; j < cluster_size; ++j){{
auto& enc = enc_ptr[j];
auto& enc = enc_ptr[j];
auto
point = op::slice<0, 3>(enc).template cast<float>() * error + offset;
tv::array<float, 3>
point = op::slice<0, 3>(enc).template cast<float>() * error + offset;
point_cur_ptr[0] = point[0];
point_cur_ptr[0] = point[0];
point_cur_ptr[1] = point[1];
point_cur_ptr[1] = point[1];
point_cur_ptr[2] = point[2];
point_cur_ptr[2] = point[2];
...
...
test/test_all_algo.py
View file @
77f1cf0b
...
@@ -640,6 +640,7 @@ def _test_native_conv_cuda(subm: bool):
...
@@ -640,6 +640,7 @@ def _test_native_conv_cuda(subm: bool):
arch
=
torch
.
cuda
.
get_device_capability
()
arch
=
torch
.
cuda
.
get_device_capability
()
stream
=
get_current_stream
()
stream
=
get_current_stream
()
force_nvrtc
=
False
for
shape
,
bs
,
C
,
K
,
k
,
s
,
p
,
d
,
dtype
in
tqdm
.
tqdm
(
params_grid
(
for
shape
,
bs
,
C
,
K
,
k
,
s
,
p
,
d
,
dtype
in
tqdm
.
tqdm
(
params_grid
(
shapes
,
batchsizes
,
in_channels
,
out_channels
,
ksizes
,
shapes
,
batchsizes
,
in_channels
,
out_channels
,
ksizes
,
strides
,
paddings
,
dilations
,
dtypes
)):
strides
,
paddings
,
dilations
,
dtypes
)):
...
@@ -718,7 +719,8 @@ def _test_native_conv_cuda(subm: bool):
...
@@ -718,7 +719,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds
=
out_indices
,
c_inds
=
out_indices
,
hint
=
AlgoHint
.
Fowrard
.
value
,
hint
=
AlgoHint
.
Fowrard
.
value
,
alpha
=
1.0
,
alpha
=
1.0
,
beta
=
beta
)
beta
=
beta
,
force_nvrtc
=
force_nvrtc
)
else
:
else
:
GEMM
.
run_with_tuned_result
(
GEMM
.
run_with_tuned_result
(
BestAlgoByProfile
(
desp
,
tester
.
arch
,
1
),
BestAlgoByProfile
(
desp
,
tester
.
arch
,
1
),
...
@@ -735,7 +737,8 @@ def _test_native_conv_cuda(subm: bool):
...
@@ -735,7 +737,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds
=
out_indices
,
c_inds
=
out_indices
,
hint
=
AlgoHint
.
Fowrard
.
value
,
hint
=
AlgoHint
.
Fowrard
.
value
,
alpha
=
1.0
,
alpha
=
1.0
,
beta
=
beta
)
beta
=
beta
,
force_nvrtc
=
force_nvrtc
)
inited
=
True
inited
=
True
if
bias
is
not
None
and
tester
.
check_act
:
if
bias
is
not
None
and
tester
.
check_act
:
InferenceOps
.
bias_add_act_inplace
(
output_tv
,
bias
,
tv
.
gemm
.
Activation
.
ReLU
,
0
,
0
)
InferenceOps
.
bias_add_act_inplace
(
output_tv
,
bias
,
tv
.
gemm
.
Activation
.
ReLU
,
0
,
0
)
...
@@ -801,7 +804,8 @@ def _test_native_conv_cuda(subm: bool):
...
@@ -801,7 +804,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds
=
inp_indices
,
c_inds
=
inp_indices
,
hint
=
AlgoHint
.
Fowrard
.
value
,
hint
=
AlgoHint
.
Fowrard
.
value
,
alpha
=
1.0
,
alpha
=
1.0
,
beta
=
beta
)
beta
=
beta
,
force_nvrtc
=
force_nvrtc
)
else
:
else
:
GEMM
.
run_with_tuned_result
(
GEMM
.
run_with_tuned_result
(
BestAlgoByProfile
(
desp
,
tester
.
arch
,
1
),
BestAlgoByProfile
(
desp
,
tester
.
arch
,
1
),
...
@@ -818,7 +822,8 @@ def _test_native_conv_cuda(subm: bool):
...
@@ -818,7 +822,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds
=
inp_indices
,
c_inds
=
inp_indices
,
hint
=
AlgoHint
.
Fowrard
.
value
,
hint
=
AlgoHint
.
Fowrard
.
value
,
alpha
=
1.0
,
alpha
=
1.0
,
beta
=
beta
)
beta
=
beta
,
force_nvrtc
=
force_nvrtc
)
inited
=
True
inited
=
True
din_my
=
inp_tv
.
cpu
().
numpy
()
din_my
=
inp_tv
.
cpu
().
numpy
()
...
@@ -879,7 +884,8 @@ def _test_native_conv_cuda(subm: bool):
...
@@ -879,7 +884,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds
=
tv
.
Tensor
(),
c_inds
=
tv
.
Tensor
(),
hint
=
AlgoHint
.
BackwardWeight
.
value
,
hint
=
AlgoHint
.
BackwardWeight
.
value
,
alpha
=
1.0
,
alpha
=
1.0
,
beta
=
beta
)
beta
=
beta
,
force_nvrtc
=
force_nvrtc
)
else
:
else
:
GEMM
.
run_with_tuned_result
(
BestAlgoByProfile
(
desp
,
tester
.
arch
,
32
),
GEMM
.
run_with_tuned_result
(
BestAlgoByProfile
(
desp
,
tester
.
arch
,
32
),
...
@@ -896,7 +902,8 @@ def _test_native_conv_cuda(subm: bool):
...
@@ -896,7 +902,8 @@ def _test_native_conv_cuda(subm: bool):
b_inds
=
b_inds
,
b_inds
=
b_inds
,
hint
=
AlgoHint
.
BackwardWeight
.
value
,
hint
=
AlgoHint
.
BackwardWeight
.
value
,
alpha
=
1.0
,
alpha
=
1.0
,
beta
=
beta
)
beta
=
beta
,
force_nvrtc
=
force_nvrtc
)
dw_my
=
weight_tv
.
cpu
().
numpy
()
dw_my
=
weight_tv
.
cpu
().
numpy
()
if
dtype
!=
np
.
float16
:
if
dtype
!=
np
.
float16
:
...
@@ -909,8 +916,8 @@ def _test_native_conv_cuda(subm: bool):
...
@@ -909,8 +916,8 @@ def _test_native_conv_cuda(subm: bool):
def
test_all_algo_unit
():
def
test_all_algo_unit
():
# for i in range(5):
# for i in range(5):
_test_impgemm_conv_cuda
(
True
)
#
_test_impgemm_conv_cuda(True)
_test_impgemm_conv_cuda
(
False
)
#
_test_impgemm_conv_cuda(False)
_test_native_conv_cuda
(
True
)
_test_native_conv_cuda
(
True
)
_test_native_conv_cuda
(
False
)
_test_native_conv_cuda
(
False
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment