Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
77f1cf0b
Commit
77f1cf0b
authored
Sep 24, 2022
by
yan.yan
Browse files
fix windows build problem
parent
19a599e1
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
29 additions
and
20 deletions
+29
-20
spconv/algo.py
spconv/algo.py
+2
-0
spconv/csrc/sparse/all.py
spconv/csrc/sparse/all.py
+3
-2
spconv/csrc/sparse/alloc.py
spconv/csrc/sparse/alloc.py
+1
-1
spconv/csrc/utils/pcc.py
spconv/csrc/utils/pcc.py
+8
-9
test/test_all_algo.py
test/test_all_algo.py
+15
-8
No files found.
spconv/algo.py
View file @
77f1cf0b
...
...
@@ -164,6 +164,7 @@ class GemmTunerSimple(GemmTunerSimpleBase):
if
key
in
self
.
_nvrtc_caches
:
return
self
.
_nvrtc_caches
[
key
]
mod
,
ker
=
self
.
_compile_nvrtc_module
(
desp
)
print
(
f
"Can't find algo
{
desp
}
in prebuilt. compile with nvrtc..."
)
nvrtc_params
=
_get_nvrtc_params
(
mod
,
ker
,
"gemm_kernel"
)
self
.
_nvrtc_caches
[
key
]
=
nvrtc_params
return
nvrtc_params
...
...
@@ -288,6 +289,7 @@ class SimpleGemm:
if
key
in
self
.
_nvrtc_caches
:
return
self
.
_nvrtc_caches
[
key
]
mod
,
ker
=
self
.
_compile_nvrtc_module
(
desp
)
print
(
f
"Can't find algo
{
desp
}
in prebuilt. compile with nvrtc..."
)
nvrtc_params
=
_get_nvrtc_params
(
mod
,
ker
,
"gemm_kernel"
)
self
.
_nvrtc_caches
[
key
]
=
nvrtc_params
return
nvrtc_params
...
...
spconv/csrc/sparse/all.py
View file @
77f1cf0b
...
...
@@ -126,6 +126,7 @@ class SpconvOps(pccm.Class):
defines
.
append
(
f
"#define SPCONV_ALLOC_
{
to_snake_case
(
name
).
upper
()
}
{
pccm
.
literal
(
v
)
}
"
)
define_str
=
"
\n
"
.
join
(
defines
)
self
.
add_global_code
(
define_str
)
self
.
build_meta
.
add_global_cflags
(
"cl"
,
"/DNOMINMAX"
)
# for name in dir(AllocKeys):
# if not name.startswith("__"):
# v = getattr(AllocKeys, name)
...
...
@@ -1580,10 +1581,10 @@ class SpconvOps(pccm.Class):
}}
if (!subm){{
size_t pair_single_size = kv * int64_t(num_act_in);
auto ten = tv::from_blob(workspace, {{pair_single_size + 1}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
auto ten = tv::from_blob(workspace, {{
int64_t(
pair_single_size + 1
)
}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
res.insert({{
{
pccm
.
literal
(
AllocKeys
.
IndicePairsUniq
)
}
, ten}});
workspace += ten.nbytes();
auto ten2 = tv::from_blob(workspace, {{pair_single_size + 1}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
auto ten2 = tv::from_blob(workspace, {{
int64_t(
pair_single_size + 1
)
}}, use_int64_hash_k ? tv::int64 : tv::int32, 0);
res.insert({{
{
pccm
.
literal
(
AllocKeys
.
IndicePairsUniqBackup
)
}
, ten2}});
workspace += ten2.nbytes();
}}
...
...
spconv/csrc/sparse/alloc.py
View file @
77f1cf0b
...
...
@@ -220,7 +220,7 @@ class ThrustAllocator(pccm.Class):
code
.
arg
(
"ptr"
,
"char *"
)
code
.
arg
(
"num_bytes"
,
"size_t"
)
code
.
raw
(
f
"""
return allocator_.free_noexcept(tv::from_blob(ptr, {{num_bytes}}, tv::uint8, 0));
return allocator_.free_noexcept(tv::from_blob(ptr, {{
int64_t(
num_bytes
)
}}, tv::uint8, 0));
"""
)
return
code
...
...
spconv/csrc/utils/pcc.py
View file @
77f1cf0b
...
...
@@ -64,7 +64,6 @@ class PointCloudCompress(pccm.Class):
auto point_stride = points.stride(0);
int64_t final_size = sizeof(int64_t) * 5 + sizeof(float) * 3;
tv::Tensor res;
tv::ssprint(1);
tv::dispatch<float, double>(points.dtype(), [&](auto IP){{
using TPoint = TV_DECLTYPE(IP);
...
...
@@ -88,13 +87,13 @@ class PointCloudCompress(pccm.Class):
auto pos_int = op::apply(floorf, pos_unit_voxel).cast<int32_t>();
auto pos_enc = (point / errors - pos_int.cast<float>() * float(256)).cast<uint8_t>();
tv::array<uint8_t, kEncodeDim> enc;
tv::if_constexpr<(kEncodeDim > 3)>([&](auto _){{
enc[0] = pos_enc[0];
enc[1] = pos_enc[1];
enc[2] = pos_enc[2];
if (kEncodeDim > 3){{
TInten inten = intensity_data[0];
enc = _(tv::array<uint8_t, kEncodeDim>{{pos_enc[0], pos_enc[1], pos_enc[2], uint8_t(inten)}});
intensity_data += inten_stride;
}}, [&](auto _){{
enc = _(tv::array<uint8_t, kEncodeDim>{{pos_enc[0], pos_enc[1], pos_enc[2]}});
}});
enc[3] = uint8_t(inten);
}}
auto pos_uint = pos_int + hash_t::direct_hash_offset();
uint64_t scalar = hash_t::encode(pos_int[0], pos_int[1], pos_int[2]);
auto iter = hash.find(scalar);
...
...
@@ -225,7 +224,7 @@ class PointCloudCompress(pccm.Class):
error[2] = error_header[2];
res_ptr += sizeof(float) * 3;
tv::Tensor points;
tv::dispatch_int<static_cast<int>(EncodeType::XYZI_8), static_cast<int>(EncodeType::XYZ_8)>(static_cast<int>(type), [&](auto I){{
tv::dispatch_int<static_cast<int>(EncodeType::XYZI_8), static_cast<int>(EncodeType::XYZ_8)>(static_cast<int>(type), [&
, error
](auto I){{
constexpr int kTypeInt = TV_DECLTYPE(I)::value;
constexpr int kEncodeDim = kTypeInt == static_cast<int>(EncodeType::XYZI_8) ? 4 : 3;
points = tv::empty({{N, kEncodeDim}}, tv::float32);
...
...
@@ -241,7 +240,7 @@ class PointCloudCompress(pccm.Class):
auto point_cur_ptr = points_ptr;
for (int j = 0; j < cluster_size; ++j){{
auto& enc = enc_ptr[j];
auto
point = op::slice<0, 3>(enc).template cast<float>() * error + offset;
tv::array<float, 3>
point = op::slice<0, 3>(enc).template cast<float>() * error + offset;
point_cur_ptr[0] = point[0];
point_cur_ptr[1] = point[1];
point_cur_ptr[2] = point[2];
...
...
test/test_all_algo.py
View file @
77f1cf0b
...
...
@@ -640,6 +640,7 @@ def _test_native_conv_cuda(subm: bool):
arch
=
torch
.
cuda
.
get_device_capability
()
stream
=
get_current_stream
()
force_nvrtc
=
False
for
shape
,
bs
,
C
,
K
,
k
,
s
,
p
,
d
,
dtype
in
tqdm
.
tqdm
(
params_grid
(
shapes
,
batchsizes
,
in_channels
,
out_channels
,
ksizes
,
strides
,
paddings
,
dilations
,
dtypes
)):
...
...
@@ -718,7 +719,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds
=
out_indices
,
hint
=
AlgoHint
.
Fowrard
.
value
,
alpha
=
1.0
,
beta
=
beta
)
beta
=
beta
,
force_nvrtc
=
force_nvrtc
)
else
:
GEMM
.
run_with_tuned_result
(
BestAlgoByProfile
(
desp
,
tester
.
arch
,
1
),
...
...
@@ -735,7 +737,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds
=
out_indices
,
hint
=
AlgoHint
.
Fowrard
.
value
,
alpha
=
1.0
,
beta
=
beta
)
beta
=
beta
,
force_nvrtc
=
force_nvrtc
)
inited
=
True
if
bias
is
not
None
and
tester
.
check_act
:
InferenceOps
.
bias_add_act_inplace
(
output_tv
,
bias
,
tv
.
gemm
.
Activation
.
ReLU
,
0
,
0
)
...
...
@@ -801,7 +804,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds
=
inp_indices
,
hint
=
AlgoHint
.
Fowrard
.
value
,
alpha
=
1.0
,
beta
=
beta
)
beta
=
beta
,
force_nvrtc
=
force_nvrtc
)
else
:
GEMM
.
run_with_tuned_result
(
BestAlgoByProfile
(
desp
,
tester
.
arch
,
1
),
...
...
@@ -818,7 +822,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds
=
inp_indices
,
hint
=
AlgoHint
.
Fowrard
.
value
,
alpha
=
1.0
,
beta
=
beta
)
beta
=
beta
,
force_nvrtc
=
force_nvrtc
)
inited
=
True
din_my
=
inp_tv
.
cpu
().
numpy
()
...
...
@@ -879,7 +884,8 @@ def _test_native_conv_cuda(subm: bool):
c_inds
=
tv
.
Tensor
(),
hint
=
AlgoHint
.
BackwardWeight
.
value
,
alpha
=
1.0
,
beta
=
beta
)
beta
=
beta
,
force_nvrtc
=
force_nvrtc
)
else
:
GEMM
.
run_with_tuned_result
(
BestAlgoByProfile
(
desp
,
tester
.
arch
,
32
),
...
...
@@ -896,7 +902,8 @@ def _test_native_conv_cuda(subm: bool):
b_inds
=
b_inds
,
hint
=
AlgoHint
.
BackwardWeight
.
value
,
alpha
=
1.0
,
beta
=
beta
)
beta
=
beta
,
force_nvrtc
=
force_nvrtc
)
dw_my
=
weight_tv
.
cpu
().
numpy
()
if
dtype
!=
np
.
float16
:
...
...
@@ -909,8 +916,8 @@ def _test_native_conv_cuda(subm: bool):
def
test_all_algo_unit
():
# for i in range(5):
_test_impgemm_conv_cuda
(
True
)
_test_impgemm_conv_cuda
(
False
)
#
_test_impgemm_conv_cuda(True)
#
_test_impgemm_conv_cuda(False)
_test_native_conv_cuda
(
True
)
_test_native_conv_cuda
(
False
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment