Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
77a7981a
Commit
77a7981a
authored
Sep 25, 2022
by
yan.yan
Browse files
fix CI problem
parent
d4de767e
Changes
7
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
123 additions
and
18 deletions
+123
-18
CHANGELOG.md
CHANGELOG.md
+4
-0
pyproject.toml
pyproject.toml
+1
-1
setup.py
setup.py
+8
-6
spconv/csrc/sparse/all.py
spconv/csrc/sparse/all.py
+7
-1
spconv/csrc/sparse/convops.py
spconv/csrc/sparse/convops.py
+46
-9
tools/build-wheels-dev.sh
tools/build-wheels-dev.sh
+56
-0
version.txt
version.txt
+1
-1
No files found.
CHANGELOG.md
View file @
77a7981a
# Changelog
# Changelog
## [2.2.2] - 2022-9-25
### Fixed
-
Fix CI problem: the main function was too long and caused an OOM in the CI VM.
## [2.2.1] - 2022-9-25
## [2.2.1] - 2022-9-25
### Fixed
### Fixed
-
Fix build problem
-
Fix build problem
...
...
pyproject.toml
View file @
77a7981a
[build-system]
[build-system]
requires
=
[
"setuptools>=41.0"
,
"wheel"
,
"pccm>=0.4.0"
,
"cumm>=0.3.
3
"
]
requires
=
[
"setuptools>=41.0"
,
"wheel"
,
"pccm>=0.4.0"
,
"cumm>=0.3.
4
"
]
build-backend
=
"setuptools.build_meta"
build-backend
=
"setuptools.build_meta"
setup.py
View file @
77a7981a
...
@@ -25,6 +25,7 @@ NAME = 'spconv'
...
@@ -25,6 +25,7 @@ NAME = 'spconv'
RELEASE_NAME
=
NAME
RELEASE_NAME
=
NAME
deps
=
[
"cumm"
]
deps
=
[
"cumm"
]
cuda_ver
=
os
.
environ
.
get
(
"CUMM_CUDA_VERSION"
,
""
)
cuda_ver
=
os
.
environ
.
get
(
"CUMM_CUDA_VERSION"
,
""
)
# is_ci_build = cuda_ver != ""
# is_ci_build = cuda_ver != ""
# if not cuda_ver:
# if not cuda_ver:
# nvcc_version = subprocess.check_output(["nvcc", "--version"
# nvcc_version = subprocess.check_output(["nvcc", "--version"
...
@@ -35,12 +36,12 @@ cuda_ver = os.environ.get("CUMM_CUDA_VERSION", "")
...
@@ -35,12 +36,12 @@ cuda_ver = os.environ.get("CUMM_CUDA_VERSION", "")
# cuda_ver = version_str
# cuda_ver = version_str
if
cuda_ver
:
if
cuda_ver
:
cuda_ver
=
cuda_ver
.
replace
(
"."
,
""
)
# 10.2 to 102
cuda_ver
_str
=
cuda_ver
.
replace
(
"."
,
""
)
# 10.2 to 102
RELEASE_NAME
+=
"-cu{}"
.
format
(
cuda_ver
)
RELEASE_NAME
+=
"-cu{}"
.
format
(
cuda_ver
_str
)
deps
=
[
"cumm-cu{}>=0.3.
2
"
.
format
(
cuda_ver
)]
deps
=
[
"cumm-cu{}>=0.3.
4
"
.
format
(
cuda_ver
_str
)]
else
:
else
:
deps
=
[
"cumm>=0.3.
2
"
]
deps
=
[
"cumm>=0.3.
4
"
]
...
@@ -176,8 +177,9 @@ if disable_jit is not None and disable_jit == "1":
...
@@ -176,8 +177,9 @@ if disable_jit is not None and disable_jit == "1":
cu
.
namespace
=
"cumm.gemm.main"
cu
.
namespace
=
"cumm.gemm.main"
std
=
"c++17"
std
=
"c++17"
if
cuda_ver
:
if
cuda_ver
:
cuda_ver_number
=
int
(
cuda_ver
)
cuda_ver_vec
=
list
(
map
(
int
,
cuda_ver
.
split
(
"."
)))
if
cuda_ver_number
<
110
:
cuda_ver_tuple
=
(
cuda_ver_vec
[
0
],
cuda_ver_vec
[
1
])
if
cuda_ver_tuple
[
0
]
<
11
:
std
=
"c++14"
std
=
"c++14"
else
:
else
:
std
=
"c++17"
std
=
"c++17"
...
...
spconv/csrc/sparse/all.py
View file @
77a7981a
...
@@ -29,6 +29,7 @@ from .gather import GatherCPU
...
@@ -29,6 +29,7 @@ from .gather import GatherCPU
from
.alloc
import
ExternalAllocator
,
ThrustAllocator
from
.alloc
import
ExternalAllocator
,
ThrustAllocator
from
spconv.constants
import
SPCONV_DIRECT_TABLE_HASH_SIZE_SCALE
,
AllocKeys
from
spconv.constants
import
SPCONV_DIRECT_TABLE_HASH_SIZE_SCALE
,
AllocKeys
import
re
import
re
import
os
class
CustomThrustLib
(
pccm
.
Class
):
class
CustomThrustLib
(
pccm
.
Class
):
def
__init__
(
self
):
def
__init__
(
self
):
...
@@ -131,7 +132,12 @@ class SpconvOps(pccm.Class):
...
@@ -131,7 +132,12 @@ class SpconvOps(pccm.Class):
define_str
=
"
\n
"
.
join
(
defines
)
define_str
=
"
\n
"
.
join
(
defines
)
self
.
add_global_code
(
define_str
)
self
.
add_global_code
(
define_str
)
self
.
build_meta
.
add_global_cflags
(
"cl"
,
"/DNOMINMAX"
)
self
.
build_meta
.
add_global_cflags
(
"cl"
,
"/DNOMINMAX"
)
# self.build_meta.add_global_cflags("nvcc", "-w")
cuda_ver
=
os
.
environ
.
get
(
"CUMM_CUDA_VERSION"
,
""
)
if
cuda_ver
:
cuda_ver_vec
=
list
(
map
(
int
,
cuda_ver
.
split
(
"."
)))
cuda_ver_tuple
=
(
cuda_ver_vec
[
0
],
cuda_ver_vec
[
1
])
if
cuda_ver_tuple
[
0
]
<
11
:
self
.
build_meta
.
add_global_cflags
(
"nvcc"
,
"-w"
)
# for name in dir(AllocKeys):
# for name in dir(AllocKeys):
# if not name.startswith("__"):
# if not name.startswith("__"):
...
...
spconv/csrc/sparse/convops.py
View file @
77a7981a
...
@@ -591,6 +591,7 @@ class GemmTunerSimple(pccm.ParameterizedClass):
...
@@ -591,6 +591,7 @@ class GemmTunerSimple(pccm.ParameterizedClass):
finally_algos.push_back(desp);
finally_algos.push_back(desp);
}}
}}
}}
}}
std::sort(finally_algos.begin(), finally_algos.end(), [](auto a, auto b){{return a.min_arch > b.min_arch;}});
return finally_algos;
return finally_algos;
"""
)
"""
)
return
code
.
ret
(
"std::vector<tv::gemm::GemmAlgoDesp>"
,
return
code
.
ret
(
"std::vector<tv::gemm::GemmAlgoDesp>"
,
...
@@ -702,9 +703,9 @@ class GemmTunerSimple(pccm.ParameterizedClass):
...
@@ -702,9 +703,9 @@ class GemmTunerSimple(pccm.ParameterizedClass):
trans_c, arch, shuffle_type, use_tf32);
trans_c, arch, shuffle_type, use_tf32);
auto c_ = c.clone_whole_storage();
auto c_ = c.clone_whole_storage();
std::vector<GemmTuneResult> all_profile_res;
std::vector<GemmTuneResult> all_profile_res;
std::
vector
<int> splitk_tests;
std::
unordered_set
<int> splitk_tests;
std::vector<float> times;
std::vector<float> times;
float min_time = -1;
for (auto& desp : avail){{
for (auto& desp : avail){{
tv::gemm::GemmParams params;
tv::gemm::GemmParams params;
if (desp.is_nvrtc || prebuilt_names_.find(desp.__repr__()) == prebuilt_names_.end()){{
if (desp.is_nvrtc || prebuilt_names_.find(desp.__repr__()) == prebuilt_names_.end()){{
...
@@ -722,12 +723,18 @@ class GemmTunerSimple(pccm.ParameterizedClass):
...
@@ -722,12 +723,18 @@ class GemmTunerSimple(pccm.ParameterizedClass):
params.stream = stream_int;
params.stream = stream_int;
if (desp.split_k_serial() && (hint &
{
AlgoHint
.
BackwardWeight
.
value
}
)){{
if (desp.split_k_serial() && (hint &
{
AlgoHint
.
BackwardWeight
.
value
}
)){{
splitk_tests = {{
{
', '
.
join
(
map
(
str
,
SPCONV_BWD_SPLITK
))
}
}};
splitk_tests = {{
{
', '
.
join
(
map
(
str
,
SPCONV_BWD_SPLITK
))
}
}};
splitk_tests.insert(int(a.dim(0)) / std::min(1 << 10, int(a.dim(0))));
splitk_tests.insert(int(a.dim(0)) / std::min(1 << 11, int(a.dim(0))));
splitk_tests.insert(int(a.dim(0)) / std::min(1 << 12, int(a.dim(0))));
}} else {{
}} else {{
splitk_tests = {{1}};
splitk_tests = {{1}};
}}
}}
for (auto spk : splitk_tests){{
std::vector<int> splitk_tests_vec(splitk_tests.begin(), splitk_tests.end());
std::sort(splitk_tests_vec.begin(), splitk_tests_vec.end(), [](auto a, auto b){{return a > b;}});
for (auto spk : splitk_tests_vec){{
float total_time = 0.0;
float total_time = 0.0;
params.split_k_slices = spk;
params.split_k_slices = spk;
int actual_run = 0;
for (int j = 0; j < num_run; ++j){{
for (int j = 0; j < num_run; ++j){{
auto ev_start = tv::CUDAEvent();
auto ev_start = tv::CUDAEvent();
auto ev_end = tv::CUDAEvent();
auto ev_end = tv::CUDAEvent();
...
@@ -736,11 +743,22 @@ class GemmTunerSimple(pccm.ParameterizedClass):
...
@@ -736,11 +743,22 @@ class GemmTunerSimple(pccm.ParameterizedClass):
ev_end.record(stream_int);
ev_end.record(stream_int);
if (j > 0){{
if (j > 0){{
// skip first run
// skip first run
total_time += tv::CUDAEvent::sync_and_duration(ev_start, ev_end);
auto cur_time = tv::CUDAEvent::sync_and_duration(ev_start, ev_end);
total_time += cur_time;
actual_run++;
if (min_time > 0 && cur_time > min_time * 1.5){{
// early skip for slow kernels
break;
}}
}}
}}
}}
}}
total_time /=
(num_run - 1)
;
total_time /=
actual_run
;
times.push_back(total_time);
times.push_back(total_time);
if (min_time < 0){{
min_time = total_time;
}}else{{
min_time = std::min(min_time, total_time);
}}
all_profile_res.push_back(GemmTuneResult(desp, arch, spk));
all_profile_res.push_back(GemmTuneResult(desp, arch, spk));
}}
}}
}}
}}
...
@@ -1078,6 +1096,7 @@ class ConvTunerSimple(pccm.ParameterizedClass):
...
@@ -1078,6 +1096,7 @@ class ConvTunerSimple(pccm.ParameterizedClass):
finally_algos.push_back(desp);
finally_algos.push_back(desp);
}}
}}
}}
}}
std::sort(finally_algos.begin(), finally_algos.end(), [](auto a, auto b){{return a.min_arch > b.min_arch;}});
return finally_algos;
return finally_algos;
"""
)
"""
)
return
code
.
ret
(
"std::vector<tv::gemm::ConvAlgoDesp>"
,
return
code
.
ret
(
"std::vector<tv::gemm::ConvAlgoDesp>"
,
...
@@ -1145,9 +1164,10 @@ class ConvTunerSimple(pccm.ParameterizedClass):
...
@@ -1145,9 +1164,10 @@ class ConvTunerSimple(pccm.ParameterizedClass):
int channel_c = inp.dim(1);
int channel_c = inp.dim(1);
std::vector<ConvTuneResult> all_profile_res;
std::vector<ConvTuneResult> all_profile_res;
std::
vector
<int> splitk_tests;
std::
unordered_set
<int> splitk_tests;
std::vector<float> times;
std::vector<float> times;
tv::gemm::ConvOpType op_type_cpp = static_cast<tv::gemm::ConvOpType>(op_type);
tv::gemm::ConvOpType op_type_cpp = static_cast<tv::gemm::ConvOpType>(op_type);
float min_time = -1;
for (auto& desp : avail){{
for (auto& desp : avail){{
tv::gemm::ConvParams params(
{
NDIM_DONT_CARE
}
, op_type_cpp, tv::CUDAKernelTimer(false));
tv::gemm::ConvParams params(
{
NDIM_DONT_CARE
}
, op_type_cpp, tv::CUDAKernelTimer(false));
if (desp.is_nvrtc || prebuilt_names_.find(desp.__repr__()) == prebuilt_names_.end()){{
if (desp.is_nvrtc || prebuilt_names_.find(desp.__repr__()) == prebuilt_names_.end()){{
...
@@ -1176,12 +1196,18 @@ class ConvTunerSimple(pccm.ParameterizedClass):
...
@@ -1176,12 +1196,18 @@ class ConvTunerSimple(pccm.ParameterizedClass):
if (desp.split_k_serial() && (op_type_cpp == tv::gemm::ConvOpType::kBackwardWeight)){{
if (desp.split_k_serial() && (op_type_cpp == tv::gemm::ConvOpType::kBackwardWeight)){{
splitk_tests = {{
{
', '
.
join
(
map
(
str
,
SPCONV_BWD_SPLITK
))
}
}};
splitk_tests = {{
{
', '
.
join
(
map
(
str
,
SPCONV_BWD_SPLITK
))
}
}};
splitk_tests.insert(int(inp.dim(0)) / std::min(1 << 10, int(inp.dim(0))));
splitk_tests.insert(int(inp.dim(0)) / std::min(1 << 11, int(inp.dim(0))));
splitk_tests.insert(int(inp.dim(0)) / std::min(1 << 12, int(inp.dim(0))));
}} else {{
}} else {{
splitk_tests = {{1}};
splitk_tests = {{1}};
}}
}}
for (auto spk : splitk_tests){{
std::vector<int> splitk_tests_vec(splitk_tests.begin(), splitk_tests.end());
std::sort(splitk_tests_vec.begin(), splitk_tests_vec.end(), [](auto a, auto b){{return a > b;}});
for (auto spk : splitk_tests_vec){{
float total_time = 0.0;
float total_time = 0.0;
params.split_k_slices = spk;
params.split_k_slices = spk;
int actual_run = 0;
for (int j = 0; j < num_run; ++j){{
for (int j = 0; j < num_run; ++j){{
auto ev_start = tv::CUDAEvent();
auto ev_start = tv::CUDAEvent();
auto ev_end = tv::CUDAEvent();
auto ev_end = tv::CUDAEvent();
...
@@ -1190,11 +1216,22 @@ class ConvTunerSimple(pccm.ParameterizedClass):
...
@@ -1190,11 +1216,22 @@ class ConvTunerSimple(pccm.ParameterizedClass):
ev_end.record(stream_int);
ev_end.record(stream_int);
if (j > 0){{
if (j > 0){{
// skip first run
// skip first run
total_time += tv::CUDAEvent::sync_and_duration(ev_start, ev_end);
auto cur_time = tv::CUDAEvent::sync_and_duration(ev_start, ev_end);
total_time += cur_time;
actual_run++;
if (min_time > 0 && cur_time > min_time * 1.5){{
// early skip for slow kernels
break;
}}
}}
}}
}}
total_time /= (num_run - 1);
}}
total_time /= actual_run;
times.push_back(total_time);
times.push_back(total_time);
if (min_time < 0){{
min_time = total_time;
}}else{{
min_time = std::min(min_time, total_time);
}}
all_profile_res.push_back(ConvTuneResult(desp, arch, spk));
all_profile_res.push_back(ConvTuneResult(desp, arch, spk));
}}
}}
}}
}}
...
...
tools/build-wheels-dev.sh
0 → 100755
View file @
77a7981a
#!/bin/bash
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Shell options for the whole script:
#   -e  exit as soon as a command fails
#   -u  treat expansion of an unset variable as an error
#   -x  print each command before running it
set -e -u -x
# Repair a single wheel with auditwheel.
#
# Arguments:
#   $1 - path of the wheel to inspect and repair
#   $2 - directory that receives the repaired wheel
# Environment:
#   PLAT - platform tag forwarded to `auditwheel repair --plat`; this script
#          never assigns it, so it must come from the caller's environment
#          (with `set -u` active, an unset PLAT aborts the script).
#
# A wheel for which `auditwheel show` fails is reported and left as-is.
function repair_wheel {
    # `local` keeps these names out of the script's global scope so the
    # function cannot clobber same-named variables of its caller.
    local wheel="$1"
    local outpath="$2"
    if ! auditwheel show "$wheel"; then
        echo "Skipping non-platform wheel $wheel"
    else
        auditwheel repair "$wheel" --plat "$PLAT" -w "$outpath"
    fi
}
# Log the compiler version that will be used for the build.
gcc -v

export SPCONV_DISABLE_JIT="1"
export CUMM_CUDA_ARCH_LIST="7.5"
# export SPCONV_PYTHON_LIST="3.7;3.8;3.9;3.10"

# Compile wheels, we only support 3.6-3.10.
# "/opt/python/cp36-cp36m/bin/pip" wheel /io/ --no-deps -w /io/wheelhouse_tmp

# SPCONV_PYTHON_LIST is a ';'-separated list of Python versions; turning the
# separators into spaces lets word splitting hand them to the loop one by one.
for PYVER in ${SPCONV_PYTHON_LIST//;/ }
do
    # Drop the first dot: "3.8" -> "38".
    PYVER2="${PYVER/./}"
    # Pick the interpreter directory name under /opt/python.
    case "$PYVER2" in
        36|37)
            # These two versions carry an "m" suffix on the second tag.
            PYVER_CP="cp${PYVER2}-cp${PYVER2}m"
            ;;
        *311*)
            PYVER_CP="cp311-cp311"
            ;;
        *)
            PYVER_CP="cp${PYVER2}-cp${PYVER2}"
            ;;
    esac
    "/opt/python/$PYVER_CP/bin/pip" wheel /io/ -v --no-deps -w /io/wheelhouse_tmp
done

# Bundle external shared libraries into the wheels
for whl in /io/wheelhouse_tmp/*.whl; do
    repair_wheel "$whl" /io/dist
done

# The unrepaired wheels are no longer needed once /io/dist is populated.
rm -rf /io/wheelhouse_tmp
\ No newline at end of file
version.txt
View file @
77a7981a
2.2.
1
2.2.
2
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment