Commit 77a7981a authored by yan.yan's avatar yan.yan
Browse files

fix CI problem

parent d4de767e
# Changelog # Changelog
## [2.2.2] - 2022-9-25
### Fixed
- Fix CI problem: the main function was too long and caused an OOM in the CI VM.
## [2.2.1] - 2022-9-25 ## [2.2.1] - 2022-9-25
### Fixed ### Fixed
- Fix build problem - Fix build problem
......
[build-system] [build-system]
requires = ["setuptools>=41.0", "wheel", "pccm>=0.4.0", "cumm>=0.3.3"] requires = ["setuptools>=41.0", "wheel", "pccm>=0.4.0", "cumm>=0.3.4"]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
...@@ -25,6 +25,7 @@ NAME = 'spconv' ...@@ -25,6 +25,7 @@ NAME = 'spconv'
RELEASE_NAME = NAME RELEASE_NAME = NAME
deps = ["cumm"] deps = ["cumm"]
cuda_ver = os.environ.get("CUMM_CUDA_VERSION", "") cuda_ver = os.environ.get("CUMM_CUDA_VERSION", "")
# is_ci_build = cuda_ver != "" # is_ci_build = cuda_ver != ""
# if not cuda_ver: # if not cuda_ver:
# nvcc_version = subprocess.check_output(["nvcc", "--version" # nvcc_version = subprocess.check_output(["nvcc", "--version"
...@@ -35,12 +36,12 @@ cuda_ver = os.environ.get("CUMM_CUDA_VERSION", "") ...@@ -35,12 +36,12 @@ cuda_ver = os.environ.get("CUMM_CUDA_VERSION", "")
# cuda_ver = version_str # cuda_ver = version_str
if cuda_ver: if cuda_ver:
cuda_ver = cuda_ver.replace(".", "") # 10.2 to 102 cuda_ver_str = cuda_ver.replace(".", "") # 10.2 to 102
RELEASE_NAME += "-cu{}".format(cuda_ver) RELEASE_NAME += "-cu{}".format(cuda_ver_str)
deps = ["cumm-cu{}>=0.3.2".format(cuda_ver)] deps = ["cumm-cu{}>=0.3.4".format(cuda_ver_str)]
else: else:
deps = ["cumm>=0.3.2"] deps = ["cumm>=0.3.4"]
...@@ -176,8 +177,9 @@ if disable_jit is not None and disable_jit == "1": ...@@ -176,8 +177,9 @@ if disable_jit is not None and disable_jit == "1":
cu.namespace = "cumm.gemm.main" cu.namespace = "cumm.gemm.main"
std = "c++17" std = "c++17"
if cuda_ver: if cuda_ver:
cuda_ver_number = int(cuda_ver) cuda_ver_vec = list(map(int, cuda_ver.split(".")))
if cuda_ver_number < 110: cuda_ver_tuple = (cuda_ver_vec[0], cuda_ver_vec[1])
if cuda_ver_tuple[0] < 11:
std = "c++14" std = "c++14"
else: else:
std = "c++17" std = "c++17"
......
...@@ -29,6 +29,7 @@ from .gather import GatherCPU ...@@ -29,6 +29,7 @@ from .gather import GatherCPU
from .alloc import ExternalAllocator, ThrustAllocator from .alloc import ExternalAllocator, ThrustAllocator
from spconv.constants import SPCONV_DIRECT_TABLE_HASH_SIZE_SCALE, AllocKeys from spconv.constants import SPCONV_DIRECT_TABLE_HASH_SIZE_SCALE, AllocKeys
import re import re
import os
class CustomThrustLib(pccm.Class): class CustomThrustLib(pccm.Class):
def __init__(self): def __init__(self):
...@@ -131,7 +132,12 @@ class SpconvOps(pccm.Class): ...@@ -131,7 +132,12 @@ class SpconvOps(pccm.Class):
define_str = "\n".join(defines) define_str = "\n".join(defines)
self.add_global_code(define_str) self.add_global_code(define_str)
self.build_meta.add_global_cflags("cl", "/DNOMINMAX") self.build_meta.add_global_cflags("cl", "/DNOMINMAX")
# self.build_meta.add_global_cflags("nvcc", "-w") cuda_ver = os.environ.get("CUMM_CUDA_VERSION", "")
if cuda_ver:
cuda_ver_vec = list(map(int, cuda_ver.split(".")))
cuda_ver_tuple = (cuda_ver_vec[0], cuda_ver_vec[1])
if cuda_ver_tuple[0] < 11:
self.build_meta.add_global_cflags("nvcc", "-w")
# for name in dir(AllocKeys): # for name in dir(AllocKeys):
# if not name.startswith("__"): # if not name.startswith("__"):
......
...@@ -591,6 +591,7 @@ class GemmTunerSimple(pccm.ParameterizedClass): ...@@ -591,6 +591,7 @@ class GemmTunerSimple(pccm.ParameterizedClass):
finally_algos.push_back(desp); finally_algos.push_back(desp);
}} }}
}} }}
std::sort(finally_algos.begin(), finally_algos.end(), [](auto a, auto b){{return a.min_arch > b.min_arch;}});
return finally_algos; return finally_algos;
""") """)
return code.ret("std::vector<tv::gemm::GemmAlgoDesp>", return code.ret("std::vector<tv::gemm::GemmAlgoDesp>",
...@@ -702,9 +703,9 @@ class GemmTunerSimple(pccm.ParameterizedClass): ...@@ -702,9 +703,9 @@ class GemmTunerSimple(pccm.ParameterizedClass):
trans_c, arch, shuffle_type, use_tf32); trans_c, arch, shuffle_type, use_tf32);
auto c_ = c.clone_whole_storage(); auto c_ = c.clone_whole_storage();
std::vector<GemmTuneResult> all_profile_res; std::vector<GemmTuneResult> all_profile_res;
std::vector<int> splitk_tests; std::unordered_set<int> splitk_tests;
std::vector<float> times; std::vector<float> times;
float min_time = -1;
for (auto& desp : avail){{ for (auto& desp : avail){{
tv::gemm::GemmParams params; tv::gemm::GemmParams params;
if (desp.is_nvrtc || prebuilt_names_.find(desp.__repr__()) == prebuilt_names_.end()){{ if (desp.is_nvrtc || prebuilt_names_.find(desp.__repr__()) == prebuilt_names_.end()){{
...@@ -722,12 +723,18 @@ class GemmTunerSimple(pccm.ParameterizedClass): ...@@ -722,12 +723,18 @@ class GemmTunerSimple(pccm.ParameterizedClass):
params.stream = stream_int; params.stream = stream_int;
if (desp.split_k_serial() && (hint & {AlgoHint.BackwardWeight.value})){{ if (desp.split_k_serial() && (hint & {AlgoHint.BackwardWeight.value})){{
splitk_tests = {{{', '.join(map(str, SPCONV_BWD_SPLITK))}}}; splitk_tests = {{{', '.join(map(str, SPCONV_BWD_SPLITK))}}};
splitk_tests.insert(int(a.dim(0)) / std::min(1 << 10, int(a.dim(0))));
splitk_tests.insert(int(a.dim(0)) / std::min(1 << 11, int(a.dim(0))));
splitk_tests.insert(int(a.dim(0)) / std::min(1 << 12, int(a.dim(0))));
}} else {{ }} else {{
splitk_tests = {{1}}; splitk_tests = {{1}};
}} }}
for (auto spk : splitk_tests){{ std::vector<int> splitk_tests_vec(splitk_tests.begin(), splitk_tests.end());
std::sort(splitk_tests_vec.begin(), splitk_tests_vec.end(), [](auto a, auto b){{return a > b;}});
for (auto spk : splitk_tests_vec){{
float total_time = 0.0; float total_time = 0.0;
params.split_k_slices = spk; params.split_k_slices = spk;
int actual_run = 0;
for (int j = 0; j < num_run; ++j){{ for (int j = 0; j < num_run; ++j){{
auto ev_start = tv::CUDAEvent(); auto ev_start = tv::CUDAEvent();
auto ev_end = tv::CUDAEvent(); auto ev_end = tv::CUDAEvent();
...@@ -736,11 +743,22 @@ class GemmTunerSimple(pccm.ParameterizedClass): ...@@ -736,11 +743,22 @@ class GemmTunerSimple(pccm.ParameterizedClass):
ev_end.record(stream_int); ev_end.record(stream_int);
if (j > 0){{ if (j > 0){{
// skip first run // skip first run
total_time += tv::CUDAEvent::sync_and_duration(ev_start, ev_end); auto cur_time = tv::CUDAEvent::sync_and_duration(ev_start, ev_end);
total_time += cur_time;
actual_run++;
if (min_time > 0 && cur_time > min_time * 1.5){{
// early skip for slow kernels
break;
}}
}} }}
}} }}
total_time /= (num_run - 1); total_time /= actual_run;
times.push_back(total_time); times.push_back(total_time);
if (min_time < 0){{
min_time = total_time;
}}else{{
min_time = std::min(min_time, total_time);
}}
all_profile_res.push_back(GemmTuneResult(desp, arch, spk)); all_profile_res.push_back(GemmTuneResult(desp, arch, spk));
}} }}
}} }}
...@@ -1078,6 +1096,7 @@ class ConvTunerSimple(pccm.ParameterizedClass): ...@@ -1078,6 +1096,7 @@ class ConvTunerSimple(pccm.ParameterizedClass):
finally_algos.push_back(desp); finally_algos.push_back(desp);
}} }}
}} }}
std::sort(finally_algos.begin(), finally_algos.end(), [](auto a, auto b){{return a.min_arch > b.min_arch;}});
return finally_algos; return finally_algos;
""") """)
return code.ret("std::vector<tv::gemm::ConvAlgoDesp>", return code.ret("std::vector<tv::gemm::ConvAlgoDesp>",
...@@ -1145,9 +1164,10 @@ class ConvTunerSimple(pccm.ParameterizedClass): ...@@ -1145,9 +1164,10 @@ class ConvTunerSimple(pccm.ParameterizedClass):
int channel_c = inp.dim(1); int channel_c = inp.dim(1);
std::vector<ConvTuneResult> all_profile_res; std::vector<ConvTuneResult> all_profile_res;
std::vector<int> splitk_tests; std::unordered_set<int> splitk_tests;
std::vector<float> times; std::vector<float> times;
tv::gemm::ConvOpType op_type_cpp = static_cast<tv::gemm::ConvOpType>(op_type); tv::gemm::ConvOpType op_type_cpp = static_cast<tv::gemm::ConvOpType>(op_type);
float min_time = -1;
for (auto& desp : avail){{ for (auto& desp : avail){{
tv::gemm::ConvParams params({NDIM_DONT_CARE}, op_type_cpp, tv::CUDAKernelTimer(false)); tv::gemm::ConvParams params({NDIM_DONT_CARE}, op_type_cpp, tv::CUDAKernelTimer(false));
if (desp.is_nvrtc || prebuilt_names_.find(desp.__repr__()) == prebuilt_names_.end()){{ if (desp.is_nvrtc || prebuilt_names_.find(desp.__repr__()) == prebuilt_names_.end()){{
...@@ -1176,12 +1196,18 @@ class ConvTunerSimple(pccm.ParameterizedClass): ...@@ -1176,12 +1196,18 @@ class ConvTunerSimple(pccm.ParameterizedClass):
if (desp.split_k_serial() && (op_type_cpp == tv::gemm::ConvOpType::kBackwardWeight)){{ if (desp.split_k_serial() && (op_type_cpp == tv::gemm::ConvOpType::kBackwardWeight)){{
splitk_tests = {{{', '.join(map(str, SPCONV_BWD_SPLITK))}}}; splitk_tests = {{{', '.join(map(str, SPCONV_BWD_SPLITK))}}};
splitk_tests.insert(int(inp.dim(0)) / std::min(1 << 10, int(inp.dim(0))));
splitk_tests.insert(int(inp.dim(0)) / std::min(1 << 11, int(inp.dim(0))));
splitk_tests.insert(int(inp.dim(0)) / std::min(1 << 12, int(inp.dim(0))));
}} else {{ }} else {{
splitk_tests = {{1}}; splitk_tests = {{1}};
}} }}
for (auto spk : splitk_tests){{ std::vector<int> splitk_tests_vec(splitk_tests.begin(), splitk_tests.end());
std::sort(splitk_tests_vec.begin(), splitk_tests_vec.end(), [](auto a, auto b){{return a > b;}});
for (auto spk : splitk_tests_vec){{
float total_time = 0.0; float total_time = 0.0;
params.split_k_slices = spk; params.split_k_slices = spk;
int actual_run = 0;
for (int j = 0; j < num_run; ++j){{ for (int j = 0; j < num_run; ++j){{
auto ev_start = tv::CUDAEvent(); auto ev_start = tv::CUDAEvent();
auto ev_end = tv::CUDAEvent(); auto ev_end = tv::CUDAEvent();
...@@ -1190,11 +1216,22 @@ class ConvTunerSimple(pccm.ParameterizedClass): ...@@ -1190,11 +1216,22 @@ class ConvTunerSimple(pccm.ParameterizedClass):
ev_end.record(stream_int); ev_end.record(stream_int);
if (j > 0){{ if (j > 0){{
// skip first run // skip first run
total_time += tv::CUDAEvent::sync_and_duration(ev_start, ev_end); auto cur_time = tv::CUDAEvent::sync_and_duration(ev_start, ev_end);
total_time += cur_time;
actual_run++;
if (min_time > 0 && cur_time > min_time * 1.5){{
// early skip for slow kernels
break;
}} }}
}} }}
total_time /= (num_run - 1); }}
total_time /= actual_run;
times.push_back(total_time); times.push_back(total_time);
if (min_time < 0){{
min_time = total_time;
}}else{{
min_time = std::min(min_time, total_time);
}}
all_profile_res.push_back(ConvTuneResult(desp, arch, spk)); all_profile_res.push_back(ConvTuneResult(desp, arch, spk));
}} }}
}} }}
......
#!/bin/bash
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e -u -x
# Run auditwheel on a single wheel and write the repaired wheel to a directory.
#
# Arguments:
#   $1 - path of the wheel to inspect and repair
#   $2 - output directory handed to `auditwheel repair -w`
# Environment:
#   PLAT - platform tag handed to `auditwheel repair --plat`; the script runs
#          with `set -u`, so an unset PLAT aborts at the repair call.
#
# A wheel for which `auditwheel show` fails is reported and skipped; nothing
# is written to the output directory for it.
function repair_wheel {
    # Keep the arguments function-local so they do not leak into (or clobber)
    # variables of the calling script.
    local wheel="$1"
    local outpath="$2"
    if ! auditwheel show "$wheel"; then
        echo "Skipping non-platform wheel $wheel"
    else
        auditwheel repair "$wheel" --plat "$PLAT" -w "$outpath"
    fi
}
# Log the compiler in use so the CI output records the toolchain.
gcc -v

# Build-time switches read by the spconv/cumm build.
export SPCONV_DISABLE_JIT="1"
export CUMM_CUDA_ARCH_LIST="7.5"

# SPCONV_PYTHON_LIST must be provided by the caller as a ';'-separated list of
# Python versions, for example:
# export SPCONV_PYTHON_LIST="3.7;3.8;3.9;3.10"
#
# Build one wheel per listed interpreter. Each version is mapped to its
# directory under /opt/python, e.g.:
# "/opt/python/cp36-cp36m/bin/pip" wheel /io/ --no-deps -w /io/wheelhouse_tmp
for py_ver in ${SPCONV_PYTHON_LIST//;/ }; do
    # Drop the first dot only: "3.10" -> "310".
    py_tag="${py_ver/./}"

    # 3.6 and 3.7 interpreter directories carry an extra "m" ABI suffix.
    case "$py_tag" in
        36|37) py_dir="cp${py_tag}-cp${py_tag}m" ;;
        *)     py_dir="cp${py_tag}-cp${py_tag}" ;;
    esac

    # Any tag containing "311" is pinned to the plain 3.11 directory.
    if [[ $py_tag == *"311"* ]]; then
        py_dir="cp311-cp311"
    fi

    "/opt/python/${py_dir}/bin/pip" wheel /io/ -v --no-deps -w /io/wheelhouse_tmp
done

# Bundle external shared libraries into the wheels
for built_wheel in /io/wheelhouse_tmp/*.whl
do
    repair_wheel "$built_wheel" /io/dist
done

# The unrepaired wheels are no longer needed once /io/dist is populated.
rm -rf /io/wheelhouse_tmp
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment