Commit 77a7981a authored by yan.yan's avatar yan.yan
Browse files

fix CI problem

parent d4de767e
# Changelog
## [2.2.2] - 2022-9-25
### Fixed
- Fix CI problem: the main function was too long and caused OOM in the CI VM.
## [2.2.1] - 2022-9-25
### Fixed
- Fix build problem
......
[build-system]
requires = ["setuptools>=41.0", "wheel", "pccm>=0.4.0", "cumm>=0.3.3"]
requires = ["setuptools>=41.0", "wheel", "pccm>=0.4.0", "cumm>=0.3.4"]
build-backend = "setuptools.build_meta"
......@@ -25,6 +25,7 @@ NAME = 'spconv'
RELEASE_NAME = NAME
deps = ["cumm"]
cuda_ver = os.environ.get("CUMM_CUDA_VERSION", "")
# is_ci_build = cuda_ver != ""
# if not cuda_ver:
# nvcc_version = subprocess.check_output(["nvcc", "--version"
......@@ -35,12 +36,12 @@ cuda_ver = os.environ.get("CUMM_CUDA_VERSION", "")
# cuda_ver = version_str
if cuda_ver:
cuda_ver = cuda_ver.replace(".", "") # 10.2 to 102
cuda_ver_str = cuda_ver.replace(".", "") # 10.2 to 102
RELEASE_NAME += "-cu{}".format(cuda_ver)
deps = ["cumm-cu{}>=0.3.2".format(cuda_ver)]
RELEASE_NAME += "-cu{}".format(cuda_ver_str)
deps = ["cumm-cu{}>=0.3.4".format(cuda_ver_str)]
else:
deps = ["cumm>=0.3.2"]
deps = ["cumm>=0.3.4"]
......@@ -176,8 +177,9 @@ if disable_jit is not None and disable_jit == "1":
cu.namespace = "cumm.gemm.main"
std = "c++17"
if cuda_ver:
cuda_ver_number = int(cuda_ver)
if cuda_ver_number < 110:
cuda_ver_vec = list(map(int, cuda_ver.split(".")))
cuda_ver_tuple = (cuda_ver_vec[0], cuda_ver_vec[1])
if cuda_ver_tuple[0] < 11:
std = "c++14"
else:
std = "c++17"
......
......@@ -29,6 +29,7 @@ from .gather import GatherCPU
from .alloc import ExternalAllocator, ThrustAllocator
from spconv.constants import SPCONV_DIRECT_TABLE_HASH_SIZE_SCALE, AllocKeys
import re
import os
class CustomThrustLib(pccm.Class):
def __init__(self):
......@@ -131,7 +132,12 @@ class SpconvOps(pccm.Class):
define_str = "\n".join(defines)
self.add_global_code(define_str)
self.build_meta.add_global_cflags("cl", "/DNOMINMAX")
# self.build_meta.add_global_cflags("nvcc", "-w")
cuda_ver = os.environ.get("CUMM_CUDA_VERSION", "")
if cuda_ver:
cuda_ver_vec = list(map(int, cuda_ver.split(".")))
cuda_ver_tuple = (cuda_ver_vec[0], cuda_ver_vec[1])
if cuda_ver_tuple[0] < 11:
self.build_meta.add_global_cflags("nvcc", "-w")
# for name in dir(AllocKeys):
# if not name.startswith("__"):
......
......@@ -591,6 +591,7 @@ class GemmTunerSimple(pccm.ParameterizedClass):
finally_algos.push_back(desp);
}}
}}
std::sort(finally_algos.begin(), finally_algos.end(), [](auto a, auto b){{return a.min_arch > b.min_arch;}});
return finally_algos;
""")
return code.ret("std::vector<tv::gemm::GemmAlgoDesp>",
......@@ -702,9 +703,9 @@ class GemmTunerSimple(pccm.ParameterizedClass):
trans_c, arch, shuffle_type, use_tf32);
auto c_ = c.clone_whole_storage();
std::vector<GemmTuneResult> all_profile_res;
std::vector<int> splitk_tests;
std::unordered_set<int> splitk_tests;
std::vector<float> times;
float min_time = -1;
for (auto& desp : avail){{
tv::gemm::GemmParams params;
if (desp.is_nvrtc || prebuilt_names_.find(desp.__repr__()) == prebuilt_names_.end()){{
......@@ -722,12 +723,18 @@ class GemmTunerSimple(pccm.ParameterizedClass):
params.stream = stream_int;
if (desp.split_k_serial() && (hint & {AlgoHint.BackwardWeight.value})){{
splitk_tests = {{{', '.join(map(str, SPCONV_BWD_SPLITK))}}};
splitk_tests.insert(int(a.dim(0)) / std::min(1 << 10, int(a.dim(0))));
splitk_tests.insert(int(a.dim(0)) / std::min(1 << 11, int(a.dim(0))));
splitk_tests.insert(int(a.dim(0)) / std::min(1 << 12, int(a.dim(0))));
}} else {{
splitk_tests = {{1}};
}}
for (auto spk : splitk_tests){{
std::vector<int> splitk_tests_vec(splitk_tests.begin(), splitk_tests.end());
std::sort(splitk_tests_vec.begin(), splitk_tests_vec.end(), [](auto a, auto b){{return a > b;}});
for (auto spk : splitk_tests_vec){{
float total_time = 0.0;
params.split_k_slices = spk;
int actual_run = 0;
for (int j = 0; j < num_run; ++j){{
auto ev_start = tv::CUDAEvent();
auto ev_end = tv::CUDAEvent();
......@@ -736,11 +743,22 @@ class GemmTunerSimple(pccm.ParameterizedClass):
ev_end.record(stream_int);
if (j > 0){{
// skip first run
total_time += tv::CUDAEvent::sync_and_duration(ev_start, ev_end);
auto cur_time = tv::CUDAEvent::sync_and_duration(ev_start, ev_end);
total_time += cur_time;
actual_run++;
if (min_time > 0 && cur_time > min_time * 1.5){{
// early skip for slow kernels
break;
}}
}}
}}
total_time /= (num_run - 1);
total_time /= actual_run;
times.push_back(total_time);
if (min_time < 0){{
min_time = total_time;
}}else{{
min_time = std::min(min_time, total_time);
}}
all_profile_res.push_back(GemmTuneResult(desp, arch, spk));
}}
}}
......@@ -1078,6 +1096,7 @@ class ConvTunerSimple(pccm.ParameterizedClass):
finally_algos.push_back(desp);
}}
}}
std::sort(finally_algos.begin(), finally_algos.end(), [](auto a, auto b){{return a.min_arch > b.min_arch;}});
return finally_algos;
""")
return code.ret("std::vector<tv::gemm::ConvAlgoDesp>",
......@@ -1145,9 +1164,10 @@ class ConvTunerSimple(pccm.ParameterizedClass):
int channel_c = inp.dim(1);
std::vector<ConvTuneResult> all_profile_res;
std::vector<int> splitk_tests;
std::unordered_set<int> splitk_tests;
std::vector<float> times;
tv::gemm::ConvOpType op_type_cpp = static_cast<tv::gemm::ConvOpType>(op_type);
float min_time = -1;
for (auto& desp : avail){{
tv::gemm::ConvParams params({NDIM_DONT_CARE}, op_type_cpp, tv::CUDAKernelTimer(false));
if (desp.is_nvrtc || prebuilt_names_.find(desp.__repr__()) == prebuilt_names_.end()){{
......@@ -1176,12 +1196,18 @@ class ConvTunerSimple(pccm.ParameterizedClass):
if (desp.split_k_serial() && (op_type_cpp == tv::gemm::ConvOpType::kBackwardWeight)){{
splitk_tests = {{{', '.join(map(str, SPCONV_BWD_SPLITK))}}};
splitk_tests.insert(int(inp.dim(0)) / std::min(1 << 10, int(inp.dim(0))));
splitk_tests.insert(int(inp.dim(0)) / std::min(1 << 11, int(inp.dim(0))));
splitk_tests.insert(int(inp.dim(0)) / std::min(1 << 12, int(inp.dim(0))));
}} else {{
splitk_tests = {{1}};
}}
for (auto spk : splitk_tests){{
std::vector<int> splitk_tests_vec(splitk_tests.begin(), splitk_tests.end());
std::sort(splitk_tests_vec.begin(), splitk_tests_vec.end(), [](auto a, auto b){{return a > b;}});
for (auto spk : splitk_tests_vec){{
float total_time = 0.0;
params.split_k_slices = spk;
int actual_run = 0;
for (int j = 0; j < num_run; ++j){{
auto ev_start = tv::CUDAEvent();
auto ev_end = tv::CUDAEvent();
......@@ -1190,11 +1216,22 @@ class ConvTunerSimple(pccm.ParameterizedClass):
ev_end.record(stream_int);
if (j > 0){{
// skip first run
total_time += tv::CUDAEvent::sync_and_duration(ev_start, ev_end);
auto cur_time = tv::CUDAEvent::sync_and_duration(ev_start, ev_end);
total_time += cur_time;
actual_run++;
if (min_time > 0 && cur_time > min_time * 1.5){{
// early skip for slow kernels
break;
}}
}}
total_time /= (num_run - 1);
}}
total_time /= actual_run;
times.push_back(total_time);
if (min_time < 0){{
min_time = total_time;
}}else{{
min_time = std::min(min_time, total_time);
}}
all_profile_res.push_back(ConvTuneResult(desp, arch, spk));
}}
}}
......
#!/bin/bash
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Abort on the first failing command (-e), treat unset variables as errors (-u),
# and echo each command before it runs (-x).
set -e -u -x
# Run `auditwheel repair` on a single wheel, or skip it when auditwheel
# cannot inspect it.
#   $1 - path of the wheel to check and repair
#   $2 - output directory handed to `auditwheel repair -w`
# Reads the PLAT environment variable and passes it as `--plat`.
function repair_wheel {
    # Function-local so the names do not leak into the rest of the script.
    local wheel="$1"
    local outpath="$2"
    if auditwheel show "$wheel"; then
        auditwheel repair "$wheel" --plat "$PLAT" -w "$outpath"
    else
        # `auditwheel show` failed: report the wheel and leave it untouched.
        echo "Skipping non-platform wheel $wheel"
    fi
}
# Record the compiler version and configuration in the build log.
gcc -v
# NOTE(review): presumably read by the package build to turn JIT off - confirm against setup.py.
export SPCONV_DISABLE_JIT="1"
# NOTE(review): presumably limits which CUDA architectures get compiled - confirm against cumm.
export CUMM_CUDA_ARCH_LIST="7.5"
# export SPCONV_PYTHON_LIST="3.7;3.8;3.9;3.10"
# Compile wheels; we only support Python 3.6-3.11.
# "/opt/python/cp36-cp36m/bin/pip" wheel /io/ --no-deps -w /io/wheelhouse_tmp
# Build one wheel per interpreter listed in SPCONV_PYTHON_LIST
# (a ';'-separated list of versions such as "3.8;3.9").
for PYVER in ${SPCONV_PYTHON_LIST//;/ }; do
    # Drop the first dot: "3.10" -> "310".
    PYVER_NODOT="${PYVER/./}"
    # Map the version onto its directory name under /opt/python.
    case "$PYVER_NODOT" in
        36|37)
            # These two versions use an ABI tag with a trailing "m".
            PYVER_CP="cp${PYVER_NODOT}-cp${PYVER_NODOT}m"
            ;;
        *311*)
            PYVER_CP="cp311-cp311"
            ;;
        *)
            PYVER_CP="cp${PYVER_NODOT}-cp${PYVER_NODOT}"
            ;;
    esac
    "/opt/python/$PYVER_CP/bin/pip" wheel /io/ -v --no-deps -w /io/wheelhouse_tmp
done
# Pass every freshly built wheel through repair_wheel (which bundles external
# shared libraries), writing the results to /io/dist.
for built_whl in /io/wheelhouse_tmp/*.whl
do
    repair_wheel "$built_whl" /io/dist
done

# The unrepaired intermediate wheels are no longer needed.
rm -rf /io/wheelhouse_tmp
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment