Commit 9f9d5b79 authored by yan.yan's avatar yan.yan
Browse files

still working on windows CI problem

parent ccb1f1db
......@@ -15,7 +15,7 @@ jobs:
runs-on: windows-latest
strategy:
matrix:
python-version: ['3.6', '3.7', '3.8', '3.9', '3.10']
python-version: ['3.7', '3.8', '3.9', '3.10']
cuda-version: ['10.2', '11.1', '11.4']
steps:
- uses: actions/checkout@master
......@@ -64,7 +64,7 @@ jobs:
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: ['3.8'] # this version is only used for upload.
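python-version: ['3.7', '3.8', '3.9', '3.10'] # one job per version; presumably forwarded to the wheel build via PYTHON_VERSION / SPCONV_PYTHON_LIST as well as used for upload (TODO confirm).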
cuda-version: ['102', '111', '114']
steps:
......@@ -88,7 +88,7 @@ jobs:
PLAT: manylinux2014_x86_64
if: (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) && (env.CUDA_VERSION != '') ) || env.CUDA_VERSION == '114'
run: |
docker run --rm -e PLAT=$PLAT -e CUMM_CUDA_VERSION=${{ matrix.cuda-version }} -v `pwd`:/io $DOCKER_IMAGE bash -c "/io/tools/build-wheels.sh"
docker run --rm -e PLAT=$PLAT -e CUMM_CUDA_VERSION=${{ matrix.cuda-version }} -e SPCONV_PYTHON_LIST=${{env.PYTHON_VERSION}} -v `pwd`:/io $DOCKER_IMAGE bash -c "/io/tools/build-wheels.sh"
- name: Publish a Python distribution to PyPI
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
......
......@@ -109,3 +109,5 @@ venv.bak/
.vscode
__version__.py
wheelhouse_tmp
\ No newline at end of file
......@@ -20,7 +20,8 @@ from pccm.utils import project_is_editable, project_is_installed
from .constants import PACKAGE_NAME, PACKAGE_ROOT
if project_is_installed(PACKAGE_NAME) and project_is_editable(PACKAGE_NAME):
from cumm.gemm.main import GemmMainUnitTest, SHUFFLE_SIMT_PARAMS, SHUFFLE_VOLTA_PARAMS, SHUFFLE_TURING_PARAMS
from spconv.core import SHUFFLE_SIMT_PARAMS, SHUFFLE_VOLTA_PARAMS, SHUFFLE_TURING_PARAMS
from cumm.gemm.main import GemmMainUnitTest
from spconv.csrc.sparse.all import SpconvOps
cu = GemmMainUnitTest(SHUFFLE_SIMT_PARAMS + SHUFFLE_VOLTA_PARAMS + SHUFFLE_TURING_PARAMS)
cu.namespace = "cumm.gemm.main"
......
......@@ -12,7 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from enum import Enum
from cumm.gemm.main import gen_shuffle_params, GemmAlgoParams
from cumm.gemm import kernel
from typing import List
from cumm.gemm.algospec.core import TensorOpParams
class ConvAlgo(Enum):
Native = "Native"
MaskImplicitGemm = "MaskImplicitGemm"
......@@ -25,3 +28,177 @@ class AlgoHint(Enum):
BackwardInput = 0b010
BackwardWeight = 0b100
# we can't add more kernels here because build in github action is very slow.
# TODO two step build: build gemm kernels first, then bind for every python
# SIMT shuffle-GEMM kernel configurations.
# Each table row is the pair of shape tuples handed to gen_shuffle_params as its
# first two arguments (presumably tile shape and warp tile shape -- confirm
# against gen_shuffle_params). Rows marked "disabled" are kept for reference
# only and are not generated.
SHUFFLE_SIMT_PARAMS: List[GemmAlgoParams] = [
    # "s8,s8,s32,s32,s32" kernels using GemmAlgo.SimtDP4A.
    *[
        params
        for ts, wts in [
            ((64, 128, 32), (32, 64, 32)),
            ((128, 64, 32), (64, 32, 32)),
            ((128, 128, 32), (32, 64, 32)),
            # disabled: ((128, 128, 32), (64, 32, 32)) with dtypes
            #   ["s8,s8,s8,s32,s32", "s8,s8,s32,s32,s32"]
            ((64, 64, 32), (32, 32, 32)),
        ]
        for params in gen_shuffle_params(
            ts, wts, ["s8,s8,s32,s32,s32"], 2,
            kernel.GemmAlgo.SimtDP4A, None)
    ],
    # "f32,f32,f32,f32,f32" kernels using GemmAlgo.Simt.
    *[
        params
        for ts, wts in [
            ((64, 256, 8), (32, 64, 8)),
            # disabled: ((64, 256, 8), (64, 32, 8))
            ((32, 128, 16), (32, 32, 8)),
            ((32, 512, 8), (32, 64, 8)),
            # disabled: ((128, 128, 8), (64, 32, 8))
            ((128, 128, 8), (32, 64, 8)),
            ((64, 128, 8), (32, 64, 8)),
            # disabled: ((64, 128, 8), (64, 32, 8))
            # disabled: ((128, 64, 8), (32, 64, 8))
            ((128, 64, 8), (64, 32, 8)),
            ((64, 64, 8), (32, 32, 8)),
            ((32, 64, 16), (32, 32, 8)),
            ((64, 32, 16), (32, 32, 8)),
            ((32, 32, 32), (32, 32, 8)),
        ]
        for params in gen_shuffle_params(
            ts, wts, ["f32,f32,f32,f32,f32"], 2,
            kernel.GemmAlgo.Simt, None)
    ],
    # fall back kernels if mat is misaligned for half
    *[
        params
        for ts, wts in [
            # disabled: ((128, 128, 8), (32, 64, 8))
            ((32, 64, 32), (32, 32, 8)),
            ((32, 32, 32), (32, 32, 8)),
            # disabled: ((64, 64, 16), (32, 32, 8))
            ((64, 128, 16), (32, 64, 8)),
            ((64, 64, 8), (32, 32, 8)),
        ]
        for params in gen_shuffle_params(
            ts, wts, ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
            kernel.GemmAlgo.Simt, None)
    ],
]
# Volta shuffle-GEMM kernel configurations.
# Every row is generated for both f16 dtype combinations with
# GemmAlgo.Volta and TensorOpParams((8, 8, 4)). Each row is the pair of shape
# tuples passed as the first two gen_shuffle_params arguments (presumably tile
# shape and warp tile shape -- confirm against gen_shuffle_params).
SHUFFLE_VOLTA_PARAMS: List[GemmAlgoParams] = [
    params
    for ts, wts in [
        ((64, 64, 32), (32, 32, 32)),
        # disabled: ((128, 128, 32), (64, 64, 32))
        ((128, 256, 32), (64, 64, 32)),
        ((256, 128, 32), (64, 64, 32)),
        ((128, 64, 32), (64, 32, 32)),
        ((64, 128, 32), (32, 64, 32)),
    ]
    for params in gen_shuffle_params(
        ts, wts, ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
        kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4)))
]
# SHUFFLE_VOLTA_PARAMS = []
# Turing shuffle-GEMM kernel configurations.
# Two groups, generated in this order: the f16 dtype combinations with
# TensorOpParams((16, 8, 8)), then "s8,s8,s32,s32,s32" with
# TensorOpParams((8, 8, 16)). Each row is the pair of shape tuples passed as
# the first two gen_shuffle_params arguments (presumably tile shape and warp
# tile shape -- confirm against gen_shuffle_params).
SHUFFLE_TURING_PARAMS: List[GemmAlgoParams] = [
    # f16 kernels.
    *[
        params
        for ts, wts in [
            ((64, 64, 32), (32, 32, 32)),
            ((128, 128, 32), (32, 64, 32)),
            # disabled: ((128, 128, 32), (64, 32, 32))
            ((64, 64, 64), (32, 32, 32)),
            ((64, 128, 64), (32, 64, 32)),
            ((128, 256, 32), (64, 64, 32)),
            ((256, 128, 32), (64, 64, 32)),
            ((128, 64, 32), (64, 32, 32)),
            ((64, 128, 32), (32, 64, 32)),
        ]
        for params in gen_shuffle_params(
            ts, wts, ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
            kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8)))
    ],
    # s8 kernels.
    *[
        params
        for ts, wts in [
            ((64, 64, 32), (32, 32, 32)),
            ((128, 128, 32), (32, 64, 32)),
            # disabled: ((128, 128, 32), (64, 32, 32)) with dtypes
            #   ["s8,s8,s8,s32,s32", "s8,s8,s32,s32,s32"]
            ((128, 256, 32), (64, 64, 32)),
            ((256, 128, 32), (64, 64, 32)),
            ((128, 64, 32), (64, 32, 32)),
            ((64, 128, 32), (32, 64, 32)),
        ]
        for params in gen_shuffle_params(
            ts, wts, ["s8,s8,s32,s32,s32"], 2,
            kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16)))
    ],
]
......@@ -27,12 +27,22 @@ function repair_wheel {
export SPCONV_DISABLE_JIT="1"
export CUMM_CUDA_ARCH_LIST="all"
# export SPCONV_PYTHON_LIST="3.7;3.8;3.9;3.10"
# Compile wheels, we only support 3.6-3.10.
# "/opt/python/cp36-cp36m/bin/pip" wheel /io/ --no-deps -w /io/wheelhouse_tmp
"/opt/python/cp37-cp37m/bin/pip" wheel /io/ --no-deps -w /io/wheelhouse_tmp
"/opt/python/cp38-cp38/bin/pip" wheel /io/ --no-deps -w /io/wheelhouse_tmp
"/opt/python/cp39-cp39/bin/pip" wheel /io/ --no-deps -w /io/wheelhouse_tmp
"/opt/python/cp310-cp310/bin/pip" wheel /io/ --no-deps -w /io/wheelhouse_tmp
# Build one wheel per requested CPython version.
# SPCONV_PYTHON_LIST is a ';'-separated list such as "3.7;3.8;3.9;3.10".
# If it is unset or empty, fall back to the versions this script used to build
# unconditionally, so a missing variable does not silently produce no wheels.
PYTHON_LIST="${SPCONV_PYTHON_LIST:-3.7;3.8;3.9;3.10}"
for PYVER in ${PYTHON_LIST//;/ }
do
    # "3.10" -> "310": drop the first dot to get the interpreter tag digits.
    PYVER2="${PYVER/./}"
    case "$PYVER2" in
        36|37)
            # These interpreters live under cpXY-cpXYm (ABI tag has an 'm' suffix).
            PYVER_CP="cp${PYVER2}-cp${PYVER2}m"
            ;;
        *)
            PYVER_CP="cp${PYVER2}-cp${PYVER2}"
            ;;
    esac
    "/opt/python/$PYVER_CP/bin/pip" wheel /io/ -v --no-deps -w /io/wheelhouse_tmp
done
# Bundle external shared libraries into the wheels
for whl in /io/wheelhouse_tmp/*.whl; do
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment