"vscode:/vscode.git/clone" did not exist on "b8d90fb7531d6ef1d53e36d6c5819030dc633633"
Commit 79a3eaf2 authored by yan.yan's avatar yan.yan
Browse files

time to release!

parent b67ad2a8
...@@ -11,53 +11,53 @@ on: ...@@ -11,53 +11,53 @@ on:
- '*' - '*'
jobs: jobs:
# build-windows: build-windows:
# runs-on: windows-latest runs-on: windows-latest
# strategy: strategy:
# matrix: matrix:
# python-version: ['3.7', '3.8', '3.9', '3.10'] python-version: ['3.7', '3.8', '3.9', '3.10']
# cuda-version: ['10.2', '11.1', '11.4'] cuda-version: ['10.2', '11.1', '11.4']
# steps: steps:
# - uses: actions/checkout@master - uses: actions/checkout@master
# - name: Install CUDA - name: Install CUDA
# env: env:
# CUDA_VERSION: ${{ matrix.cuda-version }} CUDA_VERSION: ${{ matrix.cuda-version }}
# PYTHON_VERSION: ${{ matrix.python-version }} PYTHON_VERSION: ${{ matrix.python-version }}
# cuda: ${{ matrix.cuda-version }} cuda: ${{ matrix.cuda-version }}
# if: (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) && (env.CUDA_VERSION != '') ) || (env.CUDA_VERSION == '11.1' && env.PYTHON_VERSION == '3.10') if: (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) && (env.CUDA_VERSION != '') ) || (env.CUDA_VERSION == '11.1' && env.PYTHON_VERSION == '3.10')
# shell: powershell shell: powershell
# run: .\tools\install_windows_cuda.ps1 run: .\tools\install_windows_cuda.ps1
# - name: Set up Python ${{ matrix.python-version }} - name: Set up Python ${{ matrix.python-version }}
# uses: actions/setup-python@v2 uses: actions/setup-python@v2
# with: with:
# python-version: ${{ matrix.python-version }} python-version: ${{ matrix.python-version }}
# - uses: ilammy/msvc-dev-cmd@v1 - uses: ilammy/msvc-dev-cmd@v1
# - name: Install pep build - name: Install pep build
# run: | run: |
# python -m pip install build --user python -m pip install build --user
# python -m pip install --upgrade pip twine wheel python -m pip install --upgrade pip twine wheel
# python -m pip install pytest setuptools python -m pip install pytest setuptools
# - name: Build a windows binary wheel - name: Build a windows binary wheel
# env: env:
# CUDA_VERSION: ${{ matrix.cuda-version }} CUDA_VERSION: ${{ matrix.cuda-version }}
# PYTHON_VERSION: ${{ matrix.python-version }} PYTHON_VERSION: ${{ matrix.python-version }}
# if: (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) ) || (env.CUDA_VERSION == '11.1' && env.PYTHON_VERSION == '3.10') if: (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) ) || (env.CUDA_VERSION == '11.1' && env.PYTHON_VERSION == '3.10')
# run: | run: |
# $Env:CUMM_CUDA_VERSION = "${{ matrix.cuda-version }}" $Env:CUMM_CUDA_VERSION = "${{ matrix.cuda-version }}"
# $Env:CUMM_CUDA_ARCH_LIST = "all" $Env:CUMM_CUDA_ARCH_LIST = "all"
# $Env:SPCONV_DISABLE_JIT = "1" $Env:SPCONV_DISABLE_JIT = "1"
# pip install pccm pybind11 pip install pccm pybind11
# python -m build --wheel --outdir dist/ . python -m build --wheel --outdir dist/ .
# shell: powershell shell: powershell
# - name: Publish a Python distribution to PyPI - name: Publish a Python distribution to PyPI
# if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags') if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
# run: | run: |
# $Env:TWINE_USERNAME = "__token__" $Env:TWINE_USERNAME = "__token__"
# $Env:TWINE_PASSWORD = "${{ secrets.pypi_password }}" $Env:TWINE_PASSWORD = "${{ secrets.pypi_password }}"
# twine upload dist/* twine upload dist/*
# shell: powershell shell: powershell
build: build:
# needs: build-windows # needs: build-windows
......
...@@ -18,8 +18,6 @@ ...@@ -18,8 +18,6 @@
[![Build Status](https://github.com/traveller59/spconv/workflows/build/badge.svg)](https://github.com/traveller59/spconv/actions?query=workflow%3Abuild) [![Build Status](https://github.com/traveller59/spconv/workflows/build/badge.svg)](https://github.com/traveller59/spconv/actions?query=workflow%3Abuild)
# WORK IN PROGRESS, DON'T USE!!!
## Breaking changes in Spconv 2.x ## Breaking changes in Spconv 2.x
* ```spconv.xxx``` move to ```spconv.pytorch.xxx```, change all ```import spconv``` to ```import spconv.pytorch as spconv``` and ```from spconv.xxx import``` to ```from spconv.pytorch.xxx import```. * ```spconv.xxx``` move to ```spconv.pytorch.xxx```, change all ```import spconv``` to ```import spconv.pytorch as spconv``` and ```from spconv.xxx import``` to ```from spconv.pytorch.xxx import```.
......
...@@ -156,7 +156,7 @@ if disable_jit is not None and disable_jit == "1": ...@@ -156,7 +156,7 @@ if disable_jit is not None and disable_jit == "1":
else: else:
std = "c++17" std = "c++17"
ext_modules: List[Extension] = [ ext_modules: List[Extension] = [
PCCMExtension([cu], PCCMExtension([cu, SpconvOps()],
"spconv/core_cc", "spconv/core_cc",
Path(__file__).resolve().parent / "spconv", Path(__file__).resolve().parent / "spconv",
objects_folder="objects", objects_folder="objects",
......
...@@ -32,173 +32,173 @@ class AlgoHint(Enum): ...@@ -32,173 +32,173 @@ class AlgoHint(Enum):
# TODO two step build: build gemm kernels first, then bind for every python # TODO two step build: build gemm kernels first, then bind for every python
SHUFFLE_SIMT_PARAMS: List[GemmAlgoParams] = [ SHUFFLE_SIMT_PARAMS: List[GemmAlgoParams] = [
# *gen_shuffle_params( *gen_shuffle_params(
# (64, 128, 32), (32, 64, 32), ["s8,s8,s32,s32,s32"], (64, 128, 32), (32, 64, 32), ["s8,s8,s32,s32,s32"],
# 2, kernel.GemmAlgo.SimtDP4A, None), 2, kernel.GemmAlgo.SimtDP4A, None),
# *gen_shuffle_params( *gen_shuffle_params(
# (128, 64, 32), (64, 32, 32), ["s8,s8,s32,s32,s32"], (128, 64, 32), (64, 32, 32), ["s8,s8,s32,s32,s32"],
# 2, kernel.GemmAlgo.SimtDP4A, None), 2, kernel.GemmAlgo.SimtDP4A, None),
# *gen_shuffle_params( *gen_shuffle_params(
# (128, 128, 32), (128, 128, 32),
# (32, 64, 32), ["s8,s8,s32,s32,s32"], 2, (32, 64, 32), ["s8,s8,s32,s32,s32"], 2,
# kernel.GemmAlgo.SimtDP4A, None), kernel.GemmAlgo.SimtDP4A, None),
# *gen_shuffle_params( *gen_shuffle_params(
# (128, 128, 32), (128, 128, 32),
# (64, 32, 32), ["s8,s8,s8,s32,s32", "s8,s8,s32,s32,s32"], 2, (64, 32, 32), ["s8,s8,s8,s32,s32", "s8,s8,s32,s32,s32"], 2,
# kernel.GemmAlgo.SimtDP4A, None), kernel.GemmAlgo.SimtDP4A, None),
*gen_shuffle_params( *gen_shuffle_params(
(64, 64, 32), (32, 32, 32), ["s8,s8,s32,s32,s32"], (64, 64, 32), (32, 32, 32), ["s8,s8,s32,s32,s32"],
2, kernel.GemmAlgo.SimtDP4A, None), 2, kernel.GemmAlgo.SimtDP4A, None),
*gen_shuffle_params(
(64, 256, 8),
(32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( # *gen_shuffle_params(
# (64, 256, 8), # (64, 256, 8),
# (32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# # *gen_shuffle_params( *gen_shuffle_params(
# # (64, 256, 8), (32, 128, 16),
# # (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( *gen_shuffle_params(
# (32, 128, 16), (32, 512, 8),
# (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), (32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params(
# (32, 512, 8),
# (32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# # *gen_shuffle_params(
# # (128, 128, 8),
# # (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( # *gen_shuffle_params(
# (128, 128, 8), # (128, 128, 8),
# (32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(128, 128, 8),
(32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(64, 128, 8),
(32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( # *gen_shuffle_params(
# (64, 128, 8), # (64, 128, 8),
# (32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# # *gen_shuffle_params(
# # (64, 128, 8),
# # (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# # *gen_shuffle_params(
# # (128, 64, 8),
# # (32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params(
# (128, 64, 8),
# (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( # *gen_shuffle_params(
# (64, 64, 8), # (128, 64, 8),
# (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # (32, 64, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( *gen_shuffle_params(
# (32, 64, 16), (128, 64, 8),
# (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), (64, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( *gen_shuffle_params(
# (64, 32, 16), (64, 64, 8),
# (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( *gen_shuffle_params(
# (32, 32, 32), (32, 64, 16),
# (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None), (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# # fall back kernels if mat is misaligned for half *gen_shuffle_params(
# # *gen_shuffle_params( (64, 32, 16),
# # (128, 128, 8), (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# # (32, 64, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None), *gen_shuffle_params(
# *gen_shuffle_params( (32, 32, 32),
# (32, 64, 32), (32, 32, 8), ["f32,f32,f32,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# (32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # fall back kernels if mat is misaligned for half
# *gen_shuffle_params(
# (32, 32, 32),
# (32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# # *gen_shuffle_params(
# # (64, 64, 16),
# # (32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( # *gen_shuffle_params(
# (64, 128, 16), # (128, 128, 8),
# (32, 64, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # (32, 64, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(32, 64, 32),
(32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(32, 32, 32),
(32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
# *gen_shuffle_params( # *gen_shuffle_params(
# (64, 64, 8), # (64, 64, 16),
# (32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None), # (32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(64, 128, 16),
(32, 64, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
*gen_shuffle_params(
(64, 64, 8),
(32, 32, 8), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, kernel.GemmAlgo.Simt, None),
] ]
SHUFFLE_VOLTA_PARAMS: List[GemmAlgoParams] = [ SHUFFLE_VOLTA_PARAMS: List[GemmAlgoParams] = [
*gen_shuffle_params(
(64, 64, 32),
(32, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
# *gen_shuffle_params( # *gen_shuffle_params(
# (64, 64, 32), # (128, 128, 32),
# (32, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
# # *gen_shuffle_params(
# # (128, 128, 32),
# # (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# # kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
# *gen_shuffle_params(
# (128, 256, 32),
# (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
# *gen_shuffle_params(
# (256, 128, 32),
# (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, # (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))), # kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
# *gen_shuffle_params( *gen_shuffle_params(
# (128, 64, 32), (128, 256, 32),
# (64, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))), kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
# *gen_shuffle_params( *gen_shuffle_params(
# (64, 128, 32), (256, 128, 32),
# (32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))), kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
*gen_shuffle_params(
(128, 64, 32),
(64, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
*gen_shuffle_params(
(64, 128, 32),
(32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Volta, TensorOpParams((8, 8, 4))),
] ]
# SHUFFLE_VOLTA_PARAMS = [] # SHUFFLE_VOLTA_PARAMS = []
SHUFFLE_TURING_PARAMS: List[GemmAlgoParams] = [ SHUFFLE_TURING_PARAMS: List[GemmAlgoParams] = [
# *gen_shuffle_params( *gen_shuffle_params(
# (64, 64, 32), (64, 64, 32),
# (32, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, (32, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))), kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params(
(128, 128, 32),
(32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params( # *gen_shuffle_params(
# (128, 128, 32), # (128, 128, 32),
# (32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# # *gen_shuffle_params(
# # (128, 128, 32),
# # (64, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# # kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params(
# (64, 64, 64),
# (32, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params(
# (64, 128, 64),
# (32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params(
# (128, 256, 32),
# (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params(
# (256, 128, 32),
# (64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params(
# (128, 64, 32),
# (64, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, # (64, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))), # kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params( *gen_shuffle_params(
# (64, 128, 32), (64, 64, 64),
# (32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2, (32, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))), kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
# *gen_shuffle_params( *gen_shuffle_params(
# (64, 64, 32), (32, 32, 32), ["s8,s8,s32,s32,s32"], (64, 128, 64),
# 2, kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))), (32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params(
(128, 256, 32),
(64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params(
(256, 128, 32),
(64, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params(
(128, 64, 32),
(64, 32, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params(
(64, 128, 32),
(32, 64, 32), ["f16,f16,f16,f16,f16", "f16,f16,f16,f32,f32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((16, 8, 8))),
*gen_shuffle_params(
(64, 64, 32), (32, 32, 32), ["s8,s8,s32,s32,s32"],
2, kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
*gen_shuffle_params(
(128, 128, 32),
(32, 64, 32), ["s8,s8,s32,s32,s32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
# *gen_shuffle_params( # *gen_shuffle_params(
# (128, 128, 32), # (128, 128, 32),
# (32, 64, 32), ["s8,s8,s32,s32,s32"], 2, # (64, 32, 32), ["s8,s8,s8,s32,s32", "s8,s8,s32,s32,s32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
# # *gen_shuffle_params(
# # (128, 128, 32),
# # (64, 32, 32), ["s8,s8,s8,s32,s32", "s8,s8,s32,s32,s32"], 2,
# # kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
# *gen_shuffle_params(
# (128, 256, 32),
# (64, 64, 32), ["s8,s8,s32,s32,s32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
# *gen_shuffle_params(
# (256, 128, 32),
# (64, 64, 32), ["s8,s8,s32,s32,s32"], 2,
# kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))), # kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
# *gen_shuffle_params( *gen_shuffle_params(
# (128, 64, 32), (64, 32, 32), ["s8,s8,s32,s32,s32"], (128, 256, 32),
# 2, kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))), (64, 64, 32), ["s8,s8,s32,s32,s32"], 2,
# *gen_shuffle_params( kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
# (64, 128, 32), (32, 64, 32), ["s8,s8,s32,s32,s32"], *gen_shuffle_params(
# 2, kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))), (256, 128, 32),
(64, 64, 32), ["s8,s8,s32,s32,s32"], 2,
kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
*gen_shuffle_params(
(128, 64, 32), (64, 32, 32), ["s8,s8,s32,s32,s32"],
2, kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
*gen_shuffle_params(
(64, 128, 32), (32, 64, 32), ["s8,s8,s32,s32,s32"],
2, kernel.GemmAlgo.Turing, TensorOpParams((8, 8, 16))),
] ]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment