Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
2309ebe5
Commit
2309ebe5
authored
Feb 02, 2023
by
yan.yan
Browse files
v2.3.3: fix some problem in int8
parent
b52636d1
Changes
8
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
73 additions
and
36 deletions
+73
-36
CHANGELOG.md
CHANGELOG.md
+5
-0
README.md
README.md
+2
-4
pyproject.toml
pyproject.toml
+1
-0
setup.py
setup.py
+2
-2
spconv/core.py
spconv/core.py
+56
-26
test/test_all_algo.py
test/test_all_algo.py
+5
-2
tools/build-wheels-dev.sh
tools/build-wheels-dev.sh
+1
-1
version.txt
version.txt
+1
-1
No files found.
CHANGELOG.md
View file @
2309ebe5
# Changelog
## [2.3.3] - 2023-02-02
### Fixed
-
Fix int8 nvrtc error when using prebuilt
-
Fix int8 kernel when running on Turing GPUs
## [2.3.2] - 2023-01-20
### Changed
-
change version
...
...
README.md
View file @
2309ebe5
...
...
@@ -57,11 +57,9 @@
| CUDA 11.4 |
[
![PyPI Version
][
pypi-ver-114
]
]
[
pypi-url-114] | ```pip install spconv-cu114```| [![pypi monthly download
][
pypi-download-114
]
][pypi-url-114]|
| CUDA 11.6 |
[
![PyPI Version
][
pypi-ver-116
]
]
[
pypi-url-116] | ```pip install spconv-cu116```| [![pypi monthly download
][
pypi-download-116
]
][pypi-url-116]|
| CUDA 11.7 |
[
![PyPI Version
][
pypi-ver-117
]
]
[
pypi-url-117] | ```pip install spconv-cu117```| [![pypi monthly download
][
pypi-download-117
]
][pypi-url-117]|
| CUDA 11.8
*
|
[
![PyPI Version
][
pypi-ver-118
]
]
[
pypi-url-118] | ```pip install spconv-cu118```| [![pypi monthly download
][
pypi-download-118
]
][pypi-url-118]|
| CUDA 11.8 |
[
![PyPI Version
][
pypi-ver-118
]
]
[
pypi-url-118] | ```pip install spconv-cu118```| [![pypi monthly download
][
pypi-download-118
]
][pypi-url-118]|
| CUDA 12.0 |
[
![PyPI Version
][
pypi-ver-120
]
]
[
pypi-url-120] | ```pip install spconv-cu120```| [![pypi monthly download
][
pypi-download-120
]
][pypi-url-120]|
*
: sm_89 and sm_90 are added in CUDA 11.8. If you use RTX 4090 or H100, you should use this version.
<!-- | CUDA 12.0 | [![PyPI Version][pypi-ver-120]][pypi-url-120] | ```pip install spconv-cu120```| [![pypi monthly download][pypi-download-120]][pypi-url-120]| -->
```spconv``` is a project that provides a heavily-optimized sparse convolution implementation with tensor core support. Check [benchmark](docs/BENCHMARK.md) to see how fast spconv 2.x runs.
...
...
pyproject.toml
View file @
2309ebe5
[build-system]
requires
=
[
"setuptools>=41.0"
,
"wheel"
,
"pccm>=0.4.0"
,
"cumm>=0.4.5"
]
# requires = ["setuptools>=41.0", "wheel", "pccm>=0.4.0", "cumm @ file:///io/dist/cumm_cu120-0.4.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"]
# requires = ["setuptools>=41.0", "wheel", "pccm>=0.4.0", "cumm @ file:///io/dist/cumm_cu117-0.4.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl"]
build-backend
=
"setuptools.build_meta"
setup.py
View file @
2309ebe5
...
...
@@ -167,8 +167,8 @@ if disable_jit is not None and disable_jit == "1":
all_shuffle
=
SHUFFLE_SIMT_PARAMS
+
SHUFFLE_VOLTA_PARAMS
+
SHUFFLE_TURING_PARAMS
+
SHUFFLE_AMPERE_PARAMS
all_imp
=
(
IMPLGEMM_SIMT_PARAMS
+
IMPLGEMM_VOLTA_PARAMS
+
IMPLGEMM_TURING_PARAMS
+
IMPLGEMM_AMPERE_PARAMS
)
all_shuffle
=
list
(
filter
(
lambda
x
:
not
x
.
is_nvrtc
,
all_shuffle
))
all_imp
=
list
(
filter
(
lambda
x
:
not
x
.
is_nvrtc
,
all_imp
))
#
all_shuffle = list(filter(lambda x: not x.is_nvrtc, all_shuffle))
#
all_imp = list(filter(lambda x: not x.is_nvrtc, all_imp))
cu
=
GemmMainUnitTest
(
all_shuffle
)
convcu
=
ConvMainUnitTest
(
all_imp
)
...
...
spconv/core.py
View file @
2309ebe5
...
...
@@ -840,7 +840,7 @@ IMPLGEMM_TURING_PARAMS = [
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
16
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
...
...
@@ -855,7 +855,7 @@ IMPLGEMM_TURING_PARAMS = [
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
16
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
0
,
...
...
@@ -1127,7 +1127,7 @@ if not SPCONV_INT8_DEBUG:
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
16
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
...
...
@@ -1142,7 +1142,7 @@ if not SPCONV_INT8_DEBUG:
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
16
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
...
...
@@ -1157,13 +1157,13 @@ if not SPCONV_INT8_DEBUG:
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
16
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
is_nvrtc
=
True
,
int8_inference
=
True
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
64
,
64
,
64
),
(
32
,
32
,
64
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
32
,
32
,
32
),
(
16
,
32
,
32
),
NDIM_DONT_CARE
,
ConvIterAlgo
.
Optimized
,
2
,
...
...
@@ -1172,14 +1172,13 @@ if not SPCONV_INT8_DEBUG:
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
32
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
is_nvrtc
=
True
,
int8_inference
=
True
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
64
,
128
,
64
),
(
32
,
64
,
64
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
32
,
32
,
32
),
(
16
,
16
,
32
),
NDIM_DONT_CARE
,
ConvIterAlgo
.
Optimized
,
2
,
...
...
@@ -1188,14 +1187,13 @@ if not SPCONV_INT8_DEBUG:
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
32
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
is_nvrtc
=
True
,
int8_inference
=
True
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
64
,
128
,
32
),
(
32
,
64
,
32
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
32
,
32
,
32
),
(
32
,
16
,
32
),
NDIM_DONT_CARE
,
ConvIterAlgo
.
Optimized
,
2
,
...
...
@@ -1204,14 +1202,14 @@ if not SPCONV_INT8_DEBUG:
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
16
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
is_nvrtc
=
True
,
int8_inference
=
True
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
128
,
64
,
64
),
(
64
,
32
,
64
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
64
,
64
,
64
),
(
32
,
32
,
64
),
NDIM_DONT_CARE
,
ConvIterAlgo
.
Optimized
,
2
,
...
...
@@ -1220,14 +1218,14 @@ if not SPCONV_INT8_DEBUG:
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
32
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
is_nvrtc
=
True
,
int8_inference
=
True
),
# TODO 16,8,32 produce wrong result.
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
128
,
64
,
32
),
(
64
,
32
,
32
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
64
,
128
,
64
),
(
32
,
64
,
64
),
NDIM_DONT_CARE
,
ConvIterAlgo
.
Optimized
,
2
,
...
...
@@ -1236,14 +1234,30 @@ if not SPCONV_INT8_DEBUG:
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
16
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
is_nvrtc
=
True
,
int8_inference
=
True
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
64
,
128
,
32
),
(
32
,
64
,
32
),
NDIM_DONT_CARE
,
ConvIterAlgo
.
Optimized
,
2
,
[
"s8,s8,s8,s32,f32"
,
"s8,s8,s8,s32,f16"
],
NHWC
,
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
is_nvrtc
=
True
,
int8_inference
=
True
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
128
,
25
6
,
64
),
(
64
,
128
,
64
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
128
,
6
4
,
64
),
(
64
,
32
,
64
),
NDIM_DONT_CARE
,
ConvIterAlgo
.
Optimized
,
2
,
...
...
@@ -1252,14 +1266,14 @@ if not SPCONV_INT8_DEBUG:
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
32
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
is_nvrtc
=
True
,
int8_inference
=
True
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
256
,
128
,
64
)
,
(
128
,
64
,
64
),
# TODO 16,8,32 produce wrong result.
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
128
,
64
,
32
)
,
(
64
,
32
,
32
),
NDIM_DONT_CARE
,
ConvIterAlgo
.
Optimized
,
2
,
...
...
@@ -1268,14 +1282,14 @@ if not SPCONV_INT8_DEBUG:
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
32
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
is_nvrtc
=
True
,
int8_inference
=
True
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
128
,
128
,
128
),
(
64
,
64
,
128
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
128
,
256
,
64
),
(
64
,
128
,
64
),
NDIM_DONT_CARE
,
ConvIterAlgo
.
Optimized
,
2
,
...
...
@@ -1284,13 +1298,29 @@ if not SPCONV_INT8_DEBUG:
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
32
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
is_nvrtc
=
True
,
int8_inference
=
True
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
256
,
128
,
64
),
(
128
,
64
,
64
),
NDIM_DONT_CARE
,
ConvIterAlgo
.
Optimized
,
2
,
[
"s8,s8,s8,s32,f32"
,
"s8,s8,s8,s32,f16"
],
NHWC
,
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
is_nvrtc
=
True
,
int8_inference
=
True
),
*
gen_conv_params
(
ConvFwdAndBwdInput
,
(
128
,
128
,
64
),
(
64
,
64
,
64
),
NDIM_DONT_CARE
,
ConvIterAlgo
.
Optimized
,
...
...
@@ -1300,7 +1330,7 @@ if not SPCONV_INT8_DEBUG:
NHWC
,
NHWC
,
GemmAlgo
.
Turing
,
TensorOp
((
16
,
8
,
32
)),
TensorOp
((
8
,
8
,
16
)),
mask_sparse
=
True
,
increment_k_first
=
True
,
access_per_vector
=
1
,
...
...
test/test_all_algo.py
View file @
2309ebe5
...
...
@@ -330,10 +330,10 @@ def _test_impgemm_conv_cuda(subm: bool):
device
=
torch
.
device
(
"cuda:0"
)
shapes
=
[[
19
,
18
,
17
]]
batchsizes
=
[
1
]
dtypes
=
[(
np
.
float32
,
np
.
float32
),
(
np
.
float16
,
np
.
float16
)]
#
dtypes = [(np.float32, np.float32), (np.float16, np.float16)]
# dtypes = [np.float16]
# dtypes = [(np.int8, np.int8), (np.int8, np.float32), (np.int8, np.float16)]
#
dtypes = [(np.int8, np.int8)]
dtypes
=
[(
np
.
int8
,
np
.
int8
)]
# dtypes = [(np.float16, np.float16)]
test_case
=
TestCase
()
...
...
@@ -341,6 +341,9 @@ def _test_impgemm_conv_cuda(subm: bool):
# out_channels = [32, 48, 64]
in_channels
=
[
32
,
47
]
out_channels
=
[
32
,
48
,
62
]
in_channels
=
[
16
]
out_channels
=
[
16
]
# in_channels = [16]
# out_channels = [16]
...
...
tools/build-wheels-dev.sh
View file @
2309ebe5
...
...
@@ -26,7 +26,7 @@ function repair_wheel {
}
gcc
-v
export
SPCONV_DISABLE_JIT
=
"1"
export
CUMM_CUDA_ARCH_LIST
=
"
7.5
"
export
CUMM_CUDA_ARCH_LIST
=
"
8.6
"
# export SPCONV_PYTHON_LIST="3.7;3.8;3.9;3.10"
# Compile wheels, we only support 3.6-3.10.
# "/opt/python/cp36-cp36m/bin/pip" wheel /io/ --no-deps -w /io/wheelhouse_tmp
...
...
version.txt
View file @
2309ebe5
2.3.
2
2.3.
3
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment