Commit d03b947a authored by yan.yan's avatar yan.yan
Browse files

Merge branch 'master' into develop

parents 9d1e33d6 8aa0f1f7
......@@ -24,11 +24,32 @@ jobs:
with:
filters: |
needbuild:
- '.github/workflows/**'
- 'setup.py'
- 'spconv/csrc/**'
- 'spconv/algo.py'
- 'spconv/core.py'
- 'pyproject.toml'
- name: Install Boost
  env:
    CUDA_VERSION: ${{ matrix.cuda-version }}
    PYTHON_VERSION: ${{ matrix.python-version }}
    cuda: ${{ matrix.cuda-version }}
    BOOST_VERSION: boost_1_77_0
  # Runs on tag pushes, or when the "needbuild" path filter matched and
  # this matrix entry is Python 3.10.
  if: |
    (
      (github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) ) ||
      (
        (steps.changes.outputs.needbuild == 'true') &&
        (env.PYTHON_VERSION == '3.10')
      )
    )
  shell: powershell
  run: |
    # Disable the Invoke-WebRequest progress display.
    $ProgressPreference = 'SilentlyContinue'
    # boostorg.jfrog.io has been retired; archives.boost.io is the
    # official Boost release archive.
    Invoke-WebRequest -Uri "https://archives.boost.io/release/1.77.0/source/boost_1_77_0.zip" -UseBasicParsing -OutFile $HOME/boost.zip
    # The archive unpacks to $HOME/boost/boost_1_77_0.
    Expand-Archive $HOME/boost.zip -DestinationPath $HOME/boost
- name: Install CUDA
env:
CUDA_VERSION: ${{ matrix.cuda-version }}
......@@ -43,7 +64,9 @@ jobs:
)
)
shell: powershell
run: .\tools\install_windows_cuda.ps1
run: |
.\tools\install_windows_cuda.ps1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
......@@ -59,6 +82,8 @@ jobs:
env:
CUDA_VERSION: ${{ matrix.cuda-version }}
PYTHON_VERSION: ${{ matrix.python-version }}
BOOST_VERSION: boost_1_77_0
CUMM_CUDA_VERSION: ${{ matrix.cuda-version }}
if: |
(env.CUDA_VERSION != '') && (
(github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) ) ||
......@@ -68,10 +93,11 @@ jobs:
)
)
run: |
$Env:CUMM_CUDA_VERSION = "${{ matrix.cuda-version }}"
$Env:CUMM_CUDA_ARCH_LIST = "all"
$Env:SPCONV_DISABLE_JIT = "1"
pip install pccm pybind11
# download boost header only
$Env:BOOST_ROOT = "$HOME/boost/boost_1_77_0"
# ls "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v${{ matrix.cuda-version }}\include\thrust"
python -m build --wheel --outdir dist/ .
shell: powershell
......@@ -99,6 +125,7 @@ jobs:
with:
filters: |
needbuild:
- '.github/workflows/**'
- 'setup.py'
- 'spconv/csrc/**'
- 'spconv/algo.py'
......@@ -111,10 +138,15 @@ jobs:
python-version: ${{ matrix.python-version }}
- name: Install pep build
  env:
    BOOST_VERSION: boost_1_77_0
  run: |
    python -m pip install build --user
    python -m pip install --upgrade pip twine wheel
    python -m pip install pytest setuptools
    mkdir -p third_party
    # boostorg.jfrog.io has been retired; archives.boost.io is the
    # official Boost release archive. The zip unpacks to
    # third_party/boost/$BOOST_VERSION.
    wget https://archives.boost.io/release/1.77.0/source/$BOOST_VERSION.zip -O third_party/boost.zip
    unzip third_party/boost.zip -d third_party/boost
- name: Build a cuda wheel
env:
......@@ -122,6 +154,7 @@ jobs:
PYTHON_VERSION: ${{ matrix.python-version }}
DOCKER_IMAGE: scrin/manylinux2014-cuda:cu${{ matrix.cuda-version }}-devel-1.0.0
PLAT: manylinux2014_x86_64
BOOST_VERSION: boost_1_77_0
if: |
(env.CUDA_VERSION != '') && (
(github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) ) ||
......@@ -132,7 +165,10 @@ jobs:
)
run: |
chmod +x tools/build-wheels.sh
docker run --rm -e PLAT=$PLAT -e CUMM_CUDA_VERSION=${{ matrix.cuda-version }} -e SPCONV_PYTHON_LIST=${{env.PYTHON_VERSION}} -v `pwd`:/io $DOCKER_IMAGE bash -c "source /etc/bashrc && /io/tools/build-wheels.sh"
docker run --rm -e PLAT=$PLAT -e CUMM_CUDA_VERSION=${{ matrix.cuda-version }} \
-e SPCONV_PYTHON_LIST=${{env.PYTHON_VERSION}} \
-e BOOST_ROOT=/io/third_party/boost/$BOOST_VERSION \
-v `pwd`:/io $DOCKER_IMAGE bash -c "source /etc/bashrc && /io/tools/build-wheels.sh"
- name: Build a cpu wheel
env:
......@@ -140,6 +176,7 @@ jobs:
PYTHON_VERSION: ${{ matrix.python-version }}
DOCKER_IMAGE: scrin/manylinux2014-cuda:cu114-devel-1.0.0
PLAT: manylinux2014_x86_64
BOOST_VERSION: boost_1_77_0
if: |
(env.CUDA_VERSION == '') && (
(github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) ) ||
......@@ -150,7 +187,10 @@ jobs:
)
run: |
chmod +x tools/build-wheels.sh
docker run --rm -e PLAT=$PLAT -e CUMM_CUDA_VERSION=${{ matrix.cuda-version }} -e SPCONV_PYTHON_LIST=${{env.PYTHON_VERSION}} -v `pwd`:/io $DOCKER_IMAGE bash -c "source /etc/bashrc && /io/tools/build-wheels.sh"
docker run --rm -e PLAT=$PLAT -e CUMM_CUDA_VERSION=${{ matrix.cuda-version }} \
-e SPCONV_PYTHON_LIST=${{env.PYTHON_VERSION}} \
-e BOOST_ROOT=/io/third_party/boost/$BOOST_VERSION \
-v `pwd`:/io $DOCKER_IMAGE bash -c "source /etc/bashrc && /io/tools/build-wheels.sh"
- name: Publish a Python distribution to PyPI
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
......
# Changelog
## [2.1.17] - 2021-11-29
### Fixed
- Fix a bug in sparse add.
### Added
- Add more wrong usage check
- Add insert_exist_keys for hash table
## [2.1.16] - 2021-11-28
### Fixed
- Fix strange compile problem in windows
## [2.1.15] - 2021-11-28
### Fixed
- Fix missing pccm.Class in setup.py
## [2.1.14] - 2021-11-28
### Added
- Add hash table
- update cumm version
- Add AddTableMisaligned for sptensors with same shape but different indices.
### Fixed
- Fix a bug already fixed in 2.1.10 but introduced in 2.1.12 again.
## [2.1.13] - 2021-?-?
### Added
- Add some ops from spconv 1.x, see spconv.utils for more details.
- Add some debug tool for users to attach more info in issue.
## [2.1.12] - 2021-11-23
### Added
- Add a method for voxel generator to get pc_voxel_id, which is usually used in semantic segmentation
......
......@@ -61,7 +61,7 @@ Spconv 1.x users **NEED READ [THIS](docs/SPCONV_2_BREAKING_CHANGEs.md)** before
* fp32 (not tf32) training/inference speed is increased (+50~80%)
* fp16 training/inference speed is greatly increased when your layer support tensor core (channel size must be multiple of 8).
* int8 op is ready, but we still need some time to figure out how to run int8 in pytorch.
* [doesn't depend on pytorch binary](docs/FAQ.md#What-does-no-dependency-on-pytorch-mean), but you may need at least pytorch >= 1.6.0 to run spconv 2.x.
* [doesn't depend on pytorch binary](docs/FAQ.md#What-does-no-dependency-on-pytorch-mean), but you may need at least pytorch >= 1.5.0 to run spconv 2.x.
* since spconv 2.x doesn't depend on pytorch binary (never in future), it's impossible to support torch.jit/libtorch inference.
## Spconv 2.x Development and Roadmap
......@@ -108,18 +108,32 @@ CUDA 11.1 will be removed in spconv 2.2 because pytorch 1.10 don't provide prebu
```pip install spconv-cu114``` for CUDA 11.4
**NOTE** It's safe to have different **minor** cuda version between system and conda (pytorch) **in Linux**. for example, you can use spconv-cu114 with anaconda version of pytorch cuda 11.1 in a OS with CUDA 11.2 installed.
**NOTE** It's safe to have different **minor** cuda version between system and conda (pytorch) in **CUDA >= 11.0** because of [CUDA Minor Version Compatibility](https://docs.nvidia.com/deploy/cuda-compatibility/#minor-version-compatibility). For example, you can use spconv-cu114 with anaconda version of pytorch cuda 11.1 in an OS with CUDA 11.2 installed.
For CUDA 10, we don't know whether ```spconv-cu102``` works with CUDA 10.0 and 10.1. Users can have a try.
**NOTE** In Linux, you can install spconv-cuxxx without installing CUDA on the system! Only a suitable NVIDIA driver is required. For CUDA 11, we need driver >= 450.82.
#### Prebuilt GPU Support Matrix
See [this page](https://arnon.dk/matching-sm-architectures-arch-and-gencode-for-various-nvidia-cards/) to check supported GPU names by arch.
| CUDA version | GPU Arch List |
| -------------- |:---------------------:|
| 10.2 | 50,52,60,61,70,75 |
| 11.x | 52,60,61,70,75,80,86 |
| 12.x | 60,61,70,75,80,86,90 |
### Build from source for development (JIT, recommend)
The c++ code will be built automatically when you change c++ code in project.
For NVIDIA Embedded Platforms, you need to specify cuda arch before build: ```export CUMM_CUDA_ARCH_LIST="7.2"``` for xavier.
For NVIDIA Embedded Platforms, you need to specify cuda arch before build: ```export CUMM_CUDA_ARCH_LIST="7.2"``` for xavier, ```export CUMM_CUDA_ARCH_LIST="6.2"``` for TX2, ```export CUMM_CUDA_ARCH_LIST="8.7"``` for orin.
You need to remove ```cumm``` from the ```requires``` section in pyproject.toml after installing editable ```cumm``` and before installing spconv, due to a pyproject limitation (it can't find an editable-installed ```cumm```).
You need to ensure ```pip list | grep spconv``` and ```pip list | grep cumm``` show nothing before installing editable spconv/cumm.
#### Linux
0. uninstall spconv and cumm installed by pip
......@@ -157,7 +171,9 @@ You need to rebuild ```cumm``` first if you are build along a CUDA version that
5. run ```pip install pccm cumm wheel```
6. run ```python setup.py bdist_wheel```+```pip install dists/xxx.whl```
## Known issues
* Spconv 2.x F16 runs slow in A100.
## Note
......
......@@ -16,7 +16,7 @@
## Simple Benchmark
### Network Benchmark without batchnorm (F32/F16) in RTX 3080 Laptop GPU
### Network Benchmark without batchnorm (F32/F16) in RTX 3080 Laptop GPU 150W
Network Code: test/benchmark.py
......@@ -25,6 +25,20 @@ Network Code: test/benchmark.py
| Forward | 43ms | 21.7ms/13.7ms | 23.5ms/11.2ms | 22ms/12.2ms |
| Backward | 80ms | 41.9ms/25.2ms | 51.0ms/13.8ms | 41.1ms/12.2ms |
| F16 Forward | Native| Implicit Gemm | Implicit Gemm Split Mask |
| -------------- |:---------------------:|---------------------:| ---------------------:|
| RTX 3080 Laptop 150W@1755MHz | 13.7ms | 11.2ms | 12.2ms |
| RTX A6000 | 19.1ms | 11.7ms | 14.0ms |
| TESLA V100 | 17.9ms | 11.4ms | 13.4ms |
| A100 | 23.8ms | 12.4ms | 15.1ms |
| F16 Backward | Native| Implicit Gemm | Implicit Gemm Split Mask |
| -------------- |:---------------------:|---------------------:| ---------------------:|
| RTX 3080 Laptop 150W@1755MHz | 25.2ms | 13.8ms | 12.2ms |
| RTX A6000 | 28.1ms | 9.2ms | 8.9ms |
| TESLA V100 | 33.9ms | 12.2ms | 12.9ms |
| A100 | 37.6ms | 12.2ms | 13.9ms |
### Network Gemm Kernel Benchmark FP16 in RTX 3080 Laptop GPU
Network Code: test/benchmark.py
......
......@@ -26,30 +26,3 @@
* spconv 2.x in Windows 10 is 1.5x~2x slower than Linux. use Linux if possible.
See [benchmark](BENCHMARK.md) for more performance details of different algorithms.
## Algorithm Overview
### Native Explicit (deprecated and removed in spconv 2.x)
native algorithm (explicit, no fused) is standard gather-gemm-scatter algorithm. Assume we compute 3x3 conv, We can split it to 9 of 1x1 conv which can be computed by matmul, then sum them to get final result.
For sparse convolution, we also do split-gemm-sum to calculate conv, but we need to collect data first because it's sparse.
### Native
Fused version of above algorithm. 1.5x-2x faster than non-fused version.
### Implicit Gemm
```Native``` algorithm do minimal mma (matrix multiply add), but it need to serialize IO. The pipeline of ```Native``` is gather-gemm-scatter-gather-gemm-scatter-...
```Implicit Gemm``` fuse all calculation to one kernel and perform overlapped gather-mma-scatter to save a lot of time.
![Image Overlapped Gemm](https://raw.githubusercontent.com/NVIDIA/cutlass/master/media/images/software-pipeline.png)
In my test, ```Implicit Gemm``` is almost 2x faster than ```Native```.
### Implicit Gemm Split Mask
TODO
In my test, ```Implicit Gemm Split Mask``` is slightly faster than ```Implicit Gemm```, but the indice generation is slower, so currently we use ```Implicit Gemm``` by default.
\ No newline at end of file
......@@ -16,6 +16,47 @@
# Usage
## Short API description
```Python
import spconv.pytorch as spconv
from spconv.pytorch import functional as Fsp
from torch import nn
from spconv.pytorch.utils import PointToVoxel
from spconv.pytorch.hash import HashTable
```
| Layer APIs | Common Usage | Dense Version |Note |
|----------------------------------- |:------------------------:|----------------------------:|----------------------------:|
| ```spconv.SparseConv3d``` | Downsample | ```nn.Conv3d``` | Use ```indice_key``` to save data for inverse |
| ```spconv.SubMConv3d``` | Convolution | N/A | Use ```indice_key``` to save data for reuse |
| ```spconv.SparseInverseConv3d``` | Upsample | N/A | Use pre-saved ```indice_key``` to upsample |
| ```spconv.SparseConvTranspose3d``` | Upsample (don't use this)| ```nn.ConvTranspose3d``` | VERY SLOW and CAN'T RECOVER ORIGIN POINT CLOUD |
| ```spconv.SparseMaxPool3d``` | Downsample | ```nn.MaxPool3d``` | Use ```indice_key``` to save data for inverse |
| ```spconv.SparseSequential``` | Container | ```nn.Sequential``` | support layers above and ```nn.ReLU, nn.BatchNorm, ...```|
| Functional APIs | Usage |
|----------------------------------- |:------------------------:|
| ```Fsp.sparse_add``` | Add sparse tensors with same shape and different indices |
| Input APIs | Usage |
|----------------------------------- |:------------------------:|
| ```PointToVoxel``` | point cloud to voxels |
| Misc APIs | Usage |
|----------------------------------- |:------------------------:|
| ```HashTable``` | hash table, one-slot |
| Layer APIs | [torchsparse](https://github.com/mit-han-lab/torchsparse) | [MinkowskiEngine](https://github.com/NVIDIA/MinkowskiEngine) |
|----------------------------------- |:------------------------:|:------------------------:|
| ```spconv.SparseConv3d``` | ```Conv3d(stride!=1, transpose=False)``` |```MinkowskiConvolution(stride!=1)```|
| ```spconv.SubMConv3d``` | ```Conv3d(stride=1, transpose=False)``` | ```MinkowskiConvolution(stride=1)```|
| ```spconv.SparseInverseConv3d``` | ```Conv3d(stride!=1, transpose=True)``` |```MinkowskiConvolutionTranspose```|
| ```spconv.SparseConvTranspose3d``` | N/A |```MinkowskiConvolutionTranspose```|
| ```spconv.SparseMaxPool3d``` | N/A | ```MinkowskiMaxPooling```|
## Concept
* Sparse Conv Tensor: like hybrid [torch.sparse_coo_tensor](https://pytorch.org/docs/stable/sparse.html#sparse-coo-docs) but with only two differences: 1. SparseConvTensor only has one dense dim, 2. the indice of SparseConvTensor is transposed. See torch doc for more details.
......@@ -102,6 +143,29 @@ class ExampleNet(nn.Module):
return self.net(x)
```
### Sparse Add
In a semantic segmentation network, we may use conv1x3, 3x1 and 3x3 in a block, but it's impossible to sum the results from these layers because regular add requires the same indices.
spconv >= 2.1.17 provides an operation to add sparse tensors with different indices (the shape must be the same), but with limits:
```Python
from spconv.pytorch import functional as Fsp
res_1x3 = conv1x3(x)
res_3x1 = conv3x1(x)
# WRONG
# because we can't "inverse" this operation
wrong_usage_cant_inverse = Fsp.sparse_add(res_1x3, res_3x1)
# CORRECT
# res_3x3 already contains all indices of res_1x3 and res_3x1,
# so output spatial structure isn't changed, we can "inverse" back.
res_3x3 = conv3x3(x)
correct = Fsp.sparse_add(res_1x3, res_3x1, res_3x3)
```
If you use a network without ```SparseInverseConv```, the limits above don't apply; the only drawback of ```sparse_add``` is that it runs slower than a simple aligned add.
### Fast Mixed Precision Training
see example/mnist_sparse. we support ```torch.cuda.amp```.
......
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from spconv.pytorch.hash import HashTable
def main():
    """Demonstrate the fixed-size hash table on CPU and, when available, CUDA.

    The table can't delete keys after insert and can't resize, so its
    capacity has to be chosen up front; about 2x the number of keys is
    recommended.

    For each device the example inserts eight key/value pairs, queries them
    back in a different order, prints the stored items, overwrites the values
    with ``assign_arange_`` and finally calls ``insert_exist_keys`` with the
    original pairs, printing the items after each step.
    """
    max_size = 1000
    k_dtype = torch.int32
    v_dtype = torch.int64

    devices = [torch.device("cpu")]
    # Skip the GPU pass instead of crashing on machines without CUDA.
    if torch.cuda.is_available():
        devices.append(torch.device("cuda:0"))

    for dev in devices:
        if dev.type == "cpu":
            table = HashTable(dev, k_dtype, v_dtype)
        else:
            # Only the CUDA table is created with an explicit capacity.
            table = HashTable(dev, k_dtype, v_dtype, max_size=max_size)

        keys = torch.tensor([5, 3, 7, 4, 6, 2, 10, 8], dtype=k_dtype, device=dev)
        values = torch.tensor([1, 6, 4, 77, 23, 756, 12, 12], dtype=v_dtype, device=dev)
        keys_query = torch.tensor([8, 10, 2, 6, 4, 7, 3, 5], dtype=k_dtype, device=dev)

        table.insert(keys, values)
        vq, _ = table.query(keys_query)
        print(vq)

        # items() returns full-size buffers plus a count tensor; only the
        # first `cnt` entries are printed.
        ks, vs, cnt = table.items()
        cnt_item = cnt.item()
        print(cnt, ks[:cnt_item], vs[:cnt_item])

        table.assign_arange_()
        ks, vs, cnt = table.items()
        cnt_item = cnt.item()
        print(cnt, ks[:cnt_item], vs[:cnt_item])

        print("----------Insert Exist Keys----------")
        # The returned value was never used by this example, so it is dropped.
        table.insert_exist_keys(keys, values)
        ks, vs, cnt = table.items()
        cnt_item = cnt.item()
        print(cnt, ks[:cnt_item], vs[:cnt_item])


if __name__ == "__main__":
    main()
\ No newline at end of file
......@@ -156,6 +156,9 @@ if disable_jit is not None and disable_jit == "1":
from cumm.conv.main import ConvMainUnitTest
from cumm.constants import CUMM_CPU_ONLY_BUILD
from spconv.csrc.sparse.all import SpconvOps
from spconv.csrc.utils import BoxOps
from spconv.csrc.hash.core import HashTable
cu = GemmMainUnitTest(SHUFFLE_SIMT_PARAMS + SHUFFLE_VOLTA_PARAMS + SHUFFLE_TURING_PARAMS)
convcu = ConvMainUnitTest(IMPLGEMM_SIMT_PARAMS + IMPLGEMM_VOLTA_PARAMS + IMPLGEMM_TURING_PARAMS)
convcu.namespace = "cumm.conv.main"
......@@ -168,9 +171,9 @@ if disable_jit is not None and disable_jit == "1":
std = "c++14"
else:
std = "c++17"
cus = [cu, convcu, SpconvOps()]
cus = [cu, convcu, SpconvOps(), BoxOps(), HashTable()]
if CUMM_CPU_ONLY_BUILD:
cus = [SpconvOps()]
cus = [SpconvOps(), BoxOps(), HashTable()]
ext_modules: List[Extension] = [
PCCMExtension(cus,
"spconv/core_cc",
......
......@@ -28,6 +28,9 @@ if project_is_installed(PACKAGE_NAME) and project_is_editable(
from cumm.conv.main import ConvMainUnitTest
from spconv.csrc.sparse.all import SpconvOps
from spconv.csrc.utils import BoxOps
from spconv.csrc.hash.core import HashTable
cu = GemmMainUnitTest(SHUFFLE_SIMT_PARAMS + SHUFFLE_VOLTA_PARAMS +
SHUFFLE_TURING_PARAMS)
cu.namespace = "cumm.gemm.main"
......@@ -38,7 +41,7 @@ if project_is_installed(PACKAGE_NAME) and project_is_editable(
if InWindows:
# windows have command line limit, so we use objects_folder to reduce command size.
objects_folder = "objects"
pccm.builder.build_pybind([cu, convcu, SpconvOps()],
pccm.builder.build_pybind([cu, convcu, SpconvOps(), BoxOps(), HashTable()],
PACKAGE_ROOT / "core_cc",
namespace_root=PACKAGE_ROOT,
objects_folder=objects_folder,
......
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
from pccm.stubs import EnumValue, EnumClassValue
from cumm.tensorview import Tensor
class HashTable:
    # Type stub for the HashTable binding: every method body is `...`.
    # Outputs of query/items/insert_exist_keys are passed in as Tensor
    # arguments and the methods return None.
    # NOTE(review): the attributes below mirror the constructor arguments of
    # the same name; insert_count presumably tracks inserted items — confirm.
    key_itemsize: int
    value_itemsize: int
    is_cpu: bool
    insert_count: int
    def __init__(self, is_cpu: bool, key_itemsize: int, value_itemsize: int, keys_data: Tensor, values_data: Tensor, stream: int = 0) -> None:
        """
        Args:
            is_cpu: NOTE(review): presumably selects the CPU implementation — confirm.
            key_itemsize: NOTE(review): presumably the size in bytes of one key — confirm.
            value_itemsize: NOTE(review): presumably the size in bytes of one value — confirm.
            keys_data: NOTE(review): presumably the pre-allocated key storage — confirm.
            values_data: NOTE(review): presumably the pre-allocated value storage — confirm.
            stream: defaults to 0. NOTE(review): looks like a CUDA stream handle — confirm.
        """
        ...
    def clear(self, stream: int = 0) -> None:
        """
        NOTE(review): the previous description here was copied from
        ``insert`` and referred to a ``values`` argument this method does
        not take. Presumably resets the table to empty — confirm against
        the C++ implementation.
        Args:
            stream: defaults to 0. NOTE(review): looks like a CUDA stream handle — confirm.
        """
        ...
    def insert(self, keys: Tensor, values: Tensor = Tensor(), stream: int = 0) -> None:
        """
        in this function, if values is empty, it will be assigned to zero.
        Args:
            keys: keys to insert.
            values: values to store for ``keys``; defaults to an empty Tensor.
            stream: defaults to 0. NOTE(review): looks like a CUDA stream handle — confirm.
        """
        ...
    def query(self, keys: Tensor, values: Tensor, is_empty: Tensor, stream: int) -> None:
        """
        query keys, save to values, and save is_empty to is_empty
        Args:
            keys: keys to look up.
            values: output tensor that receives the queried values.
            is_empty: output tensor that receives the is_empty result.
            stream: required here (no default). NOTE(review): looks like a CUDA stream handle — confirm.
        """
        ...
    def assign_arange_(self, count: Tensor, stream: int = 0) -> None:
        """
        this function assign "arange(NumItem)" to table values.
        useful in "unique-like" operations.
        unlike insert/query, this method only support i32/i64/u32/u64 for value.
        count must be u32/u64.
        Args:
            count: must be u32/u64 (see above). NOTE(review): presumably receives the number of items — confirm.
            stream: defaults to 0. NOTE(review): looks like a CUDA stream handle — confirm.
        """
        ...
    def size_cpu(self) -> int:
        """
        this function can only be used to get cpu hash table size.
        """
        ...
    def items(self, keys: Tensor, values: Tensor, count: Tensor, stream: int) -> None:
        """
        get items.
        Args:
            keys: NOTE(review): presumably output tensor for the stored keys — confirm.
            values: NOTE(review): presumably output tensor for the stored values — confirm.
            count: NOTE(review): presumably output tensor for the number of items — confirm.
            stream: required here (no default). NOTE(review): looks like a CUDA stream handle — confirm.
        """
        ...
    def insert_exist_keys(self, keys: Tensor, values: Tensor, is_empty: Tensor, stream: int) -> None:
        """
        insert v of given k if k exists. won't insert any new key.
        Args:
            keys: keys whose values should be overwritten when already present.
            values: values to store for ``keys``.
            is_empty: NOTE(review): presumably output flags marking keys that were not found — confirm.
            stream: required here (no default). NOTE(review): looks like a CUDA stream handle — confirm.
        """
        ...
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
from pccm.stubs import EnumValue, EnumClassValue
from cumm.tensorview import Tensor
class BoxOps:
    # Type stub for the BoxOps binding; every method body is `...`.
    # NOTE(review): the descriptions below are taken from the pccm generator
    # class of the same name — confirm this stub is produced from it.

    # True when BOOST_ROOT was set at code-generation time.
    @staticmethod
    def has_boost() -> bool: ...
    @staticmethod
    def non_max_suppression_cpu(boxes: Tensor, order: Tensor, thresh: float, eps: float = 0) -> List[int]:
        """
        Greedy non-maximum suppression on axis-aligned boxes.
        Args:
            boxes: 2-D float/double tensor; columns 0..3 are read as
                (x1, y1, x2, y2).
            order: 1-D integer tensor (i32/i64/u32/u64) giving the visiting
                order of box indices.
            thresh: a box is suppressed when its overlap ratio with an
                already kept box is >= thresh.
            eps: added to every width/height before areas are computed.
        """
        ...
......@@ -23,3 +23,6 @@ from spconv.core_cc.csrc.sparse.all import SpconvOps
BUILD_CUMM_VERSION = SpconvOps.cumm_version()
BUILD_PCCM_VERSION = SpconvOps.pccm_version()
from spconv.core_cc.csrc.utils.boxops import BoxOps
HAS_BOOST = BoxOps.has_boost()
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
This diff is collapsed.
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .boxops import BoxOps
\ No newline at end of file
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pccm
from pathlib import Path
import os
from cumm.common import TensorView, TensorViewCPU, TensorViewKernel, ThrustLib
from spconv.constants import BOOST_ROOT
class BoostGeometryLib(pccm.Class):
    # pccm dependency that adds BOOST_ROOT to the include paths and includes
    # <boost/geometry.hpp>. It must only be instantiated when BOOST_ROOT is
    # available.
    def __init__(self):
        super().__init__()
        # Still an AssertionError, but with a message that tells the user
        # what is missing instead of a bare failure.
        assert BOOST_ROOT is not None, (
            "BOOST_ROOT is not set: the boost headers are required to build "
            "boost.geometry based ops")
        self.build_meta.add_includes(BOOST_ROOT)
        self.add_include("boost/geometry.hpp")
class BoxOps(pccm.Class):
    # pccm code generator for CPU box utilities: axis-aligned NMS, rotated
    # NMS and rotated-box IoU. The rotated variants use boost.geometry and
    # are only generated when BOOST_ROOT is set.
    def __init__(self):
        super().__init__()
        # The generated C++ below uses tv::Tensor / tv::dispatch.
        # NOTE(review): presumably provided by the TensorView dependency.
        self.add_dependency(TensorView)

    @pccm.pybind.mark
    @pccm.static_function
    def has_boost(self):
        # Emits a function returning a compile-time constant that records
        # whether BOOST_ROOT was set when this code was generated.
        code = pccm.FunctionCode()
        code.raw(f"return {pccm.boolean(BOOST_ROOT is not None)};")
        return code.ret("bool")

    @pccm.pybind.mark(nogil=True)
    @pccm.static_function
    def non_max_suppression_cpu(self):
        # Greedy NMS over axis-aligned boxes.
        #   boxes:  2-D float/double tensor, columns 0..3 read as x1, y1, x2, y2
        #   order:  1-D integer tensor with the visiting order of box indices
        #   thresh: box j is suppressed when IoU(i, j) >= thresh
        #   eps:    added to every width/height before multiplying
        # Returns the kept box indices in visiting order.
        #
        # Fixes relative to the previous version:
        #   * `keep` was constructed with ndets zero entries and then
        #     appended to, so the result started with ndets bogus zeros;
        #     it is now empty with reserved capacity.
        #   * `areas` was an empty vector written through operator[]
        #     (out-of-bounds); it is now sized to ndets.
        code = pccm.FunctionCode()
        code.arg("boxes, order", "tv::Tensor")
        code.arg("thresh", "float")
        code.arg("eps", "float", "0")
        code.raw(f"""
auto ndets = boxes.dim(0);
std::vector<int> keep;
keep.reserve(ndets);
tv::dispatch<float, double>(boxes.dtype(), [&](auto I1){{
using DType = TV_DECLTYPE(I1);
auto boxes_r = boxes.tview<const DType, 2>();
tv::dispatch<int, int64_t, uint32_t, uint64_t>(order.dtype(), [&](auto I2){{
using T2 = TV_DECLTYPE(I2);
auto order_r = order.tview<const T2, 1>();
std::vector<DType> areas(ndets);
for (int i = 0; i < ndets; ++i){{
areas[i] = (boxes_r(i, 2) - boxes_r(i, 0) + eps) *
(boxes_r(i, 3) - boxes_r(i, 1) + eps);
}}
std::vector<int> suppressed(ndets, 0);
int i, j;
DType xx1, xx2, w, h, inter, ovr;
for (int _i = 0; _i < ndets; ++_i) {{
i = order_r(_i);
if (suppressed[i] == 1)
continue;
keep.push_back(i);
for (int _j = _i + 1; _j < ndets; ++_j) {{
j = order_r(_j);
if (suppressed[j] == 1)
continue;
xx2 = std::min(boxes_r(i, 2), boxes_r(j, 2));
xx1 = std::max(boxes_r(i, 0), boxes_r(j, 0));
w = xx2 - xx1 + eps;
if (w > 0) {{
xx2 = std::min(boxes_r(i, 3), boxes_r(j, 3));
xx1 = std::max(boxes_r(i, 1), boxes_r(j, 1));
h = xx2 - xx1 + eps;
if (h > 0) {{
inter = w * h;
ovr = inter / (areas[i] + areas[j] - inter);
if (ovr >= thresh)
suppressed[j] = 1;
}}
}}
}}
}}
}});
}});
return keep;
""")
        return code.ret("std::vector<int>")

    @pccm.pybind.mark(nogil=True)
    @pccm.static_function
    def rotate_non_max_suppression_cpu(self):
        # Greedy NMS over rotated boxes using boost.geometry polygons.
        #   box_corners: 3-D float/double tensor; corners 0..3 of each box
        #                are read as (x, y)
        #   order:       1-D integer tensor with the visiting order
        #   standup_iou: 2-D tensor indexed as (i, j); pairs whose value is
        #                <= 0 are skipped without building polygons
        #   thresh:      box j is suppressed when inter/union >= thresh
        #   eps:         declared but not used by the body below
        # Returns the kept box indices in visiting order.
        #
        # Fix relative to the previous version: `keep` was constructed with
        # ndets zero entries and then appended to, so the result started
        # with ndets bogus zeros; it is now empty with reserved capacity.
        code = pccm.FunctionCode()
        code.arg("box_corners, order, standup_iou", "tv::Tensor")
        code.arg("thresh", "float")
        code.arg("eps", "float", "0")
        if BOOST_ROOT is None:
            # NOTE(review): presumably tells pccm not to generate this
            # function when boost is unavailable — confirm.
            return code.make_invalid()
        code.add_dependency(BoostGeometryLib)
        code.raw(f"""
auto ndets = box_corners.dim(0);
std::vector<int> keep;
keep.reserve(ndets);
tv::dispatch<float, double>(box_corners.dtype(), [&](auto I1){{
using DType = TV_DECLTYPE(I1);
auto box_corners_r = box_corners.tview<const DType, 3>();
auto standup_iou_r = standup_iou.tview<const DType, 2>();
tv::dispatch<int, int64_t, uint32_t, uint64_t>(order.dtype(), [&](auto I2){{
using T2 = TV_DECLTYPE(I2);
auto order_r = order.tview<const T2, 1>();
std::vector<int> suppressed(ndets, 0);
int i, j;
namespace bg = boost::geometry;
typedef bg::model::point<DType, 2, bg::cs::cartesian> point_t;
typedef bg::model::polygon<point_t> polygon_t;
polygon_t poly, qpoly;
std::vector<polygon_t> poly_inter, poly_union;
DType inter_area, union_area, overlap;
for (int _i = 0; _i < ndets; ++_i) {{
i = order_r(_i);
if (suppressed[i] == 1)
continue;
keep.push_back(i);
for (int _j = _i + 1; _j < ndets; ++_j) {{
j = order_r(_j);
if (suppressed[j] == 1)
continue;
if (standup_iou_r(i, j) <= 0.0)
continue;
// std::cout << "pre_poly" << std::endl;
bg::append(poly,
point_t(box_corners_r(i, 0, 0), box_corners_r(i, 0, 1)));
bg::append(poly,
point_t(box_corners_r(i, 1, 0), box_corners_r(i, 1, 1)));
bg::append(poly,
point_t(box_corners_r(i, 2, 0), box_corners_r(i, 2, 1)));
bg::append(poly,
point_t(box_corners_r(i, 3, 0), box_corners_r(i, 3, 1)));
bg::append(poly,
point_t(box_corners_r(i, 0, 0), box_corners_r(i, 0, 1)));
bg::append(qpoly,
point_t(box_corners_r(j, 0, 0), box_corners_r(j, 0, 1)));
bg::append(qpoly,
point_t(box_corners_r(j, 1, 0), box_corners_r(j, 1, 1)));
bg::append(qpoly,
point_t(box_corners_r(j, 2, 0), box_corners_r(j, 2, 1)));
bg::append(qpoly,
point_t(box_corners_r(j, 3, 0), box_corners_r(j, 3, 1)));
bg::append(qpoly,
point_t(box_corners_r(j, 0, 0), box_corners_r(j, 0, 1)));
bg::intersection(poly, qpoly, poly_inter);
if (!poly_inter.empty()) {{
inter_area = bg::area(poly_inter.front());
bg::union_(poly, qpoly, poly_union);
if (!poly_union.empty()) {{ // ignore invalid box
union_area = bg::area(poly_union.front());
overlap = inter_area / union_area;
if (overlap >= thresh)
suppressed[j] = 1;
poly_union.clear();
}}
}}
poly.clear();
qpoly.clear();
poly_inter.clear();
}}
}}
}});
}});
return keep;
""")
        return code.ret("std::vector<int>")

    @pccm.pybind.mark(nogil=True)
    @pccm.static_function
    def rbbox_iou(self):
        # Pairwise rotated-box overlap between N boxes and K query boxes.
        #   box_corners / qbox_corners: 3-D float/double tensors; corners
        #       0..3 of each box are read as (x, y)
        #   standup_iou: 2-D tensor indexed as (n, k); pairs whose value is
        #       <= standup_thresh are skipped
        #   overlaps: 2-D output tensor written at (n, k) with the
        #       intersection area (inter_only) or intersection / union
        # Entries for skipped pairs or empty intersections are left
        # untouched, so the caller decides their initial value.
        # The generated function returns nothing.
        code = pccm.FunctionCode()
        code.arg("box_corners, qbox_corners, standup_iou, overlaps", "tv::Tensor")
        code.arg("standup_thresh", "float")
        code.arg("inter_only", "bool")
        if BOOST_ROOT is None:
            # NOTE(review): presumably tells pccm not to generate this
            # function when boost is unavailable — confirm.
            return code.make_invalid()
        code.add_dependency(BoostGeometryLib)
        code.raw(f"""
auto N = box_corners.dim(0);
auto K = qbox_corners.dim(0);
if (N == 0 || K == 0) {{
return;
}}
tv::dispatch<float, double>(box_corners.dtype(), [&](auto I1){{
using DType = TV_DECLTYPE(I1);
auto box_corners_r = box_corners.tview<const DType, 3>();
auto qbox_corners_r = qbox_corners.tview<const DType, 3>();
auto standup_iou_r = standup_iou.tview<const DType, 2>();
auto overlaps_rw = overlaps.tview<DType, 2>();
namespace bg = boost::geometry;
typedef bg::model::point<DType, 2, bg::cs::cartesian> point_t;
typedef bg::model::polygon<point_t> polygon_t;
polygon_t poly, qpoly;
std::vector<polygon_t> poly_inter, poly_union;
DType inter_area, union_area;
for (int k = 0; k < K; ++k) {{
for (int n = 0; n < N; ++n) {{
if (standup_iou_r(n, k) <= standup_thresh)
continue;
bg::append(poly, point_t(box_corners_r(n, 0, 0), box_corners_r(n, 0, 1)));
bg::append(poly, point_t(box_corners_r(n, 1, 0), box_corners_r(n, 1, 1)));
bg::append(poly, point_t(box_corners_r(n, 2, 0), box_corners_r(n, 2, 1)));
bg::append(poly, point_t(box_corners_r(n, 3, 0), box_corners_r(n, 3, 1)));
bg::append(poly, point_t(box_corners_r(n, 0, 0), box_corners_r(n, 0, 1)));
bg::append(qpoly,
point_t(qbox_corners_r(k, 0, 0), qbox_corners_r(k, 0, 1)));
bg::append(qpoly,
point_t(qbox_corners_r(k, 1, 0), qbox_corners_r(k, 1, 1)));
bg::append(qpoly,
point_t(qbox_corners_r(k, 2, 0), qbox_corners_r(k, 2, 1)));
bg::append(qpoly,
point_t(qbox_corners_r(k, 3, 0), qbox_corners_r(k, 3, 1)));
bg::append(qpoly,
point_t(qbox_corners_r(k, 0, 0), qbox_corners_r(k, 0, 1)));
bg::intersection(poly, qpoly, poly_inter);
if (!poly_inter.empty()) {{
inter_area = bg::area(poly_inter.front());
if (inter_only){{
overlaps_rw(n, k) = inter_area;
}}else{{
bg::union_(poly, qpoly, poly_union);
if (!poly_union.empty()) {{
union_area = bg::area(poly_union.front());
overlaps_rw(n, k) = inter_area / union_area;
}}
poly_union.clear();
}}
}}
poly.clear();
qpoly.clear();
poly_inter.clear();
}}
}}
}});
return;
""")
        return code

    @pccm.pybind.mark(nogil=True)
    @pccm.static_function
    def rbbox_iou_aligned(self):
        # Element-wise rotated-box overlap: box n is compared with query
        # box n only, so both inputs must hold the same number of boxes
        # (enforced by TV_ASSERT_RT_ERR in the generated code).
        #   box_corners / qbox_corners: 3-D float/double tensors; corners
        #       0..3 of each box are read as (x, y)
        #   overlaps: 1-D output tensor written at (n) with the
        #       intersection area (inter_only) or intersection / union
        # Entries with an empty intersection are left untouched.
        # The generated function returns nothing.
        code = pccm.FunctionCode()
        code.arg("box_corners, qbox_corners, overlaps", "tv::Tensor")
        code.arg("inter_only", "bool")
        if BOOST_ROOT is None:
            # NOTE(review): presumably tells pccm not to generate this
            # function when boost is unavailable — confirm.
            return code.make_invalid()
        code.add_dependency(BoostGeometryLib)
        code.raw(f"""
auto N = box_corners.dim(0);
auto K = qbox_corners.dim(0);
TV_ASSERT_RT_ERR(N == K, "aligned iou must have same number of box")
if (N == 0 || K == 0) {{
return;
}}
tv::dispatch<float, double>(box_corners.dtype(), [&](auto I1){{
using DType = TV_DECLTYPE(I1);
auto box_corners_r = box_corners.tview<const DType, 3>();
auto qbox_corners_r = qbox_corners.tview<const DType, 3>();
auto overlaps_rw = overlaps.tview<DType, 1>();
namespace bg = boost::geometry;
typedef bg::model::point<DType, 2, bg::cs::cartesian> point_t;
typedef bg::model::polygon<point_t> polygon_t;
polygon_t poly, qpoly;
std::vector<polygon_t> poly_inter, poly_union;
DType inter_area, union_area;
for (int n = 0; n < N; ++n) {{
bg::append(poly, point_t(box_corners_r(n, 0, 0), box_corners_r(n, 0, 1)));
bg::append(poly, point_t(box_corners_r(n, 1, 0), box_corners_r(n, 1, 1)));
bg::append(poly, point_t(box_corners_r(n, 2, 0), box_corners_r(n, 2, 1)));
bg::append(poly, point_t(box_corners_r(n, 3, 0), box_corners_r(n, 3, 1)));
bg::append(poly, point_t(box_corners_r(n, 0, 0), box_corners_r(n, 0, 1)));
bg::append(qpoly,
point_t(qbox_corners_r(n, 0, 0), qbox_corners_r(n, 0, 1)));
bg::append(qpoly,
point_t(qbox_corners_r(n, 1, 0), qbox_corners_r(n, 1, 1)));
bg::append(qpoly,
point_t(qbox_corners_r(n, 2, 0), qbox_corners_r(n, 2, 1)));
bg::append(qpoly,
point_t(qbox_corners_r(n, 3, 0), qbox_corners_r(n, 3, 1)));
bg::append(qpoly,
point_t(qbox_corners_r(n, 0, 0), qbox_corners_r(n, 0, 1)));
bg::intersection(poly, qpoly, poly_inter);
if (!poly_inter.empty()) {{
inter_area = bg::area(poly_inter.front());
if (inter_only){{
overlaps_rw(n) = inter_area;
}}else{{
bg::union_(poly, qpoly, poly_union);
if (!poly_union.empty()) {{
union_area = bg::area(poly_union.front());
overlaps_rw(n) = inter_area / union_area;
}}
poly_union.clear();
}}
}}
poly.clear();
qpoly.clear();
poly_inter.clear();
}}
}});
return;
""")
        return code
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import pickle
from pathlib import Path
from spconv.constants import SPCONV_DEBUG_SAVE_PATH
def spconv_save_debug_data(data):
    """Pickle ``data`` to the file named by SPCONV_DEBUG_SAVE_PATH.

    Best-effort helper: it never raises. When SPCONV_DEBUG_SAVE_PATH is
    falsy a hint on how to enable saving is printed. Otherwise the data is
    pickled to that path; any exception (including a missing parent
    directory) is caught and reported with ``print``.

    Args:
        data: any picklable object to attach to an issue report.
    """
    # Guard clause: nothing to do when no save path is configured.
    if not SPCONV_DEBUG_SAVE_PATH:
        print((f"SPCONV_DEBUG_SAVE_PATH not found, "
               "you can specify SPCONV_DEBUG_SAVE_PATH as debug data save path "
               "to save debug data which can be attached in a issue."))
        return

    try:
        target = Path(SPCONV_DEBUG_SAVE_PATH)
        # Inside the try on purpose: a missing directory is reported, not raised.
        assert target.parent.exists(), "parent of SPCONV_DEBUG_SAVE_PATH must exist"
        with target.open("wb") as stream:
            pickle.dump(data, stream)
        print((f"spconv save debug data to {SPCONV_DEBUG_SAVE_PATH}, "
               "you can submit issue with log and this debug data attached."))
    except Exception as e:
        print((f"spconv try to save debug data to {SPCONV_DEBUG_SAVE_PATH}, "
               f"but failed with exception {e}. please check your SPCONV_DEBUG_SAVE_PATH"))
......@@ -3,6 +3,7 @@ from pathlib import Path
import numpy as np
import torch
from spconv.pytorch.core import SparseConvTensor
from spconv.pytorch import functional, ops
from spconv.pytorch.conv import (SparseConv1d, SparseConv2d, SparseConv3d,
SparseConv4d, SparseConvTranspose1d,
......@@ -11,7 +12,6 @@ from spconv.pytorch.conv import (SparseConv1d, SparseConv2d, SparseConv3d,
SparseInverseConv2d, SparseInverseConv3d,
SparseInverseConv4d, SubMConv1d, SubMConv2d,
SubMConv3d, SubMConv4d)
from spconv.pytorch.core import SparseConvTensor
from spconv.pytorch.identity import Identity
from spconv.pytorch.modules import (SparseModule, SparseSequential,
assign_name_for_sparse_modules)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment