Commit 076bdb05 authored by yan.yan's avatar yan.yan
Browse files

v2.1.14: add hash table, fix small bug

parent d406d9e2
......@@ -30,14 +30,14 @@ jobs:
- 'spconv/algo.py'
- 'spconv/core.py'
- 'pyproject.toml'
- name: Install CUDA
- name: Install Boost
env:
CUDA_VERSION: ${{ matrix.cuda-version }}
PYTHON_VERSION: ${{ matrix.python-version }}
cuda: ${{ matrix.cuda-version }}
BOOST_VERSION: boost_1_77_0
if: |
(env.CUDA_VERSION != '') && (
(
(github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) ) ||
(
(steps.changes.outputs.needbuild == 'true') &&
......@@ -49,7 +49,24 @@ jobs:
$ProgressPreference = 'SilentlyContinue'
Invoke-WebRequest -Uri "https://boostorg.jfrog.io/artifactory/main/release/1.77.0/source/boost_1_77_0.zip" -UseBasicParsing -OutFile $HOME/boost.zip
Expand-Archive $HOME/boost.zip -DestinationPath $HOME/boost
- name: Install CUDA
env:
CUDA_VERSION: ${{ matrix.cuda-version }}
PYTHON_VERSION: ${{ matrix.python-version }}
cuda: ${{ matrix.cuda-version }}
if: |
(env.CUDA_VERSION != '') && (
(github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) ) ||
(
(steps.changes.outputs.needbuild == 'true') &&
(env.PYTHON_VERSION == '3.10')
)
)
shell: powershell
run: |
.\tools\install_windows_cuda.ps1
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
......@@ -66,6 +83,7 @@ jobs:
CUDA_VERSION: ${{ matrix.cuda-version }}
PYTHON_VERSION: ${{ matrix.python-version }}
BOOST_VERSION: boost_1_77_0
CUMM_CUDA_VERSION: ${{ matrix.cuda-version }}
if: |
(env.CUDA_VERSION != '') && (
(github.event_name == 'push' && (startsWith(github.ref, 'refs/tags')) ) ||
......@@ -75,7 +93,6 @@ jobs:
)
)
run: |
$Env:CUMM_CUDA_VERSION = "${{ matrix.cuda-version }}"
$Env:CUMM_CUDA_ARCH_LIST = "all"
$Env:SPCONV_DISABLE_JIT = "1"
pip install pccm pybind11
......
# Changelog
## [2.1.14] - 2021-11-28
### Added
- Add hash table
- Update cumm version
- Add AddTableMisaligned for sptensors with same shape but different indices.
### Fixed
- Fix a regression: a bug fixed in 2.1.10 was reintroduced in 2.1.12.
## [2.1.13] - 2021-?-?
### Added
- Add some ops from spconv 1.x, see spconv.utils for more details.
......
......@@ -171,7 +171,9 @@ You need to rebuild ```cumm``` first if you are build along a CUDA version that
5. run ```pip install pccm cumm wheel```
6. run ```python setup.py bdist_wheel```+```pip install dist/xxx.whl```
## Known issues
* Spconv 2.x F16 runs slowly on A100.
## Note
......
......@@ -27,15 +27,17 @@ Network Code: test/benchmark.py
| F16 Forward | Native| Implicit Gemm | Implicit Gemm Split Mask |
| -------------- |:---------------------:|---------------------:| ---------------------:|
| RTX 3080 Laptop 150W | 13.7ms | 11.2ms | 12.2ms |
| RTX 3080 Laptop 150W@1755MHz | 13.7ms | 11.2ms | 12.2ms |
| RTX A6000 | 19.1ms | 11.7ms | 14.0ms |
| TESLA V100 | 17.9ms | 11.4ms | 13.4ms |
| A100 | 23.8ms | 12.4ms | 15.1ms |
| F16 Backward | Native| Implicit Gemm | Implicit Gemm Split Mask |
| -------------- |:---------------------:|---------------------:| ---------------------:|
| RTX 3080 Laptop 150W | 25.2ms | 13.8ms | 12.2ms |
| RTX 3080 Laptop 150W@1755MHz | 25.2ms | 13.8ms | 12.2ms |
| RTX A6000 | 28.1ms | 9.2ms | 8.9ms |
| TESLA V100 | 33.9ms | 12.2ms | 12.9ms |
| A100 | 37.6ms | 12.2ms | 13.9ms |
### Network Gemm Kernel Benchmark FP16 in RTX 3080 Laptop GPU
......
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import torch
from spconv.pytorch.hash import HashTable
def main():
    """Fixed-size CUDA hash table demo, run once on CPU and once on CUDA.

    The table cannot delete keys after insertion and cannot be resized, so
    its capacity has to be chosen up front; about twice the number of keys
    is recommended.
    """
    max_size = 1000
    k_dtype = torch.int32
    v_dtype = torch.int64

    def dump_items(tbl):
        # Print the count tensor and the first `count` keys/values.
        ks, vs, cnt = tbl.items()
        n_valid = cnt.item()
        print(cnt, ks[:n_valid], vs[:n_valid])

    for is_cpu in (True, False):
        if is_cpu:
            dev = torch.device("cpu")
            table = HashTable(dev, k_dtype, v_dtype)
        else:
            dev = torch.device("cuda:0")
            table = HashTable(dev, k_dtype, v_dtype, max_size=max_size)

        keys = torch.tensor([5, 3, 7, 4, 6, 2, 10, 8], dtype=k_dtype, device=dev)
        values = torch.tensor([1, 6, 4, 77, 23, 756, 12, 12], dtype=v_dtype, device=dev)
        # Same keys as above, in reverse order.
        keys_query = torch.tensor([8, 10, 2, 6, 4, 7, 3, 5], dtype=k_dtype, device=dev)

        table.insert(keys, values)
        queried, _ = table.query(keys_query)
        print(queried)

        dump_items(table)
        table.assign_arange_()
        dump_items(table)
if __name__ == "__main__":
main()
\ No newline at end of file
......@@ -29,6 +29,7 @@ if project_is_installed(PACKAGE_NAME) and project_is_editable(
from spconv.csrc.sparse.all import SpconvOps
from spconv.csrc.utils import BoxOps
from spconv.csrc.hash.core import HashTable
cu = GemmMainUnitTest(SHUFFLE_SIMT_PARAMS + SHUFFLE_VOLTA_PARAMS +
SHUFFLE_TURING_PARAMS)
......@@ -40,7 +41,7 @@ if project_is_installed(PACKAGE_NAME) and project_is_editable(
if InWindows:
# windows have command line limit, so we use objects_folder to reduce command size.
objects_folder = "objects"
pccm.builder.build_pybind([cu, convcu, SpconvOps(), BoxOps()],
pccm.builder.build_pybind([cu, convcu, SpconvOps(), BoxOps(), HashTable()],
PACKAGE_ROOT / "core_cc",
namespace_root=PACKAGE_ROOT,
objects_folder=objects_folder,
......
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import overload, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union
from pccm.stubs import EnumValue, EnumClassValue
from cumm.tensorview import Tensor
class HashTable:
    """Stub signatures for the hash table binding; every body is ``...``."""
    # Per-item sizes of keys and values (presumably bytes -- TODO confirm).
    key_itemsize: int
    value_itemsize: int
    # Whether the table is the CPU variant.
    is_cpu: bool
    insert_count: int
    def __init__(self, is_cpu: bool, key_itemsize: int, value_itemsize: int, keys_data: Tensor, values_data: Tensor, stream: int = 0) -> None:
        """
        Construct a table.
        Args:
            is_cpu: select the CPU variant of the table.
            key_itemsize: size of one key (presumably in bytes -- TODO confirm).
            value_itemsize: size of one value (presumably in bytes -- TODO confirm).
            keys_data: tensor for key storage (presumably only used by the
                non-CPU table -- TODO confirm).
            values_data: tensor for value storage (same caveat as keys_data).
            stream: stream handle; defaults to 0.
        """
        ...
    def clear(self, stream: int = 0) -> None:
        """
        NOTE(review): the previous text here ("if values is empty, it will be
        assigned to zero") refers to a ``values`` argument this method does
        not take; it looks copied from ``insert``. Presumably this empties
        the table -- confirm against the native implementation.
        Args:
            stream: stream handle; defaults to 0.
        """
        ...
    def insert(self, keys: Tensor, values: Tensor = Tensor(), stream: int = 0) -> None:
        """
        in this function, if values is empty, it will be assigned to zero.
        Args:
            keys: keys to insert.
            values: values paired with ``keys``; may be left empty (see above).
            stream: stream handle; defaults to 0.
        """
        ...
    def query(self, keys: Tensor, values: Tensor, is_empty: Tensor, stream: int) -> None:
        """
        query keys, save to values, and save is_empty to is_empty
        Args:
            keys: keys to look up.
            values: output tensor receiving the looked-up values.
            is_empty: output tensor receiving the per-key "empty" flag.
            stream: stream handle (no default here).
        """
        ...
    def assign_arange_(self, count: Tensor, stream: int = 0) -> None:
        """
        this function assign "arange(NumItem)" to table values.
        useful in "unique-like" operations.
        unlike insert/query, this method only support i32/i64/u32/u64 for value.
        count must be u32/u64.
        Args:
            count: u32/u64 tensor (presumably receives the item count -- TODO confirm).
            stream: stream handle; defaults to 0.
        """
        ...
    def size_cpu(self) -> int:
        """
        this function can only be used to get cpu hash table size.
        """
        ...
    def items(self, keys: Tensor, values: Tensor, count: Tensor, stream: int) -> None:
        """
        get items.
        Args:
            keys: output tensor for the stored keys.
            values: output tensor for the stored values.
            count: tensor for the item count (presumably written by the call -- TODO confirm).
            stream: stream handle (no default here).
        """
        ...
......@@ -14,36 +14,3 @@ class BoxOps:
eps:
"""
...
@staticmethod
def rotate_non_max_suppression_cpu(box_corners: Tensor, order: Tensor, standup_iou: Tensor, thresh: float, eps: float = 0) -> List[int]:
"""
Args:
box_corners:
order:
standup_iou:
thresh:
eps:
"""
...
@staticmethod
def rbbox_iou(box_corners: Tensor, qbox_corners: Tensor, standup_iou: Tensor, overlaps: Tensor, standup_thresh: float, inter_only: bool) -> None:
"""
Args:
box_corners:
qbox_corners:
standup_iou:
overlaps:
standup_thresh:
inter_only:
"""
...
@staticmethod
def rbbox_iou_aligned(box_corners: Tensor, qbox_corners: Tensor, overlaps: Tensor, inter_only: bool) -> None:
"""
Args:
box_corners:
qbox_corners:
overlaps:
inter_only:
"""
...
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
This diff is collapsed.
......@@ -3,6 +3,7 @@ from pathlib import Path
import numpy as np
import torch
from spconv.pytorch.core import SparseConvTensor
from spconv.pytorch import functional, ops
from spconv.pytorch.conv import (SparseConv1d, SparseConv2d, SparseConv3d,
SparseConv4d, SparseConvTranspose1d,
......@@ -11,7 +12,6 @@ from spconv.pytorch.conv import (SparseConv1d, SparseConv2d, SparseConv3d,
SparseInverseConv2d, SparseInverseConv3d,
SparseInverseConv4d, SubMConv1d, SubMConv2d,
SubMConv3d, SubMConv4d)
from spconv.pytorch.core import SparseConvTensor
from spconv.pytorch.identity import Identity
from spconv.pytorch.modules import (SparseModule, SparseSequential,
assign_name_for_sparse_modules)
......
......@@ -18,7 +18,6 @@ import numpy as np
import torch
from spconv.core import ConvAlgo
from spconv.pytorch.constants import PYTORCH_VERSION
from spconv.pytorch.ops import ThrustSortAllocator
from spconv.tools import CUDAKernelTimer
if PYTORCH_VERSION >= [1, 8, 0]:
......@@ -39,6 +38,24 @@ else:
pass
class ThrustSortAllocator:
    """Hands out raw data pointers to cached uint8 scratch tensors.

    Tensors are stored in ``alloced_objs`` keyed by the size they were
    created with and stay there for the allocator's lifetime.
    """

    def __init__(self, device: torch.device) -> None:
        super().__init__()
        self.alloced_objs = {}
        self.device = device

    def alloc(self, n: int):
        """Return the data pointer of a buffer holding at least ``n`` bytes.

        An exact-size entry is preferred, then the first cached buffer that
        is strictly larger (in insertion order); otherwise a new buffer of
        ``n`` bytes is created on ``self.device`` and cached.
        """
        pool = self.alloced_objs
        exact = pool.get(n)
        if exact is not None:
            return exact.data_ptr()
        for capacity, buf in pool.items():
            if capacity > n:
                return buf.data_ptr()
        fresh = torch.empty([n], dtype=torch.uint8, device=self.device)
        pool[n] = fresh
        return fresh.data_ptr()
class IndiceData(object):
def __init__(self, out_indices, indices, indice_pairs, indice_pair_num,
spatial_shape, out_spatial_shape, is_subm: bool, algo: ConvAlgo):
......
......@@ -46,6 +46,9 @@ def torch_tensor_to_tv(ten: torch.Tensor,
dtype = _TORCH_DTYPE_TO_TV[ten.dtype]
return tv.from_blob(ptr, shape, dtype, tv_device)
def torch_tensors_to_tv(*tens: torch.Tensor):
    """Lazily yield ``torch_tensor_to_tv(t)`` for each given tensor, in order.

    The result is a generator, so conversion happens only when the caller
    iterates or unpacks it.
    """
    for ten in tens:
        yield torch_tensor_to_tv(ten)
def get_current_stream():
return torch.cuda.current_stream().cuda_stream
......
......@@ -20,15 +20,18 @@ from torch import nn
from torch.autograd import Function
from typing import Optional, TypeVar
from spconv.tools import CUDAKernelTimer
from spconv.pytorch import ops
from spconv.pytorch import ops, SparseConvTensor
from spconv.pytorch.constants import PYTORCH_VERSION
from spconv.debug_utils import spconv_save_debug_data
from torch.autograd.function import once_differentiable
import numpy as np
from pathlib import Path
from spconv.pytorch.hash import HashTable
from cumm.gemm.layout import to_stride
from typing import List
_MAX_INT32 = 2147483647
_T = TypeVar("_T")
def identity_decorator(func: _T) -> _T:
......@@ -357,3 +360,69 @@ indice_inverse_conv = SparseInverseConvFunction.apply
indice_subm_conv = SubMConvFunction.apply
indice_maxpool = SparseMaxPoolFunction.apply
indice_maxpool_implicit_gemm = SparseMaxPoolImplicitGemmFunction.apply
def _indice_to_scalar(indices: torch.Tensor, shape: List[int]):
    """Flatten per-row N-d indices into one scalar index per row.

    Each of the first ``len(shape) - 1`` columns is multiplied by the
    matching entry of ``to_stride(shape)`` and accumulated onto the last
    column, which is added unscaled.

    Args:
        indices: 2-d integer tensor with ``len(shape)`` columns.
        shape: extent of each index dimension, in column order.

    Returns:
        A contiguous 1-d tensor with one scalar index per row. The input
        tensor is left unmodified.
    """
    assert indices.shape[1] == len(shape)
    stride = to_stride(np.array(shape, dtype=np.int64))
    # Slicing returns a view; without the clone the in-place accumulation
    # below would overwrite the caller's last index column.
    scalar_inds = indices[:, -1].clone()
    for i in range(len(shape) - 1):
        # int() keeps the multiplier a plain Python scalar so the tensor's
        # dtype is preserved.
        scalar_inds += int(stride[i]) * indices[:, i]
    return scalar_inds.contiguous()
def sparse_add_hash_based(*tens: SparseConvTensor):
    """Sum sparse tensors whose indices may differ, using a hash table.

    All inputs must agree on spatial shape, batch size and feature width.
    Every input index is flattened to a scalar key and inserted into one
    table; ``assign_arange_`` then numbers the stored keys, and that number
    is the output row each input row is accumulated into.

    Returns:
        A new SparseConvTensor carrying the benchmark flag/record, timer and
        thrust allocator of the first input.
    """
    first = tens[0]
    for ten in tens:
        assert ten.spatial_shape == first.spatial_shape
        assert ten.batch_size == first.batch_size
        assert ten.features.shape[1] == first.features.shape[1]
    # Capacity is twice the total number of input rows.
    table_size = 2 * sum(ten.features.shape[0] for ten in tens)

    feat = first.features
    shape = [first.batch_size, *first.spatial_shape]
    whole_shape = int(np.prod(shape))
    # Flattened keys need 64 bits once the dense volume reaches the int32 limit.
    use_i64 = whole_shape >= _MAX_INT32
    k_type = torch.int64 if use_i64 else torch.int32
    table = HashTable(feat.device, k_type, torch.int32, table_size)

    scalars: List[torch.Tensor] = []
    for ten in tens:
        indices = ten.indices.long() if use_i64 else ten.indices
        scalar = _indice_to_scalar(indices, shape)
        scalars.append(scalar)
        table.insert(scalar)

    # assign arange to values of hash table
    count = table.assign_arange_()
    num_out = int(count.item())
    out_features = torch.zeros([num_out, feat.shape[1]],
                               dtype=feat.dtype, device=feat.device)
    out_indices = torch.zeros([num_out, first.indices.shape[1]],
                              dtype=first.indices.dtype,
                              device=first.indices.device)
    for ten, scalar in zip(tens, scalars):
        rows, _ = table.query(scalar)
        rows = rows.long()
        out_features[rows] += ten.features
        out_indices[rows] = ten.indices

    res = SparseConvTensor(out_features, out_indices, first.spatial_shape,
                           first.batch_size, benchmark=first.benchmark)
    res.benchmark_record = first.benchmark_record
    res._timer = first._timer
    res.thrust_allocator = first.thrust_allocator
    return res
def sparse_add(a: SparseConvTensor, b: SparseConvTensor):
    """Add two sparse tensors via torch's sparse COO addition.

    Both inputs are wrapped as ``torch.sparse_coo_tensor`` objects of shape
    ``[batch_size, *spatial_shape, channels]``, added, and coalesced; the
    coalesced indices and values become the result.

    Args:
        a: first operand.
        b: second operand; must match ``a`` in spatial shape, batch size and
            feature width.

    Returns:
        A new SparseConvTensor built from the coalesced sum.
    """
    assert a.spatial_shape == b.spatial_shape
    assert a.batch_size == b.batch_size
    # Compare against b: the previous check compared a's width with itself
    # and therefore could never fail.
    assert a.features.shape[1] == b.features.shape[1]
    res_shape = [a.batch_size, *a.spatial_shape, a.features.shape[1]]
    a_th = torch.sparse_coo_tensor(a.indices.T, a.features, res_shape, requires_grad=True)
    b_th = torch.sparse_coo_tensor(b.indices.T, b.features, res_shape, requires_grad=True)
    c_th = (a_th + b_th).coalesce()
    # Transpose the coalesced indices back to one row per index and cast to int32.
    c_th_inds = c_th.indices().T.contiguous().int()
    c_th_values = c_th.values()
    assert c_th_values.is_contiguous()
    return SparseConvTensor(c_th_values, c_th_inds, a.spatial_shape, a.batch_size)
# Copyright 2021 Yan Yan
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from typing import Optional
import torch
from cumm import tensorview as tv
from spconv.pytorch.cppcore import torch_tensor_to_tv, get_current_stream
from spconv.core_cc.csrc.hash.core import HashTable as _HashTable
_TORCH_DTYPE_TO_ITEMSIZE = {
torch.int32: 4,
torch.int64: 8,
torch.float32: 4,
torch.float64: 8,
}
class HashTable:
    """simple hash table for 32 and 64 bit data. support both cpu and cuda.
    for cuda, it's a fixed-size table, you must provide maximum size
    (recommend 2 * num).
    see spconv/pytorch/functional/sparse_add_hash_based, a real example
    that show how to use hash table to implement
    sparse add (same shape, different indices)
    """

    def __init__(self, device: torch.device, key_dtype: torch.dtype,
                 value_dtype: torch.dtype,
                 max_size: int = -1) -> None:
        """Create a table on ``device``.

        Args:
            device: target device; ``device.type == "cpu"`` selects the CPU
                table, anything else the fixed-size table.
            key_dtype: torch dtype of keys; must be a key of
                ``_TORCH_DTYPE_TO_ITEMSIZE``.
            value_dtype: torch dtype of values; same restriction.
            max_size: capacity of the non-CPU table; must be > 0 there and
                is not used for the CPU table.
        """
        is_cpu = device.type == "cpu"
        self.is_cpu = is_cpu
        self.key_dtype = key_dtype
        self.value_dtype = value_dtype
        # The CPU table is handed empty storage tensors.
        key_data_tv = tv.Tensor()
        value_data_tv = tv.Tensor()
        if is_cpu:
            self.keys_data = None
            self.values_data = None
        else:
            assert max_size > 0, "you must provide max_size for fixed-size cuda hash table, usually *2 of num of keys"
            assert device is not None, "you must specify device for cuda hash table."
            self.keys_data = torch.empty([max_size], dtype=key_dtype, device=device)
            self.values_data = torch.empty([max_size], dtype=value_dtype, device=device)
            key_data_tv = torch_tensor_to_tv(self.keys_data)
            value_data_tv = torch_tensor_to_tv(self.values_data)
        stream = self._current_stream()
        self.key_itemsize = _TORCH_DTYPE_TO_ITEMSIZE[self.key_dtype]
        self.value_itemsize = _TORCH_DTYPE_TO_ITEMSIZE[self.value_dtype]
        self._valid_value_dtype_for_arange = {torch.int32, torch.int64}
        self._table = _HashTable(is_cpu, self.key_itemsize, self.value_itemsize, key_data_tv, value_data_tv, stream)

    def _current_stream(self):
        """Return 0 for the CPU table, else the current torch CUDA stream handle."""
        if self.is_cpu:
            return 0
        return get_current_stream()

    def _new_device_count(self):
        """Allocate the one-element counter used by the non-CPU table.

        Its width follows the key item size (4 -> int32 viewed as uint32,
        8 -> int64 viewed as uint64). Returns ``(torch_tensor, tv_view)``.
        """
        assert self.values_data is not None
        device = self.values_data.device
        if self.key_itemsize == 4:
            count = torch.zeros([1], dtype=torch.int32, device=device)
            return count, torch_tensor_to_tv(count, dtype=tv.uint32)
        if self.key_itemsize == 8:
            count = torch.zeros([1], dtype=torch.int64, device=device)
            return count, torch_tensor_to_tv(count, dtype=tv.uint64)
        raise NotImplementedError

    def insert(self, keys: torch.Tensor, values: Optional[torch.Tensor] = None):
        """insert hash table by keys and values
        if values is None, only key is inserted, the value is undefined.
        """
        keys_tv = torch_tensor_to_tv(keys)
        values_tv = tv.Tensor()
        if values is not None:
            values_tv = torch_tensor_to_tv(values)
        return self._table.insert(keys_tv, values_tv, self._current_stream())

    def query(self, keys: torch.Tensor, values: Optional[torch.Tensor] = None):
        """Look up ``keys``.

        Args:
            keys: 1-d key tensor.
            values: optional output buffer; when None a tensor of
                ``value_dtype`` with one entry per key is allocated on the
                keys' device.

        Returns:
            ``(values, is_empty)`` where ``is_empty`` is a uint8 tensor with
            one entry per key, filled by the native query.
        """
        keys_tv = torch_tensor_to_tv(keys)
        if values is None:
            values = torch.empty([keys.shape[0]], dtype=self.value_dtype, device=keys.device)
        values_tv = torch_tensor_to_tv(values)
        stream = self._current_stream()
        is_empty = torch.empty([keys.shape[0]], dtype=torch.uint8, device=keys.device)
        is_empty_tv = torch_tensor_to_tv(is_empty)
        self._table.query(keys_tv, values_tv, is_empty_tv, stream)
        return values, is_empty

    def assign_arange_(self):
        """Overwrite the table's values via the native ``assign_arange_``.

        Only int32/int64 value dtypes are accepted.

        Returns:
            A one-element count tensor: the CPU table's size as int64, or
            the device counter passed to the native call.
        """
        # Checked for every device: previously this assert sat in the
        # ``else`` of the stream selection and was skipped for CUDA tables.
        assert self.value_dtype in self._valid_value_dtype_for_arange, \
            "assign_arange_ only supports int32/int64 value dtype"
        stream = self._current_stream()
        if self.is_cpu:
            # The CPU table reports its size directly, so the native call
            # receives an empty count tensor.
            count_tv = tv.Tensor()
            count = torch.tensor([self._table.size_cpu()], dtype=torch.int64)
        else:
            count, count_tv = self._new_device_count()
        self._table.assign_arange_(count_tv, stream)
        return count

    def items(self, max_size: int = -1):
        """Export the stored keys and values.

        Args:
            max_size: length of the output buffers for the non-CPU table;
                -1 means the table capacity. Overwritten with the table
                size for the CPU table.

        Returns:
            ``(keys, values, count)``; callers slice the buffers with
            ``count.item()``.
        """
        stream = self._current_stream()
        if self.is_cpu:
            count_tv = tv.Tensor()
            max_size = self._table.size_cpu()
            count = torch.tensor([max_size], dtype=torch.int64)
            keys = torch.empty([max_size], dtype=self.key_dtype)
            values = torch.empty([max_size], dtype=self.value_dtype)
        else:
            count, count_tv = self._new_device_count()
            if max_size == -1:
                max_size = self.values_data.shape[0]
            keys = torch.empty([max_size], dtype=self.key_dtype, device=self.values_data.device)
            values = torch.empty([max_size], dtype=self.value_dtype, device=self.values_data.device)
        keys_tv = torch_tensor_to_tv(keys)
        values_tv = torch_tensor_to_tv(values)
        self._table.items(keys_tv, values_tv, count_tv, stream)
        return keys, values, count
def main():
    """Smoke-test HashTable on CPU and on ``cuda:0`` by printing its contents."""
    max_size = 1000
    k_dtype = torch.int32
    v_dtype = torch.int64

    def show(tbl):
        # Print the count tensor and the first `count` keys/values.
        ks, vs, cnt = tbl.items()
        n_valid = cnt.item()
        print(cnt, ks[:n_valid], vs[:n_valid])

    for is_cpu in (True, False):
        if is_cpu:
            dev = torch.device("cpu")
            table = HashTable(dev, k_dtype, v_dtype)
        else:
            dev = torch.device("cuda:0")
            table = HashTable(dev, k_dtype, v_dtype, max_size=max_size)

        keys = torch.tensor([5, 3, 7, 4, 6, 2, 10, 8], dtype=k_dtype, device=dev)
        values = torch.tensor([1, 6, 4, 77, 23, 756, 12, 12], dtype=v_dtype, device=dev)
        keys_query = torch.tensor([8, 10, 2, 6, 4, 7, 3, 5], dtype=k_dtype, device=dev)

        table.insert(keys, values)
        found, _ = table.query(keys_query)
        print(found)

        show(table)
        table.assign_arange_()
        show(table)
if __name__ == "__main__":
main()
\ No newline at end of file
......@@ -22,6 +22,7 @@ import numpy as np
import spconv
from spconv.core import AlgoHint, ConvAlgo
from typing import List, Optional, Union
from spconv.pytorch.core import ThrustSortAllocator
from spconv.pytorch.cppcore import torch_tensor_to_tv, get_current_stream
from spconv.core_cc.csrc.sparse.all import SpconvOps
import spconv.core_cc as _ext
......@@ -43,24 +44,6 @@ from spconv.tools import CUDAKernelTimer
DEBUG = False
class ThrustSortAllocator:
def __init__(self, device: torch.device) -> None:
super().__init__()
self.alloced_objs = {}
self.device = device
def alloc(self, n: int):
if n in self.alloced_objs:
return self.alloced_objs[n].data_ptr()
for n_cur, ten in self.alloced_objs.items():
if n < n_cur:
return ten.data_ptr()
ten = torch.empty([n], dtype=torch.uint8, device=self.device)
self.alloced_objs[n] = ten
return ten.data_ptr()
def get_conv_output_size(input_size, kernel_size, stride, padding, dilation):
ndim = len(input_size)
output_size = []
......@@ -1482,3 +1465,4 @@ def indice_maxpool_implicit_gemm_backward(features, out_features, out_bp,
out_bp_tv, din_tv,
indice_pairs_tv, stream)
return din
......@@ -19,37 +19,68 @@ from torch.autograd import Function
from spconv.pytorch.modules import SparseModule
from spconv.pytorch.core import SparseConvTensor
from typing import List
from spconv.pytorch import functional as F
class JoinTable(SparseModule): # Module):
class JoinTable(SparseModule):
    """Concatenate the features of several sparse tensors along dim 1.

    The output reuses the indices, spatial shape, batch size, grid, voxel
    count and indice dict of the first input.
    """

    def forward(self, input: List[SparseConvTensor]):
        """Join ``input`` into one SparseConvTensor.

        Every tensor must match the first one in spatial shape, batch size,
        feature width and number of index rows. Index contents are not
        compared.
        """
        msg = "you can't use JoinTable in two sptensor with different indices."
        first = input[0]
        for ten in input:
            assert ten.spatial_shape == first.spatial_shape, msg
            assert ten.batch_size == first.batch_size, msg
            assert ten.features.shape[1] == first.features.shape[1], msg
            assert ten.indices.shape[0] == first.indices.shape[0], msg
        output = SparseConvTensor(torch.cat([i.features for i in input], 1),
                                  first.indices, first.spatial_shape,
                                  first.batch_size, first.grid,
                                  first.voxel_num, first.indice_dict)
        # Bookkeeping is taken from input[1] as before; fall back to input[0]
        # so a single-element list no longer raises IndexError.
        meta_src = input[1] if len(input) > 1 else first
        output.benchmark_record = meta_src.benchmark_record
        output.thrust_allocator = meta_src.thrust_allocator
        output._timer = meta_src._timer
        return output

    def input_spatial_size(self, out_size):
        """Return ``out_size`` unchanged."""
        return out_size
class AddTable(SparseModule): # Module):
class AddTable(SparseModule):
    """Sum the features of several sparse tensors element-wise.

    The output reuses the indices, spatial shape, batch size, grid, voxel
    count and indice dict of the first input.
    """

    def forward(self, input: List[SparseConvTensor]):
        """Add ``input`` into one SparseConvTensor.

        Every tensor must match the first one in spatial shape, batch size,
        feature width and number of index rows. Index contents are not
        compared.
        """
        msg = "you can't use AddTable in two sptensor with different indices. use AddTableMisaligned instead."
        first = input[0]
        for ten in input:
            assert ten.spatial_shape == first.spatial_shape, msg
            assert ten.batch_size == first.batch_size, msg
            assert ten.features.shape[1] == first.features.shape[1], msg
            assert ten.indices.shape[0] == first.indices.shape[0], msg
        output = SparseConvTensor(sum([i.features for i in input]),
                                  first.indices, first.spatial_shape,
                                  first.batch_size, first.grid,
                                  first.voxel_num, first.indice_dict)
        # Bookkeeping is taken from input[1] as before; fall back to input[0]
        # so a single-element list no longer raises IndexError.
        meta_src = input[1] if len(input) > 1 else first
        output.benchmark_record = meta_src.benchmark_record
        output.thrust_allocator = meta_src.thrust_allocator
        output._timer = meta_src._timer
        return output

    def input_spatial_size(self, out_size):
        """Return ``out_size`` unchanged."""
        return out_size
class AddTableMisaligned(SparseModule):
    """Add sparse tensors that share a shape but not their indices.

    Slower than AddTable.
    WARNING: do not use this in segmentation networks such as U-Net:
    adding misaligned tensors clears the downsample indices, which stops
    SparseInverseConvXd from working.
    """

    def forward(self, input: List[SparseConvTensor]):
        """Delegate to ``F.sparse_add_hash_based`` with the list unpacked."""
        summed = F.sparse_add_hash_based(*input)
        return summed

    def input_spatial_size(self, out_size):
        """Return ``out_size`` unchanged."""
        return out_size
class ConcatTable(SparseModule): # Module):
class ConcatTable(SparseModule):
def forward(self, input):
return [module(input) for module in self._modules.values()]
......
......@@ -140,7 +140,6 @@ class PointToVoxel(object):
num_voxels = res[0].shape[0]
else:
pc_tv = torch_tensor_to_tv(pc)
stream = get_current_stream()
voxels_tv = torch_tensor_to_tv(self.voxels)
indices_tv = torch_tensor_to_tv(self.indices)
num_per_voxel_tv = torch_tensor_to_tv(self.num_per_voxel)
......
2.1.13
2.1.14
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment