Commit 23d9faaf authored by yanyan's avatar yanyan
Browse files

working on cutlass

parent 853302aa
...@@ -43,6 +43,7 @@ add_subdirectory(third_party/pybind11) ...@@ -43,6 +43,7 @@ add_subdirectory(third_party/pybind11)
set(ALL_LIBS ${TORCH_LIBRARIES}) set(ALL_LIBS ${TORCH_LIBRARIES})
set(ALL_INCLUDE ${PROJECT_SOURCE_DIR}/include) set(ALL_INCLUDE ${PROJECT_SOURCE_DIR}/include)
set(MP11_INCLUDE ${PROJECT_SOURCE_DIR}/third_party/mp11/include)
if (SPCONV_BuildCUDA) if (SPCONV_BuildCUDA)
set(ALL_LIBS ${ALL_LIBS} ${CUDA_CUDART} ${CUDA_CUBLAS}) set(ALL_LIBS ${ALL_LIBS} ${CUDA_CUDART} ${CUDA_CUBLAS})
......
global:
console_url: localhost:50091
envs:
PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION: python # c++ libprotobuf and python will conflicit
analyzers: # only one analyzer is allowed for one type for now.
PythonAnalyzer:
SimpleCPPAnalyzer: # $<astex> devops.devs = ["_ci_dev_xxx"] </astex> is allowed in raw sources.
includes: ["*.cpp", "*.cu", "*.cc", "*.h", "*.hpp", "*.hxx", "*.cxx"]
observers:
# run test functions when that function change or marked function change.
test:
type: TestObserver
# run dev functions when that function change or marked function change.
dev:
type: DevObserver
pattern: _ci_dev_.*
clangdev:
type: CPPDevObserver
main_pattern: dev_.*\.(cc|cpp|cxx)
pattern: .*\.(cc|cpp|cxx|h|hpp|hxx)
compiler: clang++
executable: build/codeai_dev
includes: [
include,
/usr/local/cuda/include,
/home/yy/anaconda3/include,
/home/yy/anaconda3/include/python3.7m,
third_party/pybind11/include,
third_party/include,
/home/yy/library/boost_1_72_0,
]
libpaths: [
/home/yy/anaconda3/lib,
]
libraries: [-lnvinfer, -lpython3.7m, -lcublas, -lcudart, -ljpeg]
std: c++2a
options: [-Wall, -Wextra]
cudadev:
type: CPPDevObserver
main_pattern: dev_.*\.cu
pattern: .*\.(cc|cpp|cxx|h|hpp|hxx|cu)
compiler: nvcc
executable: build/codeai_dev_cuda
run_cmd: [$(executable)]
sources: []
includes: [
include,
/usr/local/cuda/include,
/home/yy/anaconda3/include,
/home/yy/anaconda3/include/python3.7m,
third_party/pybind11/include,
third_party/cutlass/include,
]
libpaths: [
/usr/local/cuda/lib64,
/home/yy/anaconda3/lib,
]
libraries: [-lpython3.7m, -lcudart, -lcublas, -ljpeg]
std: c++14
options: [
-Wno-deprecated-declarations,
"-gencode=arch=compute_52,code=sm_61",
"-gencode=arch=compute_61,code=sm_61",
"-gencode=arch=compute_60,code=sm_60",
"-gencode=arch=compute_70,code=sm_70",
"-gencode=arch=compute_75,code=sm_75",
]
torchdev:
type: CPPDevObserver
main_pattern: torchdev_.*\.(cu|cpp|cc|cxx)
pattern: .*\.(cc|cpp|cxx|h|hpp|hxx|cu)
compiler: clang++
executable: build/codeai_dev_torch
run_cmd: [$(executable)]
fail_cmds: # run cmd when pervious run fail with retcode
-6: [gdb, -ex, run, -ex, bt, -ex, quit, $(executable)] # segfault in unix
includes: [
include,
/home/yy/anaconda3/lib/python3.7/site-packages/torch/include,
/home/yy/anaconda3/lib/python3.7/site-packages/torch/include/torch/csrc/api/include,
/usr/local/cuda/include,
/home/yy/anaconda3/include,
/home/yy/anaconda3/include/python3.7m,
third_party/pybind11/include,
third_party/cutlass/include,
]
libpaths: [
/home/yy/anaconda3/lib/python3.7/site-packages/torch/lib,
/usr/local/cuda/lib64,
/home/yy/anaconda3/lib,
]
libraries: [-lpython3.7m, -lcublas, -lcudart, -ljpeg, -lpthread,
"-Wl,--no-as-needed,-lc10",
"-Wl,--no-as-needed,-ltorch",
"-Wl,--no-as-needed,-ltorch_cpu",
"-Wl,--no-as-needed,-lc10_cuda",
"-Wl,--no-as-needed,-ltorch_cuda"]
std: c++2a
options: [--cuda-gpu-arch=sm_61, -Wno-deprecated-declarations, -D_GLIBCXX_USE_CXX11_ABI=0]
cutlass @ e33d90b3
Subproject commit e33d90b36109f67915a80c532ebbb978b72c7bd2
...@@ -9,6 +9,10 @@ template <class... T> struct mp_list {}; ...@@ -9,6 +9,10 @@ template <class... T> struct mp_list {};
template <class T, T... I> template <class T, T... I>
using mp_list_c = mp_list<std::integral_constant<T, I>...>; using mp_list_c = mp_list<std::integral_constant<T, I>...>;
template <int... I>
using mp_list_int_c = mp_list<std::integral_constant<int, I>...>;
namespace detail { namespace detail {
template <class... Ts, class F> template <class... Ts, class F>
......
...@@ -418,7 +418,8 @@ bool dispatch_container_noexcept(Iterator begin, Iterator end, F &&f) { ...@@ -418,7 +418,8 @@ bool dispatch_container_noexcept(Iterator begin, Iterator end, F &&f) {
return; return;
} }
if (count >= val_lst_size) { if (count >= val_lst_size) {
TV_THROW_INVALID_ARG("iterator length invalid:", val_lst_size); equal = false;
return;
} }
constexpr auto c = decltype(E)::value; constexpr auto c = decltype(E)::value;
if (c != *iter) { if (c != *iter) {
......
...@@ -54,7 +54,7 @@ class CMakeBuild(build_ext): ...@@ -54,7 +54,7 @@ class CMakeBuild(build_ext):
'-DCMAKE_PREFIX_PATH={}'.format(LIBTORCH_ROOT), '-DCMAKE_PREFIX_PATH={}'.format(LIBTORCH_ROOT),
'-DPYBIND11_PYTHON_VERSION={}'.format(PYTHON_VERSION), '-DPYBIND11_PYTHON_VERSION={}'.format(PYTHON_VERSION),
'-DSPCONV_BuildTests=OFF', '-DSPCONV_BuildTests=OFF',
'-DPYTORCH_VERSION={}'.format(PYTORCH_VERSION_NUMBER) '-DPYTORCH_VERSION={}'.format(PYTORCH_VERSION_NUMBER),
] # -arch=sm_61 ] # -arch=sm_61
if not torch.cuda.is_available() and SPCONV_FORCE_BUILD_CUDA is None: if not torch.cuda.is_available() and SPCONV_FORCE_BUILD_CUDA is None:
cmake_args += ['-DSPCONV_BuildCUDA=OFF'] cmake_args += ['-DSPCONV_BuildCUDA=OFF']
......
...@@ -10,7 +10,7 @@ if(OpenMP_CXX_FOUND) ...@@ -10,7 +10,7 @@ if(OpenMP_CXX_FOUND)
endif() endif()
target_include_directories(spconv PRIVATE ${ALL_INCLUDE} ) target_include_directories(spconv PRIVATE ${ALL_INCLUDE} ${MP11_INCLUDE} )
set_property(TARGET spconv PROPERTY CUDA_STANDARD 14) set_property(TARGET spconv PROPERTY CUDA_STANDARD 14)
set_property(TARGET spconv PROPERTY CXX_STANDARD 14) set_property(TARGET spconv PROPERTY CXX_STANDARD 14)
set_target_properties(spconv PROPERTIES CUDA_SEPARABLE_COMPILATION ON) set_target_properties(spconv PROPERTIES CUDA_SEPARABLE_COMPILATION ON)
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
// limitations under the License. // limitations under the License.
#include <ATen/ATen.h> #include <ATen/ATen.h>
#include <boost/mp11.hpp>
#include <chrono> #include <chrono>
#include <cuhash/hash_table.h> #include <cuhash/hash_table.h>
#include <limits> #include <limits>
...@@ -79,7 +80,7 @@ int create_conv_indice_pair_p1_cuda( ...@@ -79,7 +80,7 @@ int create_conv_indice_pair_p1_cuda(
pa, di, ou); pa, di, ou);
TV_CHECK_CUDA_ERR_V2("prepareIndicePairsKernel failed"); TV_CHECK_CUDA_ERR_V2("prepareIndicePairsKernel failed");
} }
// tv::ssprint("prepareIndicePairsKernel", timer.report() / 1000.0); // tv::ssprint("prepareIndicePairsKernel", timer.report() / 1000.0);
#ifdef TV_LOG_KERNEL_INFO #ifdef TV_LOG_KERNEL_INFO
cudaFuncAttributes attr; cudaFuncAttributes attr;
checkCudaErrors(cudaFuncGetAttributes( checkCudaErrors(cudaFuncGetAttributes(
...@@ -226,6 +227,18 @@ int create_submconv_indice_pair_cuda( ...@@ -226,6 +227,18 @@ int create_submconv_indice_pair_cuda(
spatialVolume *= outSpatialShape[i]; spatialVolume *= outSpatialShape[i];
} }
auto dispatcher = tv::DispatchIntNoexcept<tv::mp_list_c<int, 1, 3, 5>>(); auto dispatcher = tv::DispatchIntNoexcept<tv::mp_list_c<int, 1, 3, 5>>();
namespace mp11 = boost::mp11;
using kernel2_candidates_t =
mp11::mp_product<tv::mp_list, tv::mp_list_int_c<1, 3, 5>,
tv::mp_list_int_c<1, 3, 5>>;
using kernel3_candidates_t =
mp11::mp_product<tv::mp_list, tv::mp_list_int_c<1, 3, 5>,
tv::mp_list_int_c<1, 3, 5>,
tv::mp_list_int_c<1, 3, 5>>;
using kernel3_candidates_final_t = mp11::mp_push_back<kernel3_candidates_t>;
auto dispatcher2 = tv::DispatchContainerNoexcept<kernel2_candidates_t>();
auto dispatcher3 = tv::DispatchContainerNoexcept<kernel3_candidates_final_t>();
if (useHash) { if (useHash) {
auto table = cuhash::HashTable(); auto table = cuhash::HashTable();
...@@ -263,43 +276,33 @@ int create_submconv_indice_pair_cuda( ...@@ -263,43 +276,33 @@ int create_submconv_indice_pair_cuda(
if (NDim == 2) { if (NDim == 2) {
tv::SimpleVector<Index, 2> ou_(outSpatialShape.begin(), tv::SimpleVector<Index, 2> ou_(outSpatialShape.begin(),
outSpatialShape.end()); outSpatialShape.end());
dispatcher2(kernelSize.begin(), kernelSize.end(), [&](auto K) {
dispatcher(kernelSize[0], [&](auto K0C) { constexpr int K0 = mp11::mp_at_c<decltype(K), 0>::value;
dispatcher(kernelSize[1], [&](auto K1C) { constexpr int K1 = mp11::mp_at_c<decltype(K), 1>::value;
constexpr int K0 = decltype(K0C)::value; found = true;
constexpr int K1 = decltype(K1C)::value; getSubMIndicePairsHashUnrollKernel2<Index, K0, K1>
found = true; <<<tv::cuda::getBlocks(numActIn), tv::cuda::CUDA_NUM_THREADS,
getSubMIndicePairsHashUnrollKernel2<Index, K0, K1> 0, stream>>>(tv::torch2tv<Index>(indicesIn),
<<<tv::cuda::getBlocks(numActIn), tv::torch2tv<Index>(indicePairs),
tv::cuda::CUDA_NUM_THREADS, 0, stream>>>( tv::torch2tv<Index>(indiceNum), ou_,
tv::torch2tv<Index>(indicesIn), spatialVolume, tableSize, tableData,
tv::torch2tv<Index>(indicePairs), constants, stash_constants, stash_count);
tv::torch2tv<Index>(indiceNum), ou_, spatialVolume,
tableSize, tableData, constants, stash_constants,
stash_count);
});
}); });
} else if (NDim == 3) { } else if (NDim == 3) {
tv::SimpleVector<Index, 3> ou_(outSpatialShape.begin(), tv::SimpleVector<Index, 3> ou_(outSpatialShape.begin(),
outSpatialShape.end()); outSpatialShape.end());
dispatcher3(kernelSize.begin(), kernelSize.end(), [&](auto K) {
dispatcher(kernelSize[0], [&](auto K0C) { constexpr int K0 = mp11::mp_at_c<decltype(K), 0>::value;
dispatcher(kernelSize[1], [&](auto K1C) { constexpr int K1 = mp11::mp_at_c<decltype(K), 1>::value;
dispatcher(kernelSize[2], [&](auto K2C) { constexpr int K2 = mp11::mp_at_c<decltype(K), 2>::value;
constexpr int K0 = decltype(K0C)::value; found = true;
constexpr int K1 = decltype(K1C)::value; getSubMIndicePairsHashUnrollKernel3<Index, K0, K1, K2>
constexpr int K2 = decltype(K2C)::value; <<<tv::cuda::getBlocks(numActIn), tv::cuda::CUDA_NUM_THREADS,
found = true; 0, stream>>>(tv::torch2tv<Index>(indicesIn),
getSubMIndicePairsHashUnrollKernel3<Index, K0, K1, K2> tv::torch2tv<Index>(indicePairs),
<<<tv::cuda::getBlocks(numActIn), tv::torch2tv<Index>(indiceNum), ou_,
tv::cuda::CUDA_NUM_THREADS, 0, stream>>>( spatialVolume, tableSize, tableData,
tv::torch2tv<Index>(indicesIn), constants, stash_constants, stash_count);
tv::torch2tv<Index>(indicePairs),
tv::torch2tv<Index>(indiceNum), ou_, spatialVolume,
tableSize, tableData, constants, stash_constants,
stash_count);
});
});
}); });
} }
} }
...@@ -338,24 +341,35 @@ int create_submconv_indice_pair_cuda( ...@@ -338,24 +341,35 @@ int create_submconv_indice_pair_cuda(
if (NDim == 2) { if (NDim == 2) {
tv::SimpleVector<Index, 2> ou_(outSpatialShape.begin(), tv::SimpleVector<Index, 2> ou_(outSpatialShape.begin(),
outSpatialShape.end()); outSpatialShape.end());
dispatcher2(kernelSize.begin(), kernelSize.end(), [&](auto K) {
dispatcher(kernelSize[0], [&](auto K0C) { constexpr int K0 = mp11::mp_at_c<decltype(K), 0>::value;
dispatcher(kernelSize[1], [&](auto K1C) { constexpr int K1 = mp11::mp_at_c<decltype(K), 1>::value;
constexpr int K0 = decltype(K0C)::value; found = true;
constexpr int K1 = decltype(K1C)::value; getSubMIndicePairsUnrollKernel2<Index, IndexGrid, K0, K1>
found = true; <<<tv::cuda::getBlocks(numActIn), tv::cuda::CUDA_NUM_THREADS,
getSubMIndicePairsUnrollKernel2<Index, IndexGrid, K0, K1> 0, stream>>>(tv::torch2tv<Index>(indicesIn),
<<<tv::cuda::getBlocks(numActIn), tv::torch2tv<IndexGrid>(gridsOut),
tv::cuda::CUDA_NUM_THREADS, 0, stream>>>( tv::torch2tv<Index>(indicePairs),
tv::torch2tv<Index>(indicesIn), tv::torch2tv<Index>(indiceNum), ou_,
tv::torch2tv<IndexGrid>(gridsOut), spatialVolume);
tv::torch2tv<Index>(indicePairs),
tv::torch2tv<Index>(indiceNum), ou_, spatialVolume);
});
}); });
} else if (NDim == 3) { } else if (NDim == 3) {
tv::SimpleVector<Index, 3> ou_(outSpatialShape.begin(), tv::SimpleVector<Index, 3> ou_(outSpatialShape.begin(),
outSpatialShape.end()); outSpatialShape.end());
dispatcher3(kernelSize.begin(), kernelSize.end(), [&](auto K) {
constexpr int K0 = mp11::mp_at_c<decltype(K), 0>::value;
constexpr int K1 = mp11::mp_at_c<decltype(K), 1>::value;
constexpr int K2 = mp11::mp_at_c<decltype(K), 2>::value;
found = true;
getSubMIndicePairsUnrollKernel3<Index, IndexGrid, K0, K1, K2>
<<<tv::cuda::getBlocks(numActIn), tv::cuda::CUDA_NUM_THREADS,
0, stream>>>(tv::torch2tv<Index>(indicesIn),
tv::torch2tv<IndexGrid>(gridsOut),
tv::torch2tv<Index>(indicePairs),
tv::torch2tv<Index>(indiceNum), ou_,
spatialVolume);
});
/*
dispatcher(kernelSize[0], [&](auto K0C) { dispatcher(kernelSize[0], [&](auto K0C) {
dispatcher(kernelSize[1], [&](auto K1C) { dispatcher(kernelSize[1], [&](auto K1C) {
dispatcher(kernelSize[2], [&](auto K2C) { dispatcher(kernelSize[2], [&](auto K2C) {
...@@ -372,7 +386,7 @@ int create_submconv_indice_pair_cuda( ...@@ -372,7 +386,7 @@ int create_submconv_indice_pair_cuda(
tv::torch2tv<Index>(indiceNum), ou_, spatialVolume); tv::torch2tv<Index>(indiceNum), ou_, spatialVolume);
}); });
}); });
}); });*/
} }
} }
if (!found) { if (!found) {
...@@ -396,7 +410,8 @@ int create_submconv_indice_pair_cuda( ...@@ -396,7 +410,8 @@ int create_submconv_indice_pair_cuda(
resetGridSubMKernel<Index, IndexGrid, NDim> resetGridSubMKernel<Index, IndexGrid, NDim>
<<<tv::cuda::getBlocks(numActIn), tv::cuda::CUDA_NUM_THREADS, 0, <<<tv::cuda::getBlocks(numActIn), tv::cuda::CUDA_NUM_THREADS, 0,
stream>>>(indicesIn.data_ptr<Index>(), stream>>>(indicesIn.data_ptr<Index>(),
tv::torch2tv<IndexGrid>(gridsOut), ou, numActIn, spatialVolume); tv::torch2tv<IndexGrid>(gridsOut), ou, numActIn,
spatialVolume);
TV_CHECK_CUDA_ERR_V2("resetGridKernel failed"); TV_CHECK_CUDA_ERR_V2("resetGridKernel failed");
} }
}); });
......
...@@ -35,7 +35,7 @@ using half_vec_t = ...@@ -35,7 +35,7 @@ using half_vec_t =
template <typename T> template <typename T>
using half_vec_sadd_t = using half_vec_sadd_t =
std::conditional_t<std::is_same<T, at::Half>::value, int4, int4>; std::conditional_t<std::is_same<T, at::Half>::value, int4, int4>;
using kernel_block_t = tv::mp_list_c<int, 64, 32, 16>; using kernel_block_t = tv::mp_list_c<int, 64, 32, 16, 8>;
void sparse_gather_cuda(torch::Tensor buffer, torch::Tensor features, void sparse_gather_cuda(torch::Tensor buffer, torch::Tensor features,
torch::Tensor indices, int size) { torch::Tensor indices, int size) {
...@@ -45,6 +45,7 @@ void sparse_gather_cuda(torch::Tensor buffer, torch::Tensor features, ...@@ -45,6 +45,7 @@ void sparse_gather_cuda(torch::Tensor buffer, torch::Tensor features,
auto stream = at::cuda::getCurrentCUDAStream(); auto stream = at::cuda::getCurrentCUDAStream();
auto dtype = features.scalar_type(); auto dtype = features.scalar_type();
auto inds_dtype = indices.scalar_type(); auto inds_dtype = indices.scalar_type();
// auto timer = spconv::CudaContextTimer<>();
tv::DispatchTorch<float_types_t>()(dtype, [&](auto TValue) { tv::DispatchTorch<float_types_t>()(dtype, [&](auto TValue) {
using T = decltype(TValue); using T = decltype(TValue);
using vecload_type_t = half_vec_t<T>; using vecload_type_t = half_vec_t<T>;
...@@ -53,8 +54,7 @@ void sparse_gather_cuda(torch::Tensor buffer, torch::Tensor features, ...@@ -53,8 +54,7 @@ void sparse_gather_cuda(torch::Tensor buffer, torch::Tensor features,
bool notFound = true; bool notFound = true;
constexpr int vecloadFactor = sizeof(vecload_type_t) / sizeof(T); constexpr int vecloadFactor = sizeof(vecload_type_t) / sizeof(T);
tv::mp_for_each<kernel_block_t>([=, &buffer, &features, &indices, tv::mp_for_each<kernel_block_t>([&](auto NumTLP) {
&notFound](auto NumTLP) {
constexpr int NumILP = NumTLP / 4; constexpr int NumILP = NumTLP / 4;
// constexpr int NumILP = NumTLP / (64 / (NumTLP / vecloadFactor)); // constexpr int NumILP = NumTLP / (64 / (NumTLP / vecloadFactor));
int nHotBlock = (size / NumTLP) * NumTLP; int nHotBlock = (size / NumTLP) * NumTLP;
...@@ -87,6 +87,7 @@ void sparse_gather_cuda(torch::Tensor buffer, torch::Tensor features, ...@@ -87,6 +87,7 @@ void sparse_gather_cuda(torch::Tensor buffer, torch::Tensor features,
features.data_ptr<T>(), features.data_ptr<T>(),
indices.data_ptr<Index>() + nHotBlock, indices.data_ptr<Index>() + nHotBlock,
size - nHotBlock, numPlanes / vecloadFactor); size - nHotBlock, numPlanes / vecloadFactor);
#ifdef TV_LOG_KERNEL_INFO #ifdef TV_LOG_KERNEL_INFO
cudaFuncAttributes attr; cudaFuncAttributes attr;
checkCudaErrors(cudaFuncGetAttributes( checkCudaErrors(cudaFuncGetAttributes(
...@@ -143,8 +144,7 @@ void sparse_scatter_add_cuda(torch::Tensor buffer, torch::Tensor outFeatures, ...@@ -143,8 +144,7 @@ void sparse_scatter_add_cuda(torch::Tensor buffer, torch::Tensor outFeatures,
constexpr int vecloadFactor = constexpr int vecloadFactor =
sizeof(vecload_type_t) / sizeof(T); // important for half. sizeof(vecload_type_t) / sizeof(T); // important for half.
tv::mp_for_each<kernel_block_t>([=, &outFeatures, &buffer, &indices, tv::mp_for_each<kernel_block_t>([&](auto NumTLP) {
&notFound](auto NumTLP) {
// constexpr int NumILP = NumTLP / (64 / (NumTLP / // constexpr int NumILP = NumTLP / (64 / (NumTLP /
// vecloadFactor)); // vecloadFactor));
constexpr int NumILP = NumTLP / 4; constexpr int NumILP = NumTLP / 4;
......
...@@ -70,7 +70,7 @@ class Net(nn.Module): ...@@ -70,7 +70,7 @@ class Net(nn.Module):
def main(): def main():
voxels, coors, spatial_shape = waymo_data() voxels, coors, spatial_shape = waymo_data()
voxels_th = torch.from_numpy(voxels).cuda().float() voxels_th = torch.from_numpy(voxels).cuda().float()
coors_th = torch.from_numpy(coors).cuda() coors_th = torch.from_numpy(coors).cuda().int()
net = Net(spatial_shape[::-1]).cuda().eval().float() net = Net(spatial_shape[::-1]).cuda().eval().float()
print(coors_th.shape) print(coors_th.shape)
out = net(voxels_th, coors_th, 1) out = net(voxels_th, coors_th, 1)
......
...@@ -349,7 +349,7 @@ def scatter_nd(indices, updates, shape): ...@@ -349,7 +349,7 @@ def scatter_nd(indices, updates, shape):
class TestSpConv(TestCase): class TestSpConv(TestCase):
def testSpConv3d(self): def testSpConv3d(self):
np.random.seed(484) np.random.seed(484)
devices = ["cpu:0"] devices = ["cuda:0"]
shapes = [[19, 18, 17]] shapes = [[19, 18, 17]]
batchsizes = [1, 2] batchsizes = [1, 2]
...@@ -752,8 +752,8 @@ def main_subm(algo, dtype=torch.float32): ...@@ -752,8 +752,8 @@ def main_subm(algo, dtype=torch.float32):
if __name__ == '__main__': if __name__ == '__main__':
main(algo=spconv.ConvAlgo.Native, dtype=torch.float32) main_subm(algo=spconv.ConvAlgo.Native, dtype=torch.float32)
main(algo=spconv.ConvAlgo.Native, dtype=torch.half) # main(algo=spconv.ConvAlgo.Native, dtype=torch.half)
# TestCase().assertAllClose(out_my, out_ref) # TestCase().assertAllClose(out_my, out_ref)
# unittest.main() # unittest.main()
# TestSpConv().testSpConv3d() # TestSpConv().testSpConv3d()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment