Commit cfaa1a3a authored by yanyan
Browse files

add Minkowski conv kernel

parent 9ce18407
// Copyright 2019 Yan Yan // Copyright 2019-2020 Yan Yan
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
......
// Copyright 2019 Yan Yan // Copyright 2019-2020 Yan Yan
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
......
// Copyright 2019 Yan Yan // Copyright 2019-2020 Yan Yan
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
......
...@@ -246,8 +246,9 @@ torch::Tensor indiceConvNative(torch::Tensor features, torch::Tensor filters, ...@@ -246,8 +246,9 @@ torch::Tensor indiceConvNative(torch::Tensor features, torch::Tensor filters,
return output; return output;
} }
template <int Algo>
torch::Tensor torch::Tensor
indiceConvSparseConvNet(torch::Tensor features, torch::Tensor filters, indiceConvFused(torch::Tensor features, torch::Tensor filters,
torch::Tensor indicePairs, torch::Tensor indiceNum, torch::Tensor indicePairs, torch::Tensor indiceNum,
int64_t numActOut, int64_t _inverse, int64_t _subM) { int64_t numActOut, int64_t _inverse, int64_t _subM) {
auto kernelVolume = indiceNum.size(0); auto kernelVolume = indiceNum.size(0);
...@@ -277,11 +278,11 @@ indiceConvSparseConvNet(torch::Tensor features, torch::Tensor filters, ...@@ -277,11 +278,11 @@ indiceConvSparseConvNet(torch::Tensor features, torch::Tensor filters,
continue; continue;
} }
if (device == torch::kCPU) { if (device == torch::kCPU) {
TV_THROW_INVALID_ARG("SparseConvNet only support gpu"); TV_THROW_INVALID_ARG("fused only support gpu");
} }
#ifdef TV_CUDA #ifdef TV_CUDA
else if (device == torch::kCUDA) { else if (device == torch::kCUDA) {
fused_conv_cuda(output, features, filters[i], indicePairs[inverse][i], FusedConvDispatch<Algo>::fwd(output, features, filters[i], indicePairs[inverse][i],
indicePairs[!inverse][i], nHot); indicePairs[!inverse][i], nHot);
} }
#endif #endif
...@@ -421,37 +422,6 @@ torch::Tensor indiceConvBatch(torch::Tensor features, torch::Tensor filters, ...@@ -421,37 +422,6 @@ torch::Tensor indiceConvBatch(torch::Tensor features, torch::Tensor filters,
return output; return output;
} }
// Compile-time lookup from a convolution algorithm id to the forward
// implementation exposed as the static member `func`.
// The primary template is only declared in this span; each supported
// algorithm id supplies its own explicit specialization below.
template <int Algo> struct ConvDispatch;

// kNative selects indiceConvNative.
template <> struct ConvDispatch<kNative> {
  static constexpr auto *func = &indiceConvNative;
};

// kBatch selects indiceConvBatch instantiated with `false`.
template <> struct ConvDispatch<kBatch> {
  static constexpr auto *func = &indiceConvBatch<false>;
};

// kBatchGemmGather selects indiceConvBatch instantiated with `true`.
template <> struct ConvDispatch<kBatchGemmGather> {
  static constexpr auto *func = &indiceConvBatch<true>;
};

// kSparseConvNet selects indiceConvSparseConvNet.
template <> struct ConvDispatch<kSparseConvNet> {
  static constexpr auto *func = &indiceConvSparseConvNet;
};
// Forward sparse-convolution entry point that picks an implementation at
// run time.
//
// `algo` is matched against the ids listed in `all_conv_algos_t` by
// tv::DispatchInt; the matching id is handed to the lambda as a
// compile-time tag, which is used to look up ConvDispatch<id>::func.
// All remaining arguments are forwarded to that function unchanged and its
// result is returned.
// NOTE(review): what happens when `algo` matches no listed id is decided by
// tv::DispatchInt, which is not visible here; in that case the returned
// tensor is whatever a default-constructed torch::Tensor is, unless the
// dispatcher throws first. Confirm against tv::DispatchInt.
torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
                         torch::Tensor indicePairs, torch::Tensor indiceNum,
                         int64_t numActOut, int64_t _inverse, int64_t _subM,
                         int64_t algo) {
  torch::Tensor output;
  tv::DispatchInt<all_conv_algos_t>()(algo, [&](auto algoTag) {
    // The tag type carries the selected algorithm id as a constant.
    constexpr int kSelectedAlgo = decltype(algoTag)::value;
    output = ConvDispatch<kSelectedAlgo>::func(
        features, filters, indicePairs, indiceNum, numActOut, _inverse, _subM);
  });
  return output;
}
std::vector<torch::Tensor> std::vector<torch::Tensor>
indiceConvBwNative(torch::Tensor features, torch::Tensor filters, indiceConvBwNative(torch::Tensor features, torch::Tensor filters,
torch::Tensor outGrad, torch::Tensor indicePairs, torch::Tensor outGrad, torch::Tensor indicePairs,
...@@ -544,8 +514,9 @@ indiceConvBwNative(torch::Tensor features, torch::Tensor filters, ...@@ -544,8 +514,9 @@ indiceConvBwNative(torch::Tensor features, torch::Tensor filters,
return {inputGrad, filtersGrad.view(filterShape)}; return {inputGrad, filtersGrad.view(filterShape)};
} }
template <int Algo>
std::vector<torch::Tensor> std::vector<torch::Tensor>
indiceConvBwSparseConvNet(torch::Tensor features, torch::Tensor filters, indiceConvBwFused(torch::Tensor features, torch::Tensor filters,
torch::Tensor outGrad, torch::Tensor indicePairs, torch::Tensor outGrad, torch::Tensor indicePairs,
torch::Tensor indiceNum, int64_t _inverse, torch::Tensor indiceNum, int64_t _inverse,
int64_t _subM) { int64_t _subM) {
...@@ -585,7 +556,7 @@ indiceConvBwSparseConvNet(torch::Tensor features, torch::Tensor filters, ...@@ -585,7 +556,7 @@ indiceConvBwSparseConvNet(torch::Tensor features, torch::Tensor filters,
} }
#ifdef TV_CUDA #ifdef TV_CUDA
else if (device == torch::kCUDA) { else if (device == torch::kCUDA) {
fused_conv_backward_cuda(features, inputGrad, outGrad, filters[i], FusedConvDispatch<Algo>::bwd(features, inputGrad, outGrad, filters[i],
filtersGrad[i], indicePairs[inverse][i], filtersGrad[i], indicePairs[inverse][i],
indicePairs[!inverse][i], nHot); indicePairs[!inverse][i], nHot);
} }
...@@ -725,24 +696,47 @@ indiceConvBwBatch(torch::Tensor features, torch::Tensor filters, ...@@ -725,24 +696,47 @@ indiceConvBwBatch(torch::Tensor features, torch::Tensor filters,
return {inputGrad, filtersGrad.view(filterShape)}; return {inputGrad, filtersGrad.view(filterShape)};
} }
template <int Algo> struct ConvBwDispatch; template <int Algo> struct ConvDispatch;
// kNative: forward pass is indiceConvNative, backward pass is
// indiceConvBwNative.
template <> struct ConvDispatch<kNative> {
  static constexpr auto *fwd = &indiceConvNative;
  static constexpr auto *bwd = &indiceConvBwNative;
};
template <> struct ConvBwDispatch<kNative> { template <> struct ConvDispatch<kBatch> {
constexpr static auto *func = indiceConvBwNative; constexpr static auto *fwd = indiceConvBatch<false>;
constexpr static auto *bwd = indiceConvBwBatch<false>;
}; };
template <> struct ConvBwDispatch<kBatch> { template <> struct ConvDispatch<kBatchGemmGather> {
constexpr static auto *func = indiceConvBwBatch<false>; constexpr static auto *fwd = indiceConvBatch<true>;
constexpr static auto *bwd = indiceConvBwBatch<true>;
}; };
template <> struct ConvBwDispatch<kBatchGemmGather> { template <> struct ConvDispatch<kSparseConvNet> {
constexpr static auto *func = indiceConvBwBatch<true>; constexpr static auto *fwd = indiceConvFused<kFSparseConvNet>;
constexpr static auto *bwd = indiceConvBwFused<kFSparseConvNet>;
}; };
template <> struct ConvBwDispatch<kSparseConvNet> { template <> struct ConvDispatch<kMinkowskiEngine> {
constexpr static auto *func = indiceConvBwSparseConvNet; constexpr static auto *fwd = indiceConvFused<kFMinkowskiEngine>;
constexpr static auto *bwd = indiceConvBwFused<kFMinkowskiEngine>;
}; };
// Forward sparse-convolution entry point that picks an implementation at
// run time.
//
// `algo` is matched against the ids listed in `all_conv_algos_t` by
// tv::DispatchInt; the matching id is handed to the lambda as a
// compile-time tag, which is used to look up ConvDispatch<id>::fwd.
// All remaining arguments are forwarded to that function unchanged and its
// result is returned.
// NOTE(review): what happens when `algo` matches no listed id is decided by
// tv::DispatchInt, which is not visible here; in that case the returned
// tensor is whatever a default-constructed torch::Tensor is, unless the
// dispatcher throws first. Confirm against tv::DispatchInt.
torch::Tensor indiceConv(torch::Tensor features, torch::Tensor filters,
                         torch::Tensor indicePairs, torch::Tensor indiceNum,
                         int64_t numActOut, int64_t _inverse, int64_t _subM,
                         int64_t algo) {
  torch::Tensor output;
  tv::DispatchInt<all_conv_algos_t>()(algo, [&](auto algoTag) {
    // The tag type carries the selected algorithm id as a constant.
    constexpr int kSelectedAlgo = decltype(algoTag)::value;
    output = ConvDispatch<kSelectedAlgo>::fwd(
        features, filters, indicePairs, indiceNum, numActOut, _inverse, _subM);
  });
  return output;
}
std::vector<torch::Tensor> std::vector<torch::Tensor>
indiceConvBackward(torch::Tensor features, torch::Tensor filters, indiceConvBackward(torch::Tensor features, torch::Tensor filters,
torch::Tensor outGrad, torch::Tensor indicePairs, torch::Tensor outGrad, torch::Tensor indicePairs,
...@@ -751,8 +745,8 @@ indiceConvBackward(torch::Tensor features, torch::Tensor filters, ...@@ -751,8 +745,8 @@ indiceConvBackward(torch::Tensor features, torch::Tensor filters,
std::vector<torch::Tensor> res; std::vector<torch::Tensor> res;
tv::DispatchInt<all_conv_algos_t>()(algo, [&](auto I) { tv::DispatchInt<all_conv_algos_t>()(algo, [&](auto I) {
constexpr int AlgoValue = decltype(I)::value; constexpr int AlgoValue = decltype(I)::value;
res = ConvBwDispatch<AlgoValue>::func( res = ConvDispatch<AlgoValue>::bwd(features, filters, outGrad, indicePairs,
features, filters, outGrad, indicePairs, indiceNum, _inverse, _subM); indiceNum, _inverse, _subM);
}); });
return res; return res;
} }
......
// Copyright 2019 Yan Yan // Copyright 2019-2020 Yan Yan
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License. // you may not use this file except in compliance with the License.
......
...@@ -23,41 +23,41 @@ def waymo_data(batch_size=1): ...@@ -23,41 +23,41 @@ def waymo_data(batch_size=1):
class Net(nn.Module): class Net(nn.Module):
def __init__(self, shape): def __init__(self, shape, algo):
super().__init__() super().__init__()
self.net = spconv.SparseSequential( self.net = spconv.SparseSequential(
spconv.SubMConv3d(3, 64, 3, bias=False, indice_key="c0"), spconv.SubMConv3d(3, 64, 3, bias=False, indice_key="c0", algo=algo),
spconv.SubMConv3d(64, 64, 3, bias=False, indice_key="c0"), spconv.SubMConv3d(64, 64, 3, bias=False, indice_key="c0", algo=algo),
# nn.BatchNorm1d(32), # nn.BatchNorm1d(32),
# nn.ReLU(), # nn.ReLU(),
spconv.SparseMaxPool3d(2, 2), spconv.SparseMaxPool3d(2, 2),
spconv.SubMConv3d(64, 96, 3, bias=False, indice_key="c1"), spconv.SubMConv3d(64, 96, 3, bias=False, indice_key="c1", algo=algo),
spconv.SubMConv3d(96, 96, 3, bias=False, indice_key="c1"), spconv.SubMConv3d(96, 96, 3, bias=False, indice_key="c1", algo=algo),
# nn.BatchNorm1d(64), # nn.BatchNorm1d(64),
# nn.ReLU(), # nn.ReLU(),
spconv.SparseMaxPool3d(2, 2), spconv.SparseMaxPool3d(2, 2),
spconv.SubMConv3d(96, 128, 3, bias=False, indice_key="c2"), spconv.SubMConv3d(96, 128, 3, bias=False, indice_key="c2", algo=algo),
spconv.SubMConv3d(128, 128, 3, bias=False, indice_key="c2"), spconv.SubMConv3d(128, 128, 3, bias=False, indice_key="c2", algo=algo),
# nn.BatchNorm1d(128), # nn.BatchNorm1d(128),
# nn.ReLU(), # nn.ReLU(),
spconv.SparseMaxPool3d(2, 2), spconv.SparseMaxPool3d(2, 2),
spconv.SubMConv3d(128, 160, 3, bias=False, indice_key="c3"), spconv.SubMConv3d(128, 160, 3, bias=False, indice_key="c3", algo=algo),
spconv.SubMConv3d(160, 160, 3, bias=False, indice_key="c3"), spconv.SubMConv3d(160, 160, 3, bias=False, indice_key="c3", algo=algo),
# nn.BatchNorm1d(128), # nn.BatchNorm1d(128),
# nn.ReLU(), # nn.ReLU(),
spconv.SparseMaxPool3d(2, 2), spconv.SparseMaxPool3d(2, 2),
spconv.SubMConv3d(160, 192, 3, bias=False, indice_key="c4"), spconv.SubMConv3d(160, 192, 3, bias=False, indice_key="c4", algo=algo),
spconv.SubMConv3d(192, 192, 3, bias=False, indice_key="c4"), spconv.SubMConv3d(192, 192, 3, bias=False, indice_key="c4", algo=algo),
# nn.BatchNorm1d(128), # nn.BatchNorm1d(128),
# nn.ReLU(), # nn.ReLU(),
spconv.SparseMaxPool3d(2, 2), spconv.SparseMaxPool3d(2, 2),
spconv.SubMConv3d(192, 224, 3, bias=False, indice_key="c5"), spconv.SubMConv3d(192, 224, 3, bias=False, indice_key="c5", algo=algo),
spconv.SubMConv3d(224, 224, 3, bias=False, indice_key="c5"), spconv.SubMConv3d(224, 224, 3, bias=False, indice_key="c5", algo=algo),
# nn.BatchNorm1d(128), # nn.BatchNorm1d(128),
# nn.ReLU(), # nn.ReLU(),
spconv.SparseMaxPool3d(2, 2), spconv.SparseMaxPool3d(2, 2),
spconv.SubMConv3d(224, 256, 3, bias=False, indice_key="c6"), spconv.SubMConv3d(224, 256, 3, bias=False, indice_key="c6", algo=algo),
spconv.SubMConv3d(256, 256, 3, bias=False, indice_key="c6"), spconv.SubMConv3d(256, 256, 3, bias=False, indice_key="c6", algo=algo),
) )
max_batch_size = 1 max_batch_size = 1
# grid (dense map) is used for indice generation. use pre-allocated grid can run faster. # grid (dense map) is used for indice generation. use pre-allocated grid can run faster.
...@@ -76,7 +76,8 @@ def main(): ...@@ -76,7 +76,8 @@ def main():
voxels, coors, spatial_shape = waymo_data() voxels, coors, spatial_shape = waymo_data()
voxels_th = torch.from_numpy(voxels).cuda().float() voxels_th = torch.from_numpy(voxels).cuda().float()
coors_th = torch.from_numpy(coors).cuda().int() coors_th = torch.from_numpy(coors).cuda().int()
net = Net(spatial_shape[::-1]).cuda().eval().float() algo = spconv.ConvAlgo.Native
net = Net(spatial_shape[::-1], algo).cuda().eval().float()
print(coors_th.shape) print(coors_th.shape)
out = net(voxels_th, coors_th, 1) out = net(voxels_th, coors_th, 1)
print(out.spatial_shape) print(out.spatial_shape)
......
# Copyright 2019 Yan Yan # Copyright 2019-2020 Yan Yan
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -37,7 +37,7 @@ class SparseConv3dTestTorch(nn.Module): ...@@ -37,7 +37,7 @@ class SparseConv3dTestTorch(nn.Module):
stride, stride,
padding, padding,
dilation, dilation,
algo=spconv.ConvAlgo.SparseConvNet): algo=spconv.ConvAlgo.Minkowski):
super().__init__() super().__init__()
layers = [ layers = [
spconv.SparseConv3d(in_channels, spconv.SparseConv3d(in_channels,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment