Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
one
spconv
Commits
cfaa1a3a
Commit
cfaa1a3a
authored
Jun 16, 2020
by
yanyan
Browse files
add Minkowski conv kernel
parent
9ce18407
Changes
47
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
226 additions
and
20 deletions
+226
-20
LICENSE
LICENSE
+1
-1
include/paramsgrid.h
include/paramsgrid.h
+1
-1
include/spconv/box_iou.h
include/spconv/box_iou.h
+1
-1
include/spconv/fused_conv.h
include/spconv/fused_conv.h
+28
-2
include/spconv/fused_spconv_ops.h
include/spconv/fused_spconv_ops.h
+1
-1
include/spconv/geometry.h
include/spconv/geometry.h
+1
-1
include/spconv/indice.cu.h
include/spconv/indice.cu.h
+1
-1
include/spconv/indice.h
include/spconv/indice.h
+1
-1
include/spconv/maxpool.h
include/spconv/maxpool.h
+1
-1
include/spconv/minkowski.cu.h
include/spconv/minkowski.cu.h
+180
-0
include/spconv/nms.h
include/spconv/nms.h
+1
-1
include/spconv/nms_functor.h
include/spconv/nms_functor.h
+1
-1
include/spconv/nms_gpu.h
include/spconv/nms_gpu.h
+1
-1
include/spconv/nms_ops.h
include/spconv/nms_ops.h
+1
-1
include/spconv/pillar_scatter_functor.h
include/spconv/pillar_scatter_functor.h
+1
-1
include/spconv/pillar_scatter_ops.h
include/spconv/pillar_scatter_ops.h
+1
-1
include/spconv/point2voxel.h
include/spconv/point2voxel.h
+1
-1
include/spconv/pool_ops.h
include/spconv/pool_ops.h
+1
-1
include/spconv/reordering.cu.h
include/spconv/reordering.cu.h
+1
-1
include/spconv/reordering.h
include/spconv/reordering.h
+1
-1
No files found.
LICENSE
View file @
cfaa1a3a
...
@@ -186,7 +186,7 @@
...
@@ -186,7 +186,7 @@
same "printed page" as the copyright notice for easier
same "printed page" as the copyright notice for easier
identification within third-party archives.
identification within third-party archives.
Copyright 2019 Yan Yan
Copyright 2019
-2020
Yan Yan
Licensed under the Apache License, Version 2.0 (the "License");
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
you may not use this file except in compliance with the License.
...
...
include/paramsgrid.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/box_iou.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/fused_conv.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
@@ -13,10 +13,14 @@
...
@@ -13,10 +13,14 @@
// limitations under the License.
// limitations under the License.
#pragma once
#pragma once
#include <cuda_runtime_api.h>
#include <cuda_runtime_api.h>
#include <tensorview/tensor
view
.h>
#include <tensorview/tensor.h>
#include <torch/script.h>
#include <torch/script.h>
namespace
spconv
{
namespace
spconv
{
// Identifies which fused convolution implementation to use. Each enumerator
// has a matching FusedConvDispatch<> specialization in this header.
// Kept as a plain (unscoped) enum: the enumerators are used directly as `int`
// template arguments below.
enum
FusedConvAlgo
{
kFSparseConvNet
,
kFMinkowskiEngine
};
// Compile-time list (tv::mp_list_c) holding every FusedConvAlgo enumerator as
// an int constant.
// NOTE(review): presumably consumed by a dispatch helper that iterates over
// the list to pick an algorithm at runtime -- confirm at the use site.
using
all_fused_conv_algos_t
=
tv
::
mp_list_c
<
int
,
kFSparseConvNet
,
kFMinkowskiEngine
>
;
void
fused_conv_cuda
(
torch
::
Tensor
output
,
torch
::
Tensor
features
,
void
fused_conv_cuda
(
torch
::
Tensor
output
,
torch
::
Tensor
features
,
torch
::
Tensor
filters
,
torch
::
Tensor
indicesIn
,
torch
::
Tensor
filters
,
torch
::
Tensor
indicesIn
,
torch
::
Tensor
indicesOut
,
int
nHot
);
torch
::
Tensor
indicesOut
,
int
nHot
);
...
@@ -26,4 +30,26 @@ void fused_conv_backward_cuda(torch::Tensor features, torch::Tensor din,
...
@@ -26,4 +30,26 @@ void fused_conv_backward_cuda(torch::Tensor features, torch::Tensor din,
torch
::
Tensor
dfilters
,
torch
::
Tensor
indicesIn
,
torch
::
Tensor
dfilters
,
torch
::
Tensor
indicesIn
,
torch
::
Tensor
indicesOut
,
int
nHot
);
torch
::
Tensor
indicesOut
,
int
nHot
);
// Forward fused convolution, MinkowskiEngine variant. Declaration only; the
// definition is not in this header. The parameter list mirrors
// fused_conv_cuda, and FusedConvDispatch<kFMinkowskiEngine>::fwd points here.
// NOTE(review): nHot is presumably the number of valid (indicesIn,
// indicesOut) pairs -- confirm against the definition.
void
fused_conv_cuda_minkowski
(
torch
::
Tensor
output
,
torch
::
Tensor
features
,
torch
::
Tensor
filters
,
torch
::
Tensor
indicesIn
,
torch
::
Tensor
indicesOut
,
int
nHot
);
// Backward fused convolution, MinkowskiEngine variant. Declaration only; the
// definition is not in this header.
// FusedConvDispatch<kFMinkowskiEngine>::bwd points here.
// NOTE(review): din / dfilters look like gradient outputs and dout like the
// incoming gradient -- confirm against the definition.
void
fused_conv_backward_cuda_minkowski
(
torch
::
Tensor
features
,
torch
::
Tensor
din
,
torch
::
Tensor
dout
,
torch
::
Tensor
filters
,
torch
::
Tensor
dfilters
,
torch
::
Tensor
indicesIn
,
torch
::
Tensor
indicesOut
,
int
nHot
);
// Maps a FusedConvAlgo value (passed as int) to the forward/backward entry
// points of that implementation. The primary template is declared but never
// defined, so only the specializations below can be instantiated.
template
<
int
Algo
>
struct
FusedConvDispatch
;
// SparseConvNet variant: fwd/bwd are function pointers to fused_conv_cuda and
// fused_conv_backward_cuda.
template
<
>
struct
FusedConvDispatch
<
kFSparseConvNet
>
{
constexpr
static
auto
*
fwd
=
fused_conv_cuda
;
constexpr
static
auto
*
bwd
=
fused_conv_backward_cuda
;
};
// MinkowskiEngine variant: fwd/bwd are function pointers to
// fused_conv_cuda_minkowski and fused_conv_backward_cuda_minkowski.
template
<
>
struct
FusedConvDispatch
<
kFMinkowskiEngine
>
{
constexpr
static
auto
*
fwd
=
fused_conv_cuda_minkowski
;
constexpr
static
auto
*
bwd
=
fused_conv_backward_cuda_minkowski
;
};
}
// namespace spconv
}
// namespace spconv
include/spconv/fused_spconv_ops.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/geometry.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/indice.cu.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/indice.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/maxpool.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/minkowski.cu.h
0 → 100644
View file @
cfaa1a3a
/* Copyright (c) Chris Choy (chrischoy@ai.stanford.edu).
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Please cite "4D Spatio-Temporal ConvNets: Minkowski Convolutional Neural
* Networks", CVPR'19 (https://arxiv.org/abs/1904.08755) if you use any part
* of the code.
*/
/**
 * @brief Tiled matrix product that gathers input rows through in_map and
 *        accumulates the result into output rows selected by out_map.
 *
 * For every map entry r in [0, hA) and every column c in [0, wB):
 *
 *     C[out_map[r]][c] += sum_k A[in_map[r]][k] * B[k][c]
 *
 * Per the original author's note, in_feat is passed as A and the kernel
 * weights as B. All matrices are addressed row-major.
 *
 * @param A        input matrix with wA columns; rows are selected via in_map.
 * @param wA       number of columns of A (length of the reduction).
 * @param hA       number of (in_map, out_map) entries to process.
 * @param B        matrix with hB rows and wB columns.
 * @param wB       number of columns of B and of C.
 * @param hB       number of rows of B.
 * @param C        output matrix with wB columns; results are added to the
 *                 existing contents with atomicAdd.
 * @param in_map   for entry r, the row of A to read.
 * @param out_map  for entry r, the row of C to accumulate into.
 *
 * NOTE(review): the tile indexing presumes the kernel is launched with
 * BLOCK_SIZE x BLOCK_SIZE thread blocks -- confirm at the launch site.
 * NOTE(review): atomicAdd is presumably required because several map entries
 * may target the same output row -- confirm against the caller.
 */
template <typename Dtype, typename Itype, int BLOCK_SIZE>
__global__ void matmul(const Dtype *A, const int wA, const int hA,
                       const Dtype *B, const int wB, const int hB, Dtype *C,
                       const Itype *in_map, const Itype *out_map) {
  // Shared-memory staging tiles for the current slice of A and of B.
  __shared__ Dtype tile_a[BLOCK_SIZE][BLOCK_SIZE];
  __shared__ Dtype tile_b[BLOCK_SIZE][BLOCK_SIZE];

  // Position of this thread inside its block.
  const int local_col = threadIdx.x;
  const int local_row = threadIdx.y;

  // Global coordinates: `row` indexes the map entries (rows), `col` indexes
  // the columns of B / C.
  const int col = BLOCK_SIZE * blockIdx.x + local_col;
  const int row = BLOCK_SIZE * blockIdx.y + local_row;

  const bool row_in_range = row < hA;
  const bool col_in_range = col < wB;

  // Resolve the gathered / scattered rows; out-of-range threads use row 0 but
  // never dereference A or C with it (every access is guarded below).
  const Itype src_row = row_in_range ? in_map[row] : 0;
  const Itype dst_row = row_in_range ? out_map[row] : 0;

  // Running dot product owned by this thread.
  Dtype acc = 0;

  // Walk along the reduction dimension one tile at a time.
  for (int tile = 0; tile < wA; tile += BLOCK_SIZE) {
    // Each thread stages one element of each tile; elements outside the
    // matrices are zero-filled so they do not contribute to the sum.
    tile_a[local_row][local_col] =
        (row_in_range && (tile + local_col) < wA)
            ? A[wA * src_row + tile + local_col]
            : 0;
    tile_b[local_row][local_col] =
        (col_in_range && (tile + local_row) < hB)
            ? B[wB * (tile + local_row) + col]
            : 0;

    // Both tiles must be fully populated before anyone reads them.
    __syncthreads();

#pragma unroll
    for (int k = 0; k < BLOCK_SIZE; ++k) {
      acc += tile_a[local_row][k] * tile_b[k][local_col];
    }

    // Nobody may overwrite the tiles until every thread has finished reading.
    __syncthreads();
  }

  // Threads that fall outside the result have nothing to publish.
  if (!(row_in_range && col_in_range)) {
    return;
  }
  atomicAdd(&C[wB * dst_row + col], acc);
}
/**
 * @brief Tiled kernel that produces two accumulated products in one pass.
 *
 * Per the original author's note, grad_out_feat is passed as A, the kernel
 * weight (used transposed) as B, and in_feat as D. For every map entry r in
 * [0, hA) the kernel accumulates, with atomicAdd:
 *
 *     C[in_map[r]][c] += sum_k A[out_map[r]][k] * B[c][k]      (c < hB)
 *     E[i][j]         += sum_r D[in_map[r]][i] * A[out_map[r]][j]
 *                                                     (i < wD, j < wA)
 *
 * All matrices are addressed row-major.
 *
 * @param A        matrix with wA columns; rows are selected via out_map.
 * @param wA       number of columns of A (and of E).
 * @param hA       number of (in_map, out_map) entries to process.
 * @param B        matrix with hB rows and wB columns, read transposed.
 * @param wB       number of columns of B.
 * @param hB       number of rows of B; also the number of columns of C.
 * @param D        matrix with wD columns; rows are selected via in_map.
 * @param wD       number of columns of D (and number of rows of E).
 * @param hD       row bound applied when staging D.
 * @param C        output with hB columns, accumulated into row in_map[r].
 * @param E        output with wA columns, accumulated across blocks.
 * @param in_map   for entry r, the row of D to read and of C to update.
 * @param out_map  for entry r, the row of A to read.
 *
 * NOTE(review): the tile indexing presumes BLOCK_SIZE x BLOCK_SIZE thread
 * blocks, and the D tile reuses the row resolved under `y < hA`, so hD is
 * presumably equal to hA -- confirm both at the launch site.
 */
template <typename Dtype, typename Itype, int BLOCK_SIZE>
__global__ void matmul2(const Dtype *A, const int wA, const int hA,
                        const Dtype *B, const int wB, const int hB,
                        const Dtype *D, const int wD, const int hD, Dtype *C,
                        Dtype *E, const Itype *in_map, const Itype *out_map) {
  // Shared-memory staging tiles: a slice of A, of B (loaded transposed) and
  // of D.
  __shared__ Dtype tile_a[BLOCK_SIZE][BLOCK_SIZE];
  __shared__ Dtype tile_bt[BLOCK_SIZE][BLOCK_SIZE];
  __shared__ Dtype tile_d[BLOCK_SIZE][BLOCK_SIZE];

  // Position of this thread inside its block.
  const int local_col = threadIdx.x;
  const int local_row = threadIdx.y;

  // Global coordinates: `row` indexes the map entries, `col` the columns.
  const int block_col0 = blockIdx.x * BLOCK_SIZE;
  const int col = BLOCK_SIZE * blockIdx.x + local_col;
  const int row = BLOCK_SIZE * blockIdx.y + local_row;

  const bool row_in_range = row < hA;

  // Out-of-range threads fall back to row 0; every global access that uses
  // these values is guarded.
  const Itype src_row = row_in_range ? in_map[row] : 0;
  const Itype dst_row = row_in_range ? out_map[row] : 0;

  // Running dot product for this thread's element of C.
  Dtype c_acc = 0;

  // The D tile does not depend on the reduction index, so stage it once. It
  // becomes visible to the whole block at the first barrier inside the loop.
  tile_d[local_row][local_col] =
      (row < hD && col < wD) ? D[wD * src_row + col] : 0;

  // Walk along the columns of A one tile at a time.
  for (int tile = 0; tile < wA; tile += BLOCK_SIZE) {
    // Stage one element of the A tile and one of the transposed B tile;
    // positions outside the matrices are zero-filled.
    tile_a[local_row][local_col] =
        (row_in_range && (tile + local_col) < wA)
            ? A[wA * dst_row + tile + local_col]
            : 0;
    tile_bt[local_row][local_col] =
        (col < hB && (tile + local_row) < wB)
            ? B[wB * col + tile + local_row]
            : 0;

    // All tiles must be complete before they are read.
    __syncthreads();

    // The contribution to E is local to this tile, so it restarts from zero
    // every iteration, whereas c_acc keeps accumulating across tiles.
    Dtype e_acc = 0;
#pragma unroll
    for (int k = 0; k < BLOCK_SIZE; ++k) {
      c_acc += tile_a[local_row][k] * tile_bt[k][local_col];
      e_acc += tile_d[k][local_row] * tile_a[k][local_col];
    }

    // Keep the tiles intact until every thread has finished reading them.
    __syncthreads();

    // E collects contributions from multiple blocks, hence the atomic
    // addition (a more sophisticated reduction could replace it).
    const int e_row = block_col0 + local_row;
    const int e_col = tile + local_col;
    if (e_row < wD && e_col < wA) {
      atomicAdd(&E[wA * e_row + e_col], e_acc);
    }
  }

  // Threads outside the C result have nothing to publish.
  if (!(row_in_range && col < hB)) {
    return;
  }
  atomicAdd(&C[hB * src_row + col], c_acc);
}
include/spconv/nms.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/nms_functor.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/nms_gpu.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/nms_ops.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/pillar_scatter_functor.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/pillar_scatter_ops.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/point2voxel.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/pool_ops.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/reordering.cu.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
include/spconv/reordering.h
View file @
cfaa1a3a
// Copyright 2019 Yan Yan
// Copyright 2019
-2020
Yan Yan
//
//
// Licensed under the Apache License, Version 2.0 (the "License");
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// you may not use this file except in compliance with the License.
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment