OpenDAS / mmdetection3d
Commit d1b9ae40, authored Jun 06, 2020 by zhangwenwei
Commit message: clean c files
Parent: 16c3f6e1
Changes: 35 in total; this page shows 15 changed files with 435 additions and 451 deletions (+435 −451). The commit is a formatting pass over the spconv C++/CUDA sources: includes are re-sorted, long signatures re-wrapped, and brace/indent style normalized. The hunks below show the reformatted code.
- mmdet3d/ops/spconv/include/spconv/maxpool.h (+9 −14)
- mmdet3d/ops/spconv/include/spconv/mp_helper.h (+11 −7)
- mmdet3d/ops/spconv/include/spconv/nms.h (+13 −18)
- mmdet3d/ops/spconv/include/spconv/nms_functor.h (+11 −16)
- mmdet3d/ops/spconv/include/spconv/point2voxel.h (+60 −77)
- mmdet3d/ops/spconv/include/spconv/reordering.cu.h (+1 −1)
- mmdet3d/ops/spconv/include/spconv/reordering.h (+13 −15)
- mmdet3d/ops/spconv/include/tensorview/helper_kernel.cu.h (+27 −35)
- mmdet3d/ops/spconv/include/tensorview/tensorview.h (+135 −116)
- mmdet3d/ops/spconv/src/indice.cc (+34 −38)
- mmdet3d/ops/spconv/src/indice_cuda.cu (+33 −33)
- mmdet3d/ops/spconv/src/maxpool.cc (+6 −6)
- mmdet3d/ops/spconv/src/maxpool_cuda.cu (+51 −44)
- mmdet3d/ops/spconv/src/reordering.cc (+11 −11)
- mmdet3d/ops/spconv/src/reordering_cuda.cu (+20 −20)
mmdet3d/ops/spconv/include/spconv/maxpool.h

@@ -16,29 +16,24 @@
```cpp
#define SPARSE_MAXPOOL_FUNCTOR_H_
#include <tensorview/tensorview.h>

namespace spconv {
namespace functor {
template <typename Device, typename T, typename Index>
struct SparseMaxPoolForwardFunctor {
  void operator()(const Device &d, tv::TensorView<T> outFeatures,
                  tv::TensorView<const T> inFeatures,
                  tv::TensorView<const Index> indices, int size);
};

template <typename Device, typename T, typename Index>
struct SparseMaxPoolBackwardFunctor {
  void operator()(const Device &d, tv::TensorView<const T> outFeatures,
                  tv::TensorView<const T> inFeatures,
                  tv::TensorView<const T> dout, tv::TensorView<T> din,
                  tv::TensorView<const Index> indices, int size);
};
}  // namespace functor
}  // namespace spconv
#endif
```
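The functors above follow spconv's device-tag dispatch pattern: the primary template is only declared, and each backend supplies a specialization (tv::CPU in src/maxpool.cc, tv::GPU in src/maxpool_cuda.cu), so the call site selects an implementation at compile time. A stripped-down sketch of the pattern with illustrative names (ScaleFunctor is not part of the repository):

```cpp
// Device tags: empty types whose only job is to select a specialization.
struct CPU {};

template <typename Device, typename T>
struct ScaleFunctor;  // primary template: declared, never defined

template <typename T>
struct ScaleFunctor<CPU, T> {  // CPU backend implementation
  void operator()(const CPU &, T *data, int n, T factor) {
    for (int i = 0; i < n; ++i) data[i] *= factor;
  }
};

// Usage sketch: ScaleFunctor<CPU, float>()(CPU{}, ptr, n, 2.0f);
```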
mmdet3d/ops/spconv/include/spconv/mp_helper.h

@@ -4,7 +4,8 @@
```cpp
#include <utility>

namespace spconv {
template <class... T>
struct mp_list {};

template <class T, T... I>
using mp_list_c = mp_list<std::integral_constant<T, I>...>;
```

@@ -16,15 +17,17 @@ constexpr F mp_for_each_impl(mp_list<T...>, F &&f) {
```cpp
  return std::initializer_list<int>{(f(T()), 0)...}, std::forward<F>(f);
}

template <class F>
constexpr F mp_for_each_impl(mp_list<>, F &&f) {
  return std::forward<F>(f);
}
}  // namespace detail

namespace detail {
template <class A, template <class...> class B>
struct mp_rename_impl {
  // An error "no type named 'type'" here means that the first argument to
  // mp_rename is not a list
};
```

@@ -34,14 +37,15 @@ struct mp_rename_impl<A<T...>, B> {
```cpp
  using type = B<T...>;
};
}  // namespace detail

template <class A, template <class...> class B>
using mp_rename = typename detail::mp_rename_impl<A, B>::type;

template <class L, class F>
constexpr F mp_for_each(F &&f) {
  return detail::mp_for_each_impl(mp_rename<L, mp_list>(), std::forward<F>(f));
}
}  // namespace spconv
#endif
```
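For context, mp_for_each visits a compile-time type list at runtime by invoking the functor once per type with a value-initialized element, and mp_rename lets any variadic template (for example std::tuple) serve as the list. A minimal usage sketch under that reading (the lambda is illustrative):

```cpp
#include <cstdio>
#include <tuple>

#include <spconv/mp_helper.h>  // assumes the header is on the include path

int main() {
  // Visits int, float, double in order; each call receives T().
  spconv::mp_for_each<std::tuple<int, float, double>>(
      [](auto v) { std::printf("sizeof = %zu\n", sizeof(v)); });
  return 0;
}
```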
mmdet3d/ops/spconv/include/spconv/nms.h

@@ -16,11 +16,13 @@
```cpp
#define NMS_CPU_H
#include <pybind11/pybind11.h>
// must include pybind11/stl.h if using containers in STL in arguments.
#include <pybind11/numpy.h>
#include <pybind11/stl.h>

#include <algorithm>
#include <boost/geometry.hpp>
#include <vector>

#include "box_iou.h"
#include "nms_gpu.h"

namespace spconv {
```

@@ -48,13 +50,11 @@ std::vector<int> non_max_suppression_cpu(py::array_t<DType> boxes,
```cpp
  DType xx1, xx2, w, h, inter, ovr;
  for (int _i = 0; _i < ndets; ++_i) {
    i = order_r(_i);
    if (suppressed_rw(i) == 1) continue;
    keep.push_back(i);
    for (int _j = _i + 1; _j < ndets; ++_j) {
      j = order_r(_j);
      if (suppressed_rw(j) == 1) continue;
      xx2 = std::min(boxes_r(i, 2), boxes_r(j, 2));
      xx1 = std::max(boxes_r(i, 0), boxes_r(j, 0));
      w = xx2 - xx1 + eps;
```

@@ -65,8 +65,7 @@ std::vector<int> non_max_suppression_cpu(py::array_t<DType> boxes,
```cpp
      if (h > 0) {
        inter = w * h;
        ovr = inter / (area_rw(i) + area_rw(j) - inter);
        if (ovr >= thresh) suppressed_rw(j) = 1;
      }
    }
  }
```

@@ -97,15 +96,12 @@ std::vector<int> rotate_non_max_suppression_cpu(py::array_t<DType> box_corners,
```cpp
  for (int _i = 0; _i < ndets; ++_i) {
    i = order_r(_i);
    if (suppressed_rw(i) == 1) continue;
    keep.push_back(i);
    for (int _j = _i + 1; _j < ndets; ++_j) {
      j = order_r(_j);
      if (suppressed_rw(j) == 1) continue;
      if (standup_iou_r(i, j) <= 0.0) continue;
      // std::cout << "pre_poly" << std::endl;
      try {
        bg::append(poly,
```

@@ -164,13 +160,12 @@ std::vector<int> rotate_non_max_suppression_cpu(py::array_t<DType> box_corners,
```cpp
      }
    }*/
    // std::cout << "post_union" << poly_union.empty() << std::endl;
    if (!poly_union.empty()) {  // ignore invalid box
      union_area = bg::area(poly_union.front());
      // std::cout << "post union area" << std::endl;
      // std::cout << union_area << "debug" << std::endl;
      overlap = inter_area / union_area;
      if (overlap >= thresh) suppressed_rw(j) = 1;
      poly_union.clear();
    }
  }
```

@@ -197,5 +192,5 @@ int non_max_suppression(py::array_t<DType> boxes, py::array_t<int> keep_out,
```cpp
                             nms_overlap_thresh, device_id);
}
}  // namespace spconv
#endif
```
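non_max_suppression_cpu is the classic greedy scheme: boxes arrive sorted by score, and each kept box suppresses later boxes whose IoU with it reaches thresh. A self-contained sketch of the same loop on plain arrays, without the pybind11 views (names are illustrative):

```cpp
#include <algorithm>
#include <array>
#include <vector>

// boxes: (x1, y1, x2, y2), pre-sorted by descending score.
std::vector<int> nms_cpu(const std::vector<std::array<float, 4>> &boxes,
                         float thresh, float eps = 1.0f) {
  const int ndets = static_cast<int>(boxes.size());
  std::vector<char> suppressed(ndets, 0);
  std::vector<int> keep;
  for (int i = 0; i < ndets; ++i) {
    if (suppressed[i]) continue;
    keep.push_back(i);  // highest-scoring survivor
    const auto &a = boxes[i];
    const float area_a = (a[2] - a[0] + eps) * (a[3] - a[1] + eps);
    for (int j = i + 1; j < ndets; ++j) {
      if (suppressed[j]) continue;
      const auto &b = boxes[j];
      // Intersection rectangle of a and b.
      const float w = std::min(a[2], b[2]) - std::max(a[0], b[0]) + eps;
      const float h = std::min(a[3], b[3]) - std::max(a[1], b[1]) + eps;
      if (w > 0 && h > 0) {
        const float inter = w * h;
        const float area_b = (b[2] - b[0] + eps) * (b[3] - b[1] + eps);
        if (inter / (area_a + area_b - inter) >= thresh) suppressed[j] = 1;
      }
    }
  }
  return keep;
}
```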
mmdet3d/ops/spconv/include/spconv/nms_functor.h

@@ -16,27 +16,22 @@
```cpp
#define NMS_FUNCTOR_H_
#include <tensorview/tensorview.h>

namespace spconv {
namespace functor {
template <typename Device, typename T, typename Index>
struct NonMaxSupressionFunctor {
  Index operator()(const Device &d, tv::TensorView<Index> keep,
                   tv::TensorView<const T> boxes, T threshold, T eps);
};

template <typename Device, typename T, typename Index>
struct rotateNonMaxSupressionFunctor {
  Index operator()(const Device &d, tv::TensorView<Index> keep,
                   tv::TensorView<const T> boxCorners,
                   tv::TensorView<const T> standupIoU, T threshold);
};
}  // namespace functor
}  // namespace spconv
#endif
```
mmdet3d/ops/spconv/include/spconv/point2voxel.h

@@ -16,13 +16,15 @@
```cpp
#include <pybind11/pybind11.h>
// must include pybind11/eigen.h if using eigen matrix as arguments.
// must include pybind11/stl.h if using containers in STL in arguments.
#include <pybind11/numpy.h>
#include <pybind11/stl.h>

#include <algorithm>
// #include <vector>
#include <math.h>

#include <iostream>

namespace spconv {
namespace py = pybind11;
using namespace pybind11::literals;
```

@@ -64,13 +66,11 @@ int points_to_voxel_3d_np(py::array_t<DType> points, py::array_t<DType> voxels,
```cpp
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      if (voxel_num >= max_voxels) break;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
      for (int k = 0; k < NDim; ++k) {
```

@@ -87,20 +87,19 @@ int points_to_voxel_3d_np(py::array_t<DType> points, py::array_t<DType> voxels,
```cpp
  }
  for (int i = 0; i < voxel_num; ++i) {
    coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
  }
  return voxel_num;
}

template <typename DType, int NDim>
int points_to_voxel_3d_np_mean(py::array_t<DType> points,
                               py::array_t<DType> voxels,
                               py::array_t<DType> means, py::array_t<int> coors,
                               py::array_t<int> num_points_per_voxel,
                               py::array_t<int> coor_to_voxelidx,
                               std::vector<DType> voxel_size,
                               std::vector<DType> coors_range, int max_points,
                               int max_voxels) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto means_rw = means.template mutable_unchecked<2>();
  auto voxels_rw = voxels.template mutable_unchecked<3>();
```

@@ -131,13 +130,11 @@ int points_to_voxel_3d_np_mean(py::array_t<DType> points, py::array_t<DType> vox
```cpp
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      if (voxel_num >= max_voxels) break;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
      for (int k = 0; k < NDim; ++k) {
```

@@ -151,14 +148,15 @@ int points_to_voxel_3d_np_mean(py::array_t<DType> points, py::array_t<DType> vox
```cpp
    }
    num_points_per_voxel_rw(voxelidx) += 1;
    for (int k = 0; k < num_features; ++k) {
      means_rw(voxelidx, k) +=
          (points_rw(i, k) - means_rw(voxelidx, k)) / DType(num + 1);
    }
  }
  for (int i = 0; i < voxel_num; ++i) {
    coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
    num = num_points_per_voxel_rw(i);
    for (int j = num; j < max_points; ++j) {
      for (int k = 0; k < num_features; ++k) {
        voxels_rw(i, j, k) = means_rw(i, k);
      }
```

@@ -168,15 +166,12 @@ int points_to_voxel_3d_np_mean(py::array_t<DType> points, py::array_t<DType> vox
```cpp
}

template <typename DType, int NDim>
int points_to_voxel_3d_np_height(
    py::array_t<DType> points, py::array_t<DType> voxels,
    py::array_t<DType> height, py::array_t<DType> maxs, py::array_t<int> coors,
    py::array_t<int> num_points_per_voxel, py::array_t<int> coor_to_voxelidx,
    std::vector<DType> voxel_size, std::vector<DType> coors_range,
    int max_points, int max_voxels) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto height_rw = height.template mutable_unchecked<2>();
  auto maxs_rw = maxs.template mutable_unchecked<2>();
```

@@ -208,13 +203,11 @@ int points_to_voxel_3d_np_height(py::array_t<DType> points, py::array_t<DType> v
```cpp
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      if (voxel_num >= max_voxels) break;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
      for (int k = 0; k < NDim; ++k) {
```

@@ -225,7 +218,8 @@ int points_to_voxel_3d_np_height(py::array_t<DType> points, py::array_t<DType> v
```cpp
    if (num < max_points) {
      for (int k = 0; k < num_features; ++k) {
        voxels_rw(voxelidx, num, k) = points_rw(i, k);
        height_rw(voxelidx, k) =
            std::min(points_rw(i, k), height_rw(voxelidx, k));
        maxs_rw(voxelidx, k) = std::max(points_rw(i, k), maxs_rw(voxelidx, k));
      }
      num_points_per_voxel_rw(voxelidx) += 1;
```

@@ -241,15 +235,11 @@ int points_to_voxel_3d_np_height(py::array_t<DType> points, py::array_t<DType> v
```cpp
}

template <typename DType, int NDim>
int block_filtering(py::array_t<DType> points, py::array_t<int> mask,
                    py::array_t<DType> height, py::array_t<DType> maxs,
                    py::array_t<int> coor_to_voxelidx,
                    std::vector<DType> voxel_size,
                    std::vector<DType> coors_range, int max_voxels,
                    DType eps) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto height_rw = height.template mutable_unchecked<1>();
  auto maxs_rw = maxs.template mutable_unchecked<1>();
```

@@ -278,8 +268,7 @@ int block_filtering(py::array_t<DType> points,
```cpp
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
```

@@ -299,30 +288,23 @@ int block_filtering(py::array_t<DType> points,
```cpp
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if ((maxs_rw(voxelidx) - height_rw(voxelidx, 2)) < eps) {
      mask(i) = 0;
    }
  }
}

template <typename DType, int NDim>
int points_to_voxel_3d_with_filtering(
    py::array_t<DType> points, py::array_t<DType> voxels,
    py::array_t<int> voxel_mask, py::array_t<DType> mins,
    py::array_t<DType> maxs, py::array_t<int> coors,
    py::array_t<int> num_points_per_voxel, py::array_t<int> coor_to_voxelidx,
    std::vector<DType> voxel_size, std::vector<DType> coors_range,
    int max_points, int max_voxels, int block_factor, int block_size,
    DType height_threshold) {
  auto points_rw = points.template mutable_unchecked<2>();
  auto mins_rw = mins.template mutable_unchecked<2>();
  auto maxs_rw = maxs.template mutable_unchecked<2>();
```

@@ -361,13 +343,11 @@ int points_to_voxel_3d_with_filtering(py::array_t<DType> points, py::array_t<DTy
```cpp
      }
      coor[ndim_minus_1 - j] = c;
    }
    if (failed) continue;
    voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      if (voxel_num >= max_voxels) break;
      voxel_num += 1;
      coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
      for (int k = 0; k < NDim; ++k) {
```

@@ -381,8 +361,10 @@ int points_to_voxel_3d_with_filtering(py::array_t<DType> points, py::array_t<DTy
```cpp
    }
    block_coor[0] = coor[1] / block_factor;
    block_coor[1] = coor[2] / block_factor;
    mins_rw(block_coor[0], block_coor[1]) =
        std::min(points_rw(i, 2), mins_rw(block_coor[0], block_coor[1]));
    maxs_rw(block_coor[0], block_coor[1]) =
        std::max(points_rw(i, 2), maxs_rw(block_coor[0], block_coor[1]));
    num_points_per_voxel_rw(voxelidx) += 1;
  }
}
```

@@ -394,13 +376,15 @@ int points_to_voxel_3d_with_filtering(py::array_t<DType> points, py::array_t<DTy
```cpp
    block_coor[1] = coor[2] / block_factor;
    min_value = mins_rw(block_coor[0], block_coor[1]);
    max_value = maxs_rw(block_coor[0], block_coor[1]);
    startx = std::max(0, block_coor[0] - block_size / 2);
    stopx = std::min(block_shape_H, block_coor[0] + block_size - block_size / 2);
    starty = std::max(0, block_coor[1] - block_size / 2);
    stopy = std::min(block_shape_W, block_coor[1] + block_size - block_size / 2);
    for (int j = startx; j < stopx; ++j) {
      for (int k = starty; k < stopy; ++k) {
        min_value = std::min(min_value, mins_rw(j, k));
        max_value = std::max(max_value, maxs_rw(j, k));
      }
```

@@ -410,5 +394,4 @@ int points_to_voxel_3d_with_filtering(py::array_t<DType> points, py::array_t<DTy
```cpp
  return voxel_num;
}
}  // namespace spconv
```
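points_to_voxel_3d_np_mean keeps the per-voxel mean incrementally: each arriving point updates it as mean += (x − mean) / (n + 1), so no second pass over the points is needed and large running sums are avoided. A tiny self-contained check of that recurrence:

```cpp
#include <cassert>
#include <cmath>

int main() {
  // Same update rule as the means_rw line above.
  const float xs[4] = {1.f, 2.f, 3.f, 10.f};
  float mean = 0.f;
  for (int n = 0; n < 4; ++n) mean += (xs[n] - mean) / float(n + 1);
  assert(std::fabs(mean - 4.f) < 1e-5f);  // (1 + 2 + 3 + 10) / 4 == 4
  return 0;
}
```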
mmdet3d/ops/spconv/include/spconv/reordering.cu.h

@@ -156,6 +156,6 @@ __global__ void scatterAddVecBlockKernel(T *outFeatures, const T *buffer,
```cpp
    }
  }
}
}  // namespace spconv
#endif
```
mmdet3d/ops/spconv/include/spconv/reordering.h

@@ -16,25 +16,23 @@
```cpp
#define SPARSE_REORDERING_FUNCTOR_H_
#include <tensorview/tensorview.h>

namespace spconv {
namespace functor {
template <typename Device, typename T, typename Index>
struct SparseGatherFunctor {
  void operator()(const Device &d, tv::TensorView<T> buffer,
                  tv::TensorView<const T> features,
                  tv::TensorView<const Index> indices, int size);
};

template <typename Device, typename T, typename Index>
struct SparseScatterAddFunctor {
  void operator()(const Device &d, tv::TensorView<T> out_features,
                  tv::TensorView<const T> buffer,
                  tv::TensorView<const Index> indices, int size,
                  bool stable = false);
};
}  // namespace functor
}  // namespace spconv
#endif
```
mmdet3d/ops/spconv/include/tensorview/helper_kernel.cu.h

```cpp
#pragma once
// from tensorflow
namespace tv {
namespace detail {
template <typename T>
class KernelLoop {
  struct Iterator {
    __forceinline__ __device__ Iterator(T index, T delta)
        : index_(index), delta_(delta) {}
    __forceinline__ __device__ T operator*() const { return index_; }
    __forceinline__ __device__ Iterator &operator++() {
      index_ += delta_;
      return *this;
    }
    __forceinline__ __device__ bool operator!=(const Iterator &other) const {
      bool greater = index_ > other.index_;
      bool less = index_ < other.index_;
      // Anything past an end iterator (delta_ == 0) is equal.
      // In range-based for loops, this optimizes to 'return less'.
      if (!other.delta_) {
        return less;
      }
      if (!delta_) {
        return greater;
      }
      return less || greater;
    }

   private:
    T index_;
    const T delta_;
  };

 public:
  __forceinline__ __device__ KernelLoop(T begin, T delta, T end)
      : begin_(begin), delta_(delta), end_(end) {}

  __forceinline__ __device__ Iterator begin() const {
    return Iterator{begin_, delta_};
  }
  __forceinline__ __device__ Iterator end() const { return Iterator{end_, 0}; }

 private:
  T begin_;
  T delta_;
  T end_;
};
}  // namespace detail

template <typename T, int NumILP = 1>
__forceinline__ __device__ detail::KernelLoop<T> KernelLoopX(T count) {
  return detail::KernelLoop<T>(blockIdx.x * blockDim.x + threadIdx.x,
                               gridDim.x * blockDim.x * NumILP, count);
}

// Helper to visit indices in the range 0 <= i < count using the y-coordinate.
// Usage: for(int i : KernelLoopY(count)) { visit(i); }
template <typename T, int NumILP = 1>
__forceinline__ __device__ detail::KernelLoop<T> KernelLoopY(T count) {
  return detail::KernelLoop<T>(blockIdx.y * blockDim.y + threadIdx.y,
                               gridDim.y * blockDim.y * NumILP, count);
}

// Helper to visit indices in the range 0 <= i < count using the z-coordinate.
// Usage: for(int i : KernelLoopZ(count)) { visit(i); }
template <typename T, int NumILP = 1>
__forceinline__ __device__ detail::KernelLoop<T> KernelLoopZ(T count) {
  return detail::KernelLoop<T>(blockIdx.z * blockDim.z + threadIdx.z,
                               gridDim.z * blockDim.z * NumILP, count);
}
}  // namespace tv
```
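KernelLoopX/Y/Z implement the standard grid-stride loop: each thread starts at its global index and steps by the total thread count, so a fixed launch shape covers any count. A minimal kernel sketch using it (the kernel and the launch shape are illustrative, not from the repository):

```cpp
#include <tensorview/helper_kernel.cu.h>

// Scales data[0..n) in place; correct for any n and any grid size.
template <typename T>
__global__ void scaleKernel(T *data, T factor, int n) {
  for (int i : tv::KernelLoopX<int>(n)) {
    data[i] *= factor;
  }
}

// Launch sketch: scaleKernel<float><<<64, 256>>>(d_ptr, 2.0f, n);
```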
mmdet3d/ops/spconv/include/tensorview/tensorview.h

@@ -13,10 +13,11 @@
```cpp
// limitations under the License.
#pragma once
#include <cuda_runtime_api.h>

#include <algorithm>
#include <cassert>
#include <cstdlib>
#include <iostream>
#include <memory>
// #include <prettyprint.h>
```

@@ -42,22 +43,22 @@ namespace tv {
```cpp
#define TV_HOST_DEVICE
#endif

#define TV_REQUIRE(expr, ...) \
  {                           \
    if (!(expr)) {            \
      printf(__VA_ARGS__);    \
      assert(expr);           \
    }                         \
  }

#define TV_DEVICE_REQUIRE(expr, ...)                      \
  {                                                       \
    if (!(expr) && threadIdx.x == 0) printf(__VA_ARGS__); \
    assert(expr);                                         \
  }

template <class SStream, class T>
void sstream_print(SStream &ss, T val) {
  ss << val;
}
```

@@ -67,37 +68,37 @@ void sstream_print(SStream &ss, T val, TArgs... args) {
```cpp
  sstream_print(ss, args...);
}

#define TV_ASSERT_RT_ERR(expr, ...)                     \
  {                                                     \
    if (!(expr)) {                                      \
      std::stringstream __macro_s;                      \
      __macro_s << __FILE__ << " " << __LINE__ << "\n"; \
      __macro_s << #expr << " assert faild. ";          \
      tv::sstream_print(__macro_s, __VA_ARGS__);        \
      throw std::runtime_error(__macro_s.str());        \
    }                                                   \
  }

#define TV_ASSERT_INVALID_ARG(expr, ...)                \
  {                                                     \
    if (!(expr)) {                                      \
      std::stringstream __macro_s;                      \
      __macro_s << __FILE__ << " " << __LINE__ << "\n"; \
      __macro_s << #expr << " assert faild. ";          \
      tv::sstream_print(__macro_s, __VA_ARGS__);        \
      throw std::invalid_argument(__macro_s.str());     \
    }                                                   \
  }

#define TV_CHECK_CUDA_ERR()                                      \
  {                                                              \
    auto err = cudaGetLastError();                               \
    if (err != cudaSuccess) {                                    \
      std::stringstream __macro_s;                               \
      __macro_s << __FILE__ << " " << __LINE__ << "\n";          \
      __macro_s << "cuda execution failed with error " << err;   \
      throw std::runtime_error(__macro_s.str());                 \
    }                                                            \
  }

struct GPU {
```

@@ -130,7 +131,7 @@ constexpr size_t calc_align(size_t ndim)
```cpp
 */
template <typename T, size_t MaxDim = TV_MAX_DIM>
struct /*alignas(calc_align<T>(MaxDim))*/ SimpleVector {
 public:
  TV_HOST_DEVICE_INLINE SimpleVector(){};
  TV_HOST_DEVICE_INLINE SimpleVector(std::initializer_list<T> q) {
    TV_ASSERT(q.size() <= MaxDim);
```

@@ -187,7 +188,7 @@ public:
```cpp
  typedef size_t size_type;

  class iterator {
   public:
    typedef iterator self_type;
    typedef T value_type;
    typedef T &reference;
```

@@ -213,12 +214,12 @@ public:
```cpp
      return ptr_ != rhs.ptr_;
    }

   private:
    pointer ptr_;
  };

  class const_iterator {
   public:
    typedef const_iterator self_type;
    typedef T value_type;
    typedef const T &reference;
```

@@ -244,7 +245,7 @@ public:
```cpp
      return ptr_ != rhs.ptr_;
    }

   private:
    pointer ptr_;
  };
```

@@ -267,7 +268,7 @@ public:
```cpp
    return const_iterator(mArray + mSize);
  }

 protected:
  T mArray[MaxDim];
  size_t mSize = 0;
};
```

@@ -275,11 +276,9 @@ protected:
```cpp
template <typename T, size_t MaxDim>
bool operator==(const SimpleVector<T, MaxDim> &lfs,
                const SimpleVector<T, MaxDim> &rfs) {
  if (lfs.size() != rfs.size()) return false;
  for (size_t i = 0; i < lfs.size(); ++i) {
    if (lfs[i] != rfs[i]) return false;
  }
  return true;
}
```

@@ -287,12 +286,12 @@ bool operator==(const SimpleVector<T, MaxDim> &lfs,
```cpp
template <typename T, size_t MaxDim>
bool operator!=(const SimpleVector<T, MaxDim> &lfs,
                const SimpleVector<T, MaxDim> &rfs) {
  return !(lfs == rfs);
}

struct Slice {
  template <class... Integers>
  TV_HOST_DEVICE_INLINE Slice(Integers... ints) {
    static_assert(sizeof...(ints) <= 3, "slice init must smaller than 3");
    SimpleVector<int, 3> slices{int(ints)...};
    mSlices[0] = -1;
```

@@ -333,7 +332,7 @@ struct Slice {
```cpp
    return mSlices[idx];
  }

 protected:
  int mSlices[3];
};
```

@@ -372,8 +371,7 @@ struct ShapeBase : public SimpleVector<int, MaxDim> {
```cpp
  }
  TV_HOST_DEVICE_INLINE size_t size() const {
    if (this->mSize == 0) return 0;
    size_t s = 1;
    for (int i = 0; i < int(this->mSize); ++i) {
      s *= this->mArray[i];
```

@@ -384,16 +382,14 @@ struct ShapeBase : public SimpleVector<int, MaxDim> {
```cpp
  TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze() const {
    ShapeBase<MaxDim> shape;
    for (int i = 0; i < this->mSize; ++i) {
      if (this->mArray[i] != 1) shape.push_back(this->mArray[i]);
    }
    return shape;
  }

  TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze(int dim) const {
    ShapeBase<MaxDim> shape;
    for (int i = 0; i < this->mSize; ++i) {
      if (i != dim || this->mArray[i] != 1) shape.push_back(this->mArray[i]);
    }
    return shape;
  }
```

@@ -479,7 +475,8 @@ TV_HOST_DEVICE_INLINE Index rowArrayIdxInv(Index index, Index *output,
```cpp
  return index;
}

template <int N>
struct ArrayIndexRowMajor {
  // mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
  TV_HOST_DEVICE_INLINE static unsigned run(const Shape &shape,
                                            const Shape &indexes) {
```

@@ -488,7 +485,8 @@ template <int N> struct ArrayIndexRowMajor {
```cpp
  }
};

template <>
struct ArrayIndexRowMajor<0> {
  TV_HOST_DEVICE_INLINE static unsigned run(const Shape &shape,
                                            const Shape &indexes) {
    return 0;
```

@@ -496,24 +494,36 @@ template <> struct ArrayIndexRowMajor<0> {
```cpp
};

namespace detail {
template <typename T>
constexpr const char *simpleTypeName(T val = T());
template <>
constexpr const char *simpleTypeName(float val) {
  return "float32";
}
template <>
constexpr const char *simpleTypeName(double val) {
  return "float64";
}
template <>
constexpr const char *simpleTypeName(int val) {
  return "int32";
}
template <>
constexpr const char *simpleTypeName(unsigned val) {
  return "uint32";
}
template <>
constexpr const char *simpleTypeName(long val) {
  return "int64";
}
template <>
constexpr const char *simpleTypeName(unsigned long val) {
  return "uint64";
}
};  // namespace detail

template <typename T, int Rank = -1>
struct TensorView {
  TV_HOST_DEVICE_INLINE TensorView() {}
  explicit TV_HOST_DEVICE_INLINE TensorView(T *ptr, Shape shape)
      : mPtr(ptr), mShape(shape) {}
```

@@ -526,29 +536,28 @@ template <typename T, int Rank = -1> struct TensorView {
```cpp
    mShape = {int(shapes)...};
  }

  TV_HOST_DEVICE_INLINE TensorView<T, Rank> &assign(
      const TensorView<T, Rank> &tensor) {
    TV_REQUIRE(tensor.shape() == shape(), "you must provide same input size%s",
               "\n");
    T *ptr = mPtr;
    const T *other_ptr = tensor.data();
    for (size_t i = 0; i < size(); ++i) *(ptr++) = *(other_ptr++);
    return *this;
  }

  template <typename T1>
  TV_HOST_DEVICE_INLINE TensorView<T, Rank> &assign(
      std::initializer_list<T1> seq) {
    TV_REQUIRE(seq.size() == size(), "you must provide same input size%s",
               "\n");
    T *ptr = mPtr;
    for (const T1 &s : seq) *(ptr++) = T(s);
    return *this;
  }

  template <class... Inds>
  TV_HOST_DEVICE_INLINE T &operator()(Inds... inds) {
#ifdef TV_DEBUG
    int idxes[sizeof...(Inds)]{int(inds)...};
    TV_REQUIRE(sizeof...(inds) == mShape.ndim(),
```

@@ -610,7 +619,8 @@ template <typename T, int Rank = -1> struct TensorView {
```cpp
    return mPtr[0];
  }

  template <class T1>
  TV_HOST_DEVICE_INLINE T &operator()(T1 i1) {
#if defined TV_DEBUG
#if defined(__CUDA_ARCH__)
    TV_DEVICE_REQUIRE(mShape.ndim() == 1,
```

@@ -711,7 +721,8 @@ template <typename T, int Rank = -1> struct TensorView {
```cpp
    return mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
  }

  template <class T1>
  TV_HOST_DEVICE_INLINE const T &operator()(T1 i1) const {
#ifdef TV_DEBUG
#if defined(__CUDA_ARCH__)
    TV_DEVICE_REQUIRE(mShape.ndim() == 1,
```

@@ -843,12 +854,12 @@ template <typename T, int Rank = -1> struct TensorView {
```cpp
#endif
    return mPtr[idx];
  }*/

  TV_HOST_DEVICE_INLINE TensorView<T, Rank> operator[](
      SimpleVector<Slice> slice_vec) {
    return _subview(slice_vec);
  }
  TV_HOST_DEVICE_INLINE const TensorView<T, Rank> operator[](
      SimpleVector<Slice> slice_vec) const {
    return _subview(slice_vec);
  }
  TV_HOST_DEVICE_INLINE bool empty() const { return mPtr == nullptr; }
```

@@ -917,7 +928,7 @@ template <typename T, int Rank = -1> struct TensorView {
```cpp
        new_shape[i] = slice_vec[i][1] - slice_vec[i][0];
        TV_ASSERT(new_shape[i] >= 0);
      } else {
        new_shape[i] = 1;  // reduce dim
      }
    }
    auto offset = rowArrayIdx(mShape, start);
```

@@ -952,8 +963,7 @@ template <typename T, int Rank = -1> struct TensorView {
```cpp
  std::string repr() const {
    std::ostringstream ss;
    if (empty()) return "";
    if (mShape.ndim() == 0) {
      ss << *mPtr;
      // ss << fmt::format("\nTensor: shape={}, dtype={}", mShape,
```

@@ -980,14 +990,12 @@ template <typename T, int Rank = -1> struct TensorView {
```cpp
          print_comma = false;
        }
      }
      if (print_comma && i != this->size() - 1) ss << ", ";
      for (int j = 0; j < inc_count; ++j) {
        ss << "]";
      }
      if (i != this->size() - 1) {
        if (inc_count != 0) ss << "\n";
        for (int j = 0; j < inc_count; ++j) {
          ss << "[";
        }
```

@@ -1000,11 +1008,11 @@ template <typename T, int Rank = -1> struct TensorView {
```cpp
    return ss.str();
  }

 protected:
  // TODO: make this function public.
  // currently this function is called unexpectedly when using subview({0, 0}).
  TV_HOST_DEVICE_INLINE TensorView<T, Rank> _subview(
      SimpleVector<Slice> slice_vec) {
    Shape new_shape;
    for (int i = 0; i < slice_vec.size(); ++i) {
      new_shape.push_back(slice_vec[i][0]);
```

@@ -1022,7 +1030,7 @@ protected:
```cpp
        new_shape[i] = slice_vec[i][1] - slice_vec[i][0];
        TV_ASSERT(new_shape[i] >= 0);
      } else {
        new_shape[i] = 1;  // reduce dim
      }
    }
    auto offset = rowArrayIdx(mShape, start);
```

@@ -1041,7 +1049,8 @@ protected:
```cpp
    }
    return TensorView<T, Rank>(mPtr + offset, reduced_shape);
  }

  template <typename T1>
  TV_HOST_DEVICE_INLINE Slice to_slice(T1 s) const {
    return Slice{int(s), -1, -1};
  }
```

@@ -1064,26 +1073,38 @@ Os &operator<<(Os &os, const TensorView<const T, Rank> &dt) {
```cpp
}

namespace detail {
template <typename T>
constexpr const char *printfTypeFormat(T val = T());
template <>
constexpr const char *printfTypeFormat(float val) {
  return "%.2f";
}
template <>
constexpr const char *printfTypeFormat(double val) {
  return "%.2f";
}
template <>
constexpr const char *printfTypeFormat(int val) {
  return "%d";
}
template <>
constexpr const char *printfTypeFormat(unsigned val) {
  return "%u";
}
template <>
constexpr const char *printfTypeFormat(long val) {
  return "%ld";
}
template <>
constexpr const char *printfTypeFormat(unsigned long val) {
  return "%lu";
}
};  // namespace detail

template <typename T>
TV_HOST_DEVICE void printTensorView(const TensorView<T> tensor,
                                    const char *format) {
  if (tensor.empty()) return;
  if (tensor.ndim() == 0) {
    printf(format, tensor());
    printf("\n");
```

@@ -1108,14 +1129,12 @@ TV_HOST_DEVICE void printTensorView(const TensorView<T> tensor,
```cpp
          print_comma = false;
        }
      }
      if (print_comma && i != tensor.size() - 1) printf(", ");
      for (int j = 0; j < inc_count; ++j) {
        printf("]");
      }
      if (i != tensor.size() - 1) {
        if (inc_count != 0) printf("\n");
        for (int j = 0; j < inc_count; ++j) {
          printf("[");
        }
```

@@ -1141,4 +1160,4 @@ TV_HOST_DEVICE void printTensorView(const T *ptr, Shape shape,
```cpp
  return printTensorView(TensorView<const T>(ptr, shape), format);
}
}  // namespace tv
```
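TensorView is a non-owning view: it couples a raw pointer with a Shape and provides the same multi-dimensional indexing on host and device, so kernels and CPU code share one accessor type. A minimal host-side sketch under that reading (the buffer and shape are illustrative):

```cpp
#include <tensorview/tensorview.h>

int main() {
  float data[6] = {0, 1, 2, 3, 4, 5};
  // View the buffer as a 2x3 row-major tensor; no copy, no ownership.
  tv::TensorView<float> view(data, {2, 3});
  view(1, 2) = 42.0f;  // writes data[1 * 3 + 2]
  return data[5] == 42.0f ? 0 : 1;
}
```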
mmdet3d/ops/spconv/src/indice.cc

@@ -23,61 +23,57 @@ namespace functor {
```cpp
template <typename Index, typename IndexGrid, unsigned NDim>
struct CreateConvIndicePairFunctor<tv::CPU, Index, IndexGrid, NDim> {
  Index operator()(const tv::CPU &d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<Index> indicesOut,
                   tv::TensorView<IndexGrid> gridsOut,
                   tv::TensorView<Index> indicePairs,
                   tv::TensorView<Index> indiceNum,
                   const tv::SimpleVector<Index, NDim> kernelSize,
                   const tv::SimpleVector<Index, NDim> stride,
                   const tv::SimpleVector<Index, NDim> padding,
                   const tv::SimpleVector<Index, NDim> dilation,
                   const tv::SimpleVector<Index, NDim> outSpatialShape,
                   bool transpose, bool resetGrid) {
    if (transpose)
      return getIndicePairsDeConv<Index, IndexGrid, NDim>(
          indicesIn, indicesOut, gridsOut, indicePairs, indiceNum,
          kernelSize.data(), stride.data(), padding.data(), dilation.data(),
          outSpatialShape.data());
    else
      return getIndicePairsConv<Index, IndexGrid, NDim>(
          indicesIn, indicesOut, gridsOut, indicePairs, indiceNum,
          kernelSize.data(), stride.data(), padding.data(), dilation.data(),
          outSpatialShape.data());
  }
};

template <typename Index, typename IndexGrid, unsigned NDim>
struct CreateSubMIndicePairFunctor<tv::CPU, Index, IndexGrid, NDim> {
  Index operator()(const tv::CPU &d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<IndexGrid> gridsOut,
                   tv::TensorView<Index> indicePairs,
                   tv::TensorView<Index> indiceNum,
                   const tv::SimpleVector<Index, NDim> kernelSize,
                   const tv::SimpleVector<Index, NDim> stride,
                   const tv::SimpleVector<Index, NDim> padding,
                   const tv::SimpleVector<Index, NDim> dilation,
                   const tv::SimpleVector<Index, NDim> outSpatialShape,
                   bool transpose, bool resetGrid) {
    return getIndicePairsSubM<Index, IndexGrid, NDim>(
        indicesIn, gridsOut, indicePairs, indiceNum, kernelSize.data(),
        stride.data(), padding.data(), dilation.data(),
        outSpatialShape.data());
  }
};
}  // namespace functor

#define DECLARE_CPU_SPECS_INDEX_NDIM(Index, NDIM)                           \
  template struct functor::CreateConvIndicePairFunctor<tv::CPU, Index, int, \
                                                       NDIM>;               \
  template struct functor::CreateSubMIndicePairFunctor<tv::CPU, Index, int, \
                                                       NDIM>;

#define DECLARE_CPU_INDEX(Index)          \
  DECLARE_CPU_SPECS_INDEX_NDIM(Index, 1); \
  DECLARE_CPU_SPECS_INDEX_NDIM(Index, 2); \
  DECLARE_CPU_SPECS_INDEX_NDIM(Index, 3); \
  DECLARE_CPU_SPECS_INDEX_NDIM(Index, 4);

DECLARE_CPU_INDEX(int);
```

@@ -86,4 +82,4 @@ DECLARE_CPU_INDEX(long);
```cpp
#undef DECLARE_CPU_INDEX
#undef DECLARE_CPU_SPECS_INDEX_NDIM
}  // namespace spconv
```
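The DECLARE_* macros are explicit template instantiations: they force the compiler to emit functor bodies for every (Index, NDim) combination in this translation unit, so the headers can ship declarations only. A stripped-down sketch of the idiom (AddOne is illustrative, not from the repository):

```cpp
// In a header: the template's declaration and definition.
template <typename T>
struct AddOne {
  T operator()(T x) const { return x + 1; }
};

// In one .cc file: force code generation for the supported types.
#define DECLARE_SPECS(T) template struct AddOne<T>;
DECLARE_SPECS(int);
DECLARE_SPECS(float);
#undef DECLARE_SPECS
```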
mmdet3d/ops/spconv/src/indice_cuda.cu

@@ -13,16 +13,17 @@
```cpp
// limitations under the License.
#include <ATen/ATen.h>
#include <spconv/indice.cu.h>
#include <spconv/indice.h>
#include <spconv/mp_helper.h>
#include <tensorview/helper_launch.h>
#include <tensorview/tensorview.h>
#include <utility/timer.h>

#include <chrono>
#include <limits>
#include <type_traits>

namespace spconv {
namespace functor {
template <typename Index, typename IndexGrid, unsigned NDim>
```

@@ -41,21 +42,20 @@ struct CreateConvIndicePairFunctorP1<tv::GPU, Index, IndexGrid, NDim> {
```cpp
                   bool transpose) {
    Index batchSize = gridsOut.dim(0);
    auto numActIn = indicesIn.dim(0);
    if (numActIn == 0) return 0;
    // auto timer = spconv::CudaContextTimer<>();
    if (transpose)
      prepareDeConvIndicePairsKernel<Index, IndexGrid, NDim, 4096>
          <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
             d.getStream()>>>(indicesIn, indicesOut, gridsOut, indicePairs,
                              indiceNum, indicePairUnique, kernelSize, stride,
                              padding, dilation, outSpatialShape);
    else
      prepareIndicePairsKernel<Index, IndexGrid, NDim, 4096>
          <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
             d.getStream()>>>(indicesIn, indicesOut, gridsOut, indicePairs,
                              indiceNum, indicePairUnique, kernelSize, stride,
                              padding, dilation, outSpatialShape);
    TV_CHECK_CUDA_ERR();
    // std::cout << "p1 gene time " << timer.report() / 1000.0 << std::endl;
    return 1;
```

@@ -75,18 +75,17 @@ struct CreateConvIndicePairFunctorP2<tv::GPU, Index, IndexGrid, NDim> {
```cpp
    Index batchSize = gridsOut.dim(0);
    auto kernelVolume = indicePairs.dim(0);
    auto numActIn = indicesIn.dim(0);
    if (numActIn == 0) return 0;
    Index numAct = indicePairUnique.dim(0) - 1;
    assignGridAndIndiceOutKernel<Index, IndexGrid, NDim>
        <<<tv::launch::getBlocks(numAct), tv::launch::CUDA_NUM_THREADS, 0,
           d.getStream()>>>(indicesOut, gridsOut, numAct, indicePairs,
                            indicePairUnique, outSpatialShape, batchSize);
    TV_CHECK_CUDA_ERR();
    assignIndicePairsKernel<Index, IndexGrid, NDim>
        <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
           d.getStream()>>>(indicesOut, gridsOut, numActIn, indicePairs,
                            indicePairUnique, outSpatialShape);
    TV_CHECK_CUDA_ERR();
    if (resetGrid) {
      resetGridKernel<Index, IndexGrid, NDim>
```

@@ -111,8 +110,7 @@ struct CreateSubMIndicePairFunctor<tv::GPU, Index, IndexGrid, NDim> {
```cpp
                   const tv::SimpleVector<Index, NDim> outSpatialShape,
                   bool transpose, bool resetGrid) {
    auto numActIn = indicesIn.dim(0);
    if (numActIn == 0) return 0;
    // auto timer = spconv::CudaContextTimer<>();
    prepareSubMGridKernel<Index, IndexGrid, NDim>
        <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
```

@@ -121,38 +119,40 @@ struct CreateSubMIndicePairFunctor<tv::GPU, Index, IndexGrid, NDim> {
```cpp
    getSubMIndicePairsKernel<Index, IndexGrid, NDim, 4096>
        <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
           d.getStream()>>>(indicesIn, gridsOut, indicePairs, indiceNum,
                            kernelSize, stride, padding, dilation,
                            outSpatialShape);
    TV_CHECK_CUDA_ERR();
    // std::cout << "subm gene time " << timer.report() / 1000.0 << std::endl;
    if (resetGrid) {
      resetGridSubMKernel<Index, IndexGrid, NDim>
          <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
             d.getStream()>>>(indicesIn.data(), gridsOut, outSpatialShape,
                              numActIn);
      TV_CHECK_CUDA_ERR();
    }
    return numActIn;
  }
};
}  // namespace functor

#define DECLARE_GPU_SPECS_INDEX_NDIM(Index, NDIM)                             \
  template struct functor::CreateConvIndicePairFunctor<tv::GPU, Index, int,   \
                                                       NDIM>;                 \
  template struct functor::CreateConvIndicePairFunctorP1<tv::GPU, Index, int, \
                                                         NDIM>;               \
  template struct functor::CreateConvIndicePairFunctorP2<tv::GPU, Index, int, \
                                                         NDIM>;               \
  template struct functor::CreateSubMIndicePairFunctor<tv::GPU, Index, int,   \
                                                       NDIM>;

#define DECLARE_GPU_INDEX(Index)          \
  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 1); \
  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 2); \
  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 3); \
  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 4);

DECLARE_GPU_INDEX(int);

#undef DECLARE_GPU_INDEX
#undef DECLARE_GPU_SPECS_INDEX_NDIM
}  // namespace spconv
```
mmdet3d/ops/spconv/src/maxpool.cc

@@ -62,14 +62,14 @@ struct SparseMaxPoolBackwardFunctor<tv::CPU, T, Index> {
```cpp
    }
  }
};
}  // namespace functor

#define DECLARE_CPU_SPECS_T_INDEX(T, Index)                                \
  template struct functor::SparseMaxPoolForwardFunctor<tv::CPU, T, Index>; \
  template struct functor::SparseMaxPoolBackwardFunctor<tv::CPU, T, Index>;

#define DECLARE_CPU_SPECS(T)         \
  DECLARE_CPU_SPECS_T_INDEX(T, int); \
  DECLARE_CPU_SPECS_T_INDEX(T, long);

DECLARE_CPU_SPECS(float);
```

@@ -79,4 +79,4 @@ DECLARE_CPU_SPECS(at::Half);
```cpp
#undef DECLARE_CPU_SPECS
#undef DECLARE_CPU_SPECS_T_INDEX
}  // namespace spconv
```
mmdet3d/ops/spconv/src/maxpool_cuda.cu

(Diff collapsed in this view: +51 −44.)
mmdet3d/ops/spconv/src/reordering.cc

@@ -19,7 +19,8 @@ namespace spconv {
```cpp
namespace functor {
template <typename T, typename Index>
struct SparseGatherFunctor<tv::CPU, T, Index> {
  void operator()(const tv::CPU &d, tv::TensorView<T> buffer,
                  tv::TensorView<const T> features,
                  tv::TensorView<const Index> indices, int size) {
    int numPlanes = features.dim(1);
    for (int i = 0; i < size; ++i) {
```

@@ -33,30 +34,29 @@ struct SparseGatherFunctor<tv::CPU, T, Index> {
```cpp
template <typename T, typename Index>
struct SparseScatterAddFunctor<tv::CPU, T, Index> {
  void operator()(const tv::CPU &d, tv::TensorView<T> outFeatures,
                  tv::TensorView<const T> buffer,
                  tv::TensorView<const Index> indices, int size, bool stable) {
    int numPlanes = outFeatures.dim(1);
    const T *buf = buffer.data();
    T *out = outFeatures.data();
    for (int i = 0; i < size; ++i) {
      buf = buffer.data() + i * numPlanes;
      out = outFeatures.data() + indices[i] * numPlanes;
      for (int j = 0; j < numPlanes; ++j) {
        out[j] += buf[j];
      }
    }
  }
};
}  // namespace functor

#define DECLARE_CPU_SPECS_T_INDEX(T, Index)                        \
  template struct functor::SparseGatherFunctor<tv::CPU, T, Index>; \
  template struct functor::SparseScatterAddFunctor<tv::CPU, T, Index>;

#define DECLARE_CPU_SPECS(T)         \
  DECLARE_CPU_SPECS_T_INDEX(T, int); \
  DECLARE_CPU_SPECS_T_INDEX(T, long);

DECLARE_CPU_SPECS(float);
```

@@ -66,4 +66,4 @@ DECLARE_CPU_SPECS(at::Half);
```cpp
#undef DECLARE_CPU_SPECS
#undef DECLARE_CPU_SPECS_T_INDEX
}  // namespace spconv
```
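SparseGatherFunctor packs selected feature rows into a contiguous buffer, and SparseScatterAddFunctor accumulates buffer rows back at the indexed positions. The two operations on plain arrays, as a small illustration (sizes and values are arbitrary):

```cpp
#include <vector>

int main() {
  const int numPlanes = 2;
  std::vector<float> features = {1, 2, 3, 4, 5, 6};  // 3 rows x 2 planes
  std::vector<int> indices = {2, 0};                 // rows to gather

  // Gather: buffer[i] = features[indices[i]].
  std::vector<float> buffer(indices.size() * numPlanes);
  for (size_t i = 0; i < indices.size(); ++i)
    for (int j = 0; j < numPlanes; ++j)
      buffer[i * numPlanes + j] = features[indices[i] * numPlanes + j];

  // Scatter-add: out[indices[i]] += buffer[i].
  std::vector<float> out(features.size(), 0.f);
  for (size_t i = 0; i < indices.size(); ++i)
    for (int j = 0; j < numPlanes; ++j)
      out[indices[i] * numPlanes + j] += buffer[i * numPlanes + j];
  return 0;
}
```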
mmdet3d/ops/spconv/src/reordering_cuda.cu

(Diff collapsed in this view: +20 −20.)