OpenDAS / mmdetection3d / Commits

Commit f27d308f, authored Jun 07, 2020 by yinchimaoliang

    merge master

Parents: c66ae813, 27ebcfac
Changes: 80
Showing 20 changed files with 1147 additions and 435 deletions (+1147 -435)
mmdet3d/ops/spconv/include/spconv/nms.h                      +13   -18
mmdet3d/ops/spconv/include/spconv/nms_functor.h              +11   -16
mmdet3d/ops/spconv/include/spconv/point2voxel.h              +60   -77
mmdet3d/ops/spconv/include/spconv/reordering.cu.h            +1    -1
mmdet3d/ops/spconv/include/spconv/reordering.h               +13   -15
mmdet3d/ops/spconv/include/tensorview/helper_kernel.cu.h     +27   -35
mmdet3d/ops/spconv/include/tensorview/tensorview.h           +135  -116
mmdet3d/ops/spconv/src/indice.cc                             +34   -38
mmdet3d/ops/spconv/src/indice_cuda.cu                        +33   -33
mmdet3d/ops/spconv/src/maxpool.cc                            +6    -6
mmdet3d/ops/spconv/src/maxpool_cuda.cu                       +51   -44
mmdet3d/ops/spconv/src/reordering.cc                         +11   -11
mmdet3d/ops/spconv/src/reordering_cuda.cu                    +20   -20
tests/test_heads.py                                          +84   -0
tests/test_losses.py                                         +68   -0
tests/test_nms.py                                            +57   -0
tests/test_parta2_bbox_head.py                               +486  -0
tests/test_roiaware_pool3d.py                                +28   -1
tests/test_vote_module.py                                    +8    -3
tools/data_converter/create_gt_database.py                   +1    -1
mmdet3d/ops/spconv/include/spconv/nms.h

@@ -16,11 +16,13 @@
 #define NMS_CPU_H
 #include <pybind11/pybind11.h>
 // must include pybind11/stl.h if using containers in STL in arguments.
-#include <algorithm>
-#include <boost/geometry.hpp>
 #include <pybind11/numpy.h>
 #include <pybind11/stl.h>
+#include <algorithm>
+#include <boost/geometry.hpp>
 #include <vector>
 #include "box_iou.h"
 #include "nms_gpu.h"
 namespace spconv {

@@ -48,13 +50,11 @@ std::vector<int> non_max_suppression_cpu(py::array_t<DType> boxes,
   DType xx1, xx2, w, h, inter, ovr;
   for (int _i = 0; _i < ndets; ++_i) {
     i = order_r(_i);
     if (suppressed_rw(i) == 1)
       continue;
     keep.push_back(i);
     for (int _j = _i + 1; _j < ndets; ++_j) {
       j = order_r(_j);
       if (suppressed_rw(j) == 1)
         continue;
       xx2 = std::min(boxes_r(i, 2), boxes_r(j, 2));
       xx1 = std::max(boxes_r(i, 0), boxes_r(j, 0));
       w = xx2 - xx1 + eps;

@@ -65,8 +65,7 @@ std::vector<int> non_max_suppression_cpu(py::array_t<DType> boxes,
       if (h > 0) {
         inter = w * h;
         ovr = inter / (area_rw(i) + area_rw(j) - inter);
         if (ovr >= thresh)
           suppressed_rw(j) = 1;
       }
     }
   }

@@ -97,15 +96,12 @@ std::vector<int> rotate_non_max_suppression_cpu(py::array_t<DType> box_corners,
   for (int _i = 0; _i < ndets; ++_i) {
     i = order_r(_i);
     if (suppressed_rw(i) == 1)
       continue;
     keep.push_back(i);
     for (int _j = _i + 1; _j < ndets; ++_j) {
       j = order_r(_j);
       if (suppressed_rw(j) == 1)
         continue;
       if (standup_iou_r(i, j) <= 0.0)
         continue;
       // std::cout << "pre_poly" << std::endl;
       try {
         bg::append(poly,

@@ -164,13 +160,12 @@ std::vector<int> rotate_non_max_suppression_cpu(py::array_t<DType> box_corners,
   }
   }*/
   // std::cout << "post_union" << poly_union.empty() << std::endl;
   if (!poly_union.empty()) {  // ignore invalid box
     union_area = bg::area(poly_union.front());
     // std::cout << "post union area" << std::endl;
     // std::cout << union_area << "debug" << std::endl;
     overlap = inter_area / union_area;
     if (overlap >= thresh)
       suppressed_rw(j) = 1;
     poly_union.clear();
   }
   }

@@ -197,5 +192,5 @@ int non_max_suppression(py::array_t<DType> boxes, py::array_t<int> keep_out,
                          nms_overlap_thresh, device_id);
 }
 }  // namespace spconv
 #endif
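Aside: the hunks in non_max_suppression_cpu above touch only formatting, but the loop they reflow is the classic greedy NMS. A minimal standalone sketch of that logic, assuming boxes arrive as (x1, y1, x2, y2) rows already sorted by descending score; the function name and container types here are hypothetical stand-ins for the pybind11 arrays the real kernel uses:

    #include <algorithm>
    #include <array>
    #include <vector>

    std::vector<int> nms_cpu_sketch(const std::vector<std::array<float, 4>>& boxes,
                                    float thresh, float eps) {
      const int ndets = static_cast<int>(boxes.size());
      std::vector<char> suppressed(ndets, 0);
      std::vector<int> keep;
      auto area = [&](int i) {
        return (boxes[i][2] - boxes[i][0] + eps) *
               (boxes[i][3] - boxes[i][1] + eps);
      };
      for (int i = 0; i < ndets; ++i) {
        if (suppressed[i]) continue;
        keep.push_back(i);  // highest-scored survivor
        for (int j = i + 1; j < ndets; ++j) {
          if (suppressed[j]) continue;
          // Intersection rectangle, padded by eps as in the kernel above.
          float w = std::min(boxes[i][2], boxes[j][2]) -
                    std::max(boxes[i][0], boxes[j][0]) + eps;
          float h = std::min(boxes[i][3], boxes[j][3]) -
                    std::max(boxes[i][1], boxes[j][1]) + eps;
          if (w > 0 && h > 0) {
            float inter = w * h;
            float ovr = inter / (area(i) + area(j) - inter);
            if (ovr >= thresh) suppressed[j] = 1;  // overlaps a kept box too much
          }
        }
      }
      return keep;
    }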
mmdet3d/ops/spconv/include/spconv/nms_functor.h

@@ -16,27 +16,22 @@
 #define NMS_FUNCTOR_H_
 #include <tensorview/tensorview.h>
 namespace spconv {
 namespace functor {
 template <typename Device, typename T, typename Index>
 struct NonMaxSupressionFunctor {
   Index operator()(const Device& d, tv::TensorView<Index> keep,
                    tv::TensorView<const T> boxes, T threshold, T eps);
 };
 template <typename Device, typename T, typename Index>
 struct rotateNonMaxSupressionFunctor {
   Index operator()(const Device& d, tv::TensorView<Index> keep,
                    tv::TensorView<const T> boxCorners,
                    tv::TensorView<const T> standupIoU, T threshold);
 };
 }  // namespace functor
 }  // namespace spconv
 #endif
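Aside: these declarations follow the device-tag functor pattern used throughout spconv: the header declares a template keyed on a Device type, and the CPU (.cc) and GPU (.cu) translation units each define their specialization. A minimal sketch of the pattern, with a hypothetical CPU tag and FillFunctor standing in for the NMS functors:

    #include <cstddef>

    struct CPU {};  // device tag, playing the role of tv::CPU

    // Primary template: declared in a header, never defined generically.
    template <typename Device, typename T>
    struct FillFunctor;

    // CPU backend: the specialization a .cc file would provide.
    template <typename T>
    struct FillFunctor<CPU, T> {
      void operator()(const CPU&, T* data, std::size_t n, T value) const {
        for (std::size_t i = 0; i < n; ++i) data[i] = value;
      }
    };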
mmdet3d/ops/spconv/include/spconv/point2voxel.h

@@ -16,13 +16,15 @@
 #include <pybind11/pybind11.h>
 // must include pybind11/eigen.h if using eigen matrix as arguments.
 // must include pybind11/stl.h if using containers in STL in arguments.
-#include <algorithm>
 #include <pybind11/numpy.h>
 #include <pybind11/stl.h>
+#include <algorithm>
 // #include <vector>
-#include <iostream>
 #include <math.h>
+#include <iostream>
 namespace spconv {
 namespace py = pybind11;
 using namespace pybind11::literals;

@@ -64,13 +66,11 @@ int points_to_voxel_3d_np(py::array_t<DType> points, py::array_t<DType> voxels,
     }
     coor[ndim_minus_1 - j] = c;
   }
   if (failed)
     continue;
   voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
   if (voxelidx == -1) {
     voxelidx = voxel_num;
     if (voxel_num >= max_voxels)
       break;
     voxel_num += 1;
     coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
     for (int k = 0; k < NDim; ++k) {

@@ -87,20 +87,19 @@ int points_to_voxel_3d_np(py::array_t<DType> points, py::array_t<DType> voxels,
   }
   for (int i = 0; i < voxel_num; ++i) {
     coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
   }
   return voxel_num;
 }
 template <typename DType, int NDim>
 int points_to_voxel_3d_np_mean(py::array_t<DType> points,
                                py::array_t<DType> voxels,
                                py::array_t<DType> means, py::array_t<int> coors,
                                py::array_t<int> num_points_per_voxel,
                                py::array_t<int> coor_to_voxelidx,
                                std::vector<DType> voxel_size,
                                std::vector<DType> coors_range, int max_points,
                                int max_voxels) {
   auto points_rw = points.template mutable_unchecked<2>();
   auto means_rw = means.template mutable_unchecked<2>();
   auto voxels_rw = voxels.template mutable_unchecked<3>();

@@ -131,13 +130,11 @@ int points_to_voxel_3d_np_mean(py::array_t<DType> points, py::array_t<DType> vox
     }
     coor[ndim_minus_1 - j] = c;
   }
   if (failed)
     continue;
   voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
   if (voxelidx == -1) {
     voxelidx = voxel_num;
     if (voxel_num >= max_voxels)
       break;
     voxel_num += 1;
     coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
     for (int k = 0; k < NDim; ++k) {

@@ -151,14 +148,15 @@ int points_to_voxel_3d_np_mean(py::array_t<DType> points, py::array_t<DType> vox
   }
   num_points_per_voxel_rw(voxelidx) += 1;
   for (int k = 0; k < num_features; ++k) {
     means_rw(voxelidx, k) +=
         (points_rw(i, k) - means_rw(voxelidx, k)) / DType(num + 1);
   }
   }
 }
 for (int i = 0; i < voxel_num; ++i) {
   coor_to_voxelidx_rw(coors_rw(i, 0), coors_rw(i, 1), coors_rw(i, 2)) = -1;
   num = num_points_per_voxel_rw(i);
-  for (int j = num; j < max_points; ++j){
+  for (int j = num; j < max_points; ++j) {
     for (int k = 0; k < num_features; ++k) {
       voxels_rw(i, j, k) = means_rw(i, k);
     }

@@ -168,15 +166,12 @@ int points_to_voxel_3d_np_mean(py::array_t<DType> points, py::array_t<DType> vox
 }
 template <typename DType, int NDim>
 int points_to_voxel_3d_np_height(py::array_t<DType> points,
                                  py::array_t<DType> voxels,
                                  py::array_t<DType> height,
                                  py::array_t<DType> maxs,
                                  py::array_t<int> coors,
                                  py::array_t<int> num_points_per_voxel,
                                  py::array_t<int> coor_to_voxelidx,
                                  std::vector<DType> voxel_size,
                                  std::vector<DType> coors_range, int max_points,
                                  int max_voxels) {
   auto points_rw = points.template mutable_unchecked<2>();
   auto height_rw = height.template mutable_unchecked<2>();
   auto maxs_rw = maxs.template mutable_unchecked<2>();

@@ -208,13 +203,11 @@ int points_to_voxel_3d_np_height(py::array_t<DType> points, py::array_t<DType> v
     }
     coor[ndim_minus_1 - j] = c;
   }
   if (failed)
     continue;
   voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
   if (voxelidx == -1) {
     voxelidx = voxel_num;
     if (voxel_num >= max_voxels)
       break;
     voxel_num += 1;
     coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
     for (int k = 0; k < NDim; ++k) {

@@ -225,7 +218,8 @@ int points_to_voxel_3d_np_height(py::array_t<DType> points, py::array_t<DType> v
   if (num < max_points) {
     for (int k = 0; k < num_features; ++k) {
       voxels_rw(voxelidx, num, k) = points_rw(i, k);
       height_rw(voxelidx, k) =
           std::min(points_rw(i, k), height_rw(voxelidx, k));
       maxs_rw(voxelidx, k) = std::max(points_rw(i, k), maxs_rw(voxelidx, k));
     }
     num_points_per_voxel_rw(voxelidx) += 1;

@@ -241,15 +235,11 @@ int points_to_voxel_3d_np_height(py::array_t<DType> points, py::array_t<DType> v
 }
 template <typename DType, int NDim>
 int block_filtering(py::array_t<DType> points, py::array_t<int> mask,
                     py::array_t<DType> height, py::array_t<DType> maxs,
                     py::array_t<int> coor_to_voxelidx,
                     std::vector<DType> voxel_size,
                     std::vector<DType> coors_range, int max_voxels,
                     DType eps) {
   auto points_rw = points.template mutable_unchecked<2>();
   auto height_rw = height.template mutable_unchecked<1>();
   auto maxs_rw = maxs.template mutable_unchecked<1>();

@@ -278,8 +268,7 @@ int block_filtering(py::array_t<DType> points,
     }
     coor[ndim_minus_1 - j] = c;
   }
   if (failed)
     continue;
   voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
   if (voxelidx == -1) {
     voxelidx = voxel_num;

@@ -299,30 +288,23 @@ int block_filtering(py::array_t<DType> points,
     }
     coor[ndim_minus_1 - j] = c;
   }
   if (failed)
     continue;
   voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
-  if ((maxs_rw(voxelidx) - height_rw(voxelidx, 2)) < eps){
+  if ((maxs_rw(voxelidx) - height_rw(voxelidx, 2)) < eps) {
     mask(i) = 0;
   }
   }
 }
 template <typename DType, int NDim>
 int points_to_voxel_3d_with_filtering(
     py::array_t<DType> points, py::array_t<DType> voxels,
     py::array_t<int> voxel_mask, py::array_t<DType> mins,
     py::array_t<DType> maxs, py::array_t<int> coors,
     py::array_t<int> num_points_per_voxel, py::array_t<int> coor_to_voxelidx,
     std::vector<DType> voxel_size, std::vector<DType> coors_range,
     int max_points, int max_voxels, int block_factor, int block_size,
     DType height_threshold) {
   auto points_rw = points.template mutable_unchecked<2>();
   auto mins_rw = mins.template mutable_unchecked<2>();
   auto maxs_rw = maxs.template mutable_unchecked<2>();

@@ -361,13 +343,11 @@ int points_to_voxel_3d_with_filtering(py::array_t<DType> points, py::array_t<DTy
     }
     coor[ndim_minus_1 - j] = c;
   }
   if (failed)
     continue;
   voxelidx = coor_to_voxelidx_rw(coor[0], coor[1], coor[2]);
   if (voxelidx == -1) {
     voxelidx = voxel_num;
     if (voxel_num >= max_voxels)
       break;
     voxel_num += 1;
     coor_to_voxelidx_rw(coor[0], coor[1], coor[2]) = voxelidx;
     for (int k = 0; k < NDim; ++k) {

@@ -381,8 +361,10 @@ int points_to_voxel_3d_with_filtering(py::array_t<DType> points, py::array_t<DTy
   }
   block_coor[0] = coor[1] / block_factor;
   block_coor[1] = coor[2] / block_factor;
   mins_rw(block_coor[0], block_coor[1]) =
       std::min(points_rw(i, 2), mins_rw(block_coor[0], block_coor[1]));
   maxs_rw(block_coor[0], block_coor[1]) =
       std::max(points_rw(i, 2), maxs_rw(block_coor[0], block_coor[1]));
   num_points_per_voxel_rw(voxelidx) += 1;
   }
 }

@@ -394,13 +376,15 @@ int points_to_voxel_3d_with_filtering(py::array_t<DType> points, py::array_t<DTy
   block_coor[1] = coor[2] / block_factor;
   min_value = mins_rw(block_coor[0], block_coor[1]);
   max_value = maxs_rw(block_coor[0], block_coor[1]);
   startx = std::max(0, block_coor[0] - block_size / 2);
   stopx = std::min(block_shape_H, block_coor[0] + block_size - block_size / 2);
   starty = std::max(0, block_coor[1] - block_size / 2);
   stopy = std::min(block_shape_W, block_coor[1] + block_size - block_size / 2);
-  for (int j = startx; j < stopx; ++j){
-    for (int k = starty; k < stopy; ++k){
+  for (int j = startx; j < stopx; ++j) {
+    for (int k = starty; k < stopy; ++k) {
       min_value = std::min(min_value, mins_rw(j, k));
       max_value = std::max(max_value, maxs_rw(j, k));
     }

@@ -410,5 +394,4 @@ int points_to_voxel_3d_with_filtering(py::array_t<DType> points, py::array_t<DTy
   return voxel_num;
 }
 }  // namespace spconv
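Aside: all of the voxelization routines above share one bookkeeping scheme: quantize each point into a grid cell, look the cell up in coor_to_voxelidx (-1 means unseen), and allocate a new voxel id until max_voxels is reached. A minimal sketch of just that scheme, under simplifying assumptions (one 3-D grid of float points, no feature copying and no max_points cap); voxelize_sketch and its flat indexing are illustrative, not the module's API:

    #include <array>
    #include <cmath>
    #include <vector>

    int voxelize_sketch(const std::vector<std::array<float, 3>>& points,
                        const float voxel_size[3], const float coors_min[3],
                        const int grid_shape[3],
                        std::vector<int>& coor_to_voxelidx,  // grid-sized, all -1
                        int max_voxels) {
      int voxel_num = 0;
      for (const auto& p : points) {
        int coor[3];
        bool failed = false;
        for (int j = 0; j < 3; ++j) {
          int c = static_cast<int>(
              std::floor((p[j] - coors_min[j]) / voxel_size[j]));
          if (c < 0 || c >= grid_shape[j]) { failed = true; break; }
          coor[j] = c;
        }
        if (failed) continue;  // out-of-range point, skipped as in the kernels
        int flat = (coor[0] * grid_shape[1] + coor[1]) * grid_shape[2] + coor[2];
        if (coor_to_voxelidx[flat] == -1) {  // first point in this cell
          if (voxel_num >= max_voxels) break;
          coor_to_voxelidx[flat] = voxel_num++;
        }
      }
      return voxel_num;  // number of voxels actually allocated
    }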
mmdet3d/ops/spconv/include/spconv/reordering.cu.h

@@ -156,6 +156,6 @@ __global__ void scatterAddVecBlockKernel(T *outFeatures, const T *buffer,
   }
 }
 }  // namespace spconv
 #endif
mmdet3d/ops/spconv/include/spconv/reordering.h

@@ -16,25 +16,23 @@
 #define SPARSE_REORDERING_FUNCTOR_H_
 #include <tensorview/tensorview.h>
 namespace spconv {
 namespace functor {
 template <typename Device, typename T, typename Index>
 struct SparseGatherFunctor {
   void operator()(const Device& d, tv::TensorView<T> buffer,
                   tv::TensorView<const T> features,
                   tv::TensorView<const Index> indices, int size);
 };
 template <typename Device, typename T, typename Index>
 struct SparseScatterAddFunctor {
   void operator()(const Device& d, tv::TensorView<T> out_features,
                   tv::TensorView<const T> buffer,
                   tv::TensorView<const Index> indices, int size,
                   bool stable = false);
 };
 }  // namespace functor
 }  // namespace spconv
 #endif
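Aside: in sparse convolution these two functors bracket the GEMM: gather copies the feature rows selected by an index list into a contiguous buffer, and scatter-add accumulates result rows back into the output at the same indices. A minimal CPU sketch of what they compute, with an assumed flat row-major layout and hypothetical names:

    #include <cstddef>

    void sparse_gather_sketch(float* buffer, const float* features,
                              const int* indices, int size, std::size_t row_len) {
      for (int i = 0; i < size; ++i)
        for (std::size_t k = 0; k < row_len; ++k)
          buffer[i * row_len + k] = features[indices[i] * row_len + k];
    }

    void sparse_scatter_add_sketch(float* out_features, const float* buffer,
                                   const int* indices, int size,
                                   std::size_t row_len) {
      for (int i = 0; i < size; ++i)
        for (std::size_t k = 0; k < row_len; ++k)
          out_features[indices[i] * row_len + k] += buffer[i * row_len + k];
    }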
mmdet3d/ops/spconv/include/tensorview/helper_kernel.cu.h

 #pragma once
 // from tensorflow
 namespace tv {
 namespace detail {
 template <typename T>
 class KernelLoop {
   struct Iterator {
     __forceinline__ __device__ Iterator(T index, T delta)
         : index_(index), delta_(delta) {}
     __forceinline__ __device__ T operator*() const { return index_; }
     __forceinline__ __device__ Iterator& operator++() {
       index_ += delta_;
       return *this;
     }
     __forceinline__ __device__ bool operator!=(const Iterator& other) const {
       bool greater = index_ > other.index_;
       bool less = index_ < other.index_;
       // Anything past an end iterator (delta_ == 0) is equal.
       // In range-based for loops, this optimizes to 'return less'.
       if (!other.delta_) {
         return less;
       }
       if (!delta_) {
         return greater;
       }
       return less || greater;
     }

    private:
     T index_;
     const T delta_;
   };

  public:
   __forceinline__ __device__ KernelLoop(T begin, T delta, T end)
       : begin_(begin), delta_(delta), end_(end) {}
   __forceinline__ __device__ Iterator begin() const {
     return Iterator{begin_, delta_};
   }
   __forceinline__ __device__ Iterator end() const { return Iterator{end_, 0}; }

  private:
   T begin_;
   T delta_;
   T end_;
 };
 }  // namespace detail
 template <typename T, int NumILP = 1>
 __forceinline__ __device__ detail::KernelLoop<T> KernelLoopX(T count) {
   return detail::KernelLoop<T>(blockIdx.x * blockDim.x + threadIdx.x,
                                gridDim.x * blockDim.x * NumILP, count);
 }
 // Helper to visit indices in the range 0 <= i < count using the y-coordinate.
 // Usage: for(int i : KernelLoopY(count)) { visit(i); }
 template <typename T, int NumILP = 1>
 __forceinline__ __device__ detail::KernelLoop<T> KernelLoopY(T count) {
   return detail::KernelLoop<T>(blockIdx.y * blockDim.y + threadIdx.y,
                                gridDim.y * blockDim.y * NumILP, count);
 }
 // Helper to visit indices in the range 0 <= i < count using the z-coordinate.
 // Usage: for(int i : KernelLoopZ(count)) { visit(i); }
 template <typename T, int NumILP = 1>
 __forceinline__ __device__ detail::KernelLoop<T> KernelLoopZ(T count) {
   return detail::KernelLoop<T>(blockIdx.z * blockDim.z + threadIdx.z,
                                gridDim.z * blockDim.z * NumILP, count);
 }
 }  // namespace tv
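Aside: KernelLoopX/Y/Z wrap the standard grid-stride-loop idiom in a range-based for. A minimal sketch of a kernel consuming it; the kernel itself is illustrative, while the tv:: helper is the one declared above:

    #include <tensorview/helper_kernel.cu.h>

    // Each thread starts at its global x index and strides by the whole grid,
    // so any launch configuration covers all `count` elements.
    __global__ void scale_kernel(float* data, float alpha, int count) {
      for (int i : tv::KernelLoopX<int>(count)) {
        data[i] *= alpha;
      }
    }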
mmdet3d/ops/spconv/include/tensorview/tensorview.h

@@ -13,10 +13,11 @@
 // limitations under the License.
 #pragma once
-#include <cuda_runtime_api.h>
 #include <algorithm>
 #include <cassert>
 #include <cstdlib>
+#include <cuda_runtime_api.h>
 #include <iostream>
 #include <memory>
 // #include <prettyprint.h>

@@ -42,22 +43,22 @@ namespace tv {
 #define TV_HOST_DEVICE
 #endif
 #define TV_REQUIRE(expr, ...) \
   {                           \
     if (!(expr)) {            \
       printf(__VA_ARGS__);    \
       assert(expr);           \
     }                         \
   }
 #define TV_DEVICE_REQUIRE(expr, ...)                      \
   {                                                       \
-    if (!(expr) && threadIdx.x == 0)                      \
-      printf(__VA_ARGS__);                                \
+    if (!(expr) && threadIdx.x == 0) printf(__VA_ARGS__); \
     assert(expr);                                         \
   }
 template <class SStream, class T>
 void sstream_print(SStream& ss, T val) {
   ss << val;
 }

@@ -67,37 +68,37 @@ void sstream_print(SStream &ss, T val, TArgs... args) {
   sstream_print(ss, args...);
 }
 #define TV_ASSERT_RT_ERR(expr, ...)                     \
   {                                                     \
     if (!(expr)) {                                      \
       std::stringstream __macro_s;                      \
       __macro_s << __FILE__ << " " << __LINE__ << "\n"; \
       __macro_s << #expr << " assert faild. ";          \
       tv::sstream_print(__macro_s, __VA_ARGS__);        \
       throw std::runtime_error(__macro_s.str());        \
     }                                                   \
   }
 #define TV_ASSERT_INVALID_ARG(expr, ...)                \
   {                                                     \
     if (!(expr)) {                                      \
       std::stringstream __macro_s;                      \
       __macro_s << __FILE__ << " " << __LINE__ << "\n"; \
       __macro_s << #expr << " assert faild. ";          \
       tv::sstream_print(__macro_s, __VA_ARGS__);        \
       throw std::invalid_argument(__macro_s.str());     \
     }                                                   \
   }
 #define TV_CHECK_CUDA_ERR()                                    \
   {                                                            \
     auto err = cudaGetLastError();                             \
     if (err != cudaSuccess) {                                  \
       std::stringstream __macro_s;                             \
       __macro_s << __FILE__ << " " << __LINE__ << "\n";        \
       __macro_s << "cuda execution failed with error " << err; \
       throw std::runtime_error(__macro_s.str());               \
     }                                                          \
   }
 struct GPU {

@@ -130,7 +131,7 @@ constexpr size_t calc_align(size_t ndim)
 */
 template <typename T, size_t MaxDim = TV_MAX_DIM>
 struct /*alignas(calc_align<T>(MaxDim))*/ SimpleVector {
  public:
   TV_HOST_DEVICE_INLINE SimpleVector(){};
   TV_HOST_DEVICE_INLINE SimpleVector(std::initializer_list<T> q) {
     TV_ASSERT(q.size() <= MaxDim);

@@ -187,7 +188,7 @@
   typedef size_t size_type;
   class iterator {
    public:
     typedef iterator self_type;
     typedef T value_type;
     typedef T& reference;

@@ -213,12 +214,12 @@
       return ptr_ != rhs.ptr_;
     }

    private:
     pointer ptr_;
   };
   class const_iterator {
    public:
     typedef const_iterator self_type;
     typedef T value_type;
     typedef const T& reference;

@@ -244,7 +245,7 @@
       return ptr_ != rhs.ptr_;
     }

    private:
     pointer ptr_;
   };

@@ -267,7 +268,7 @@
     return const_iterator(mArray + mSize);
   }

  protected:
   T mArray[MaxDim];
   size_t mSize = 0;
 };

@@ -275,11 +276,9 @@
 template <typename T, size_t MaxDim>
 bool operator==(const SimpleVector<T, MaxDim>& lfs,
                 const SimpleVector<T, MaxDim>& rfs) {
   if (lfs.size() != rfs.size())
     return false;
   for (size_t i = 0; i < lfs.size(); ++i) {
     if (lfs[i] != rfs[i])
       return false;
   }
   return true;
 }

@@ -287,12 +286,12 @@ bool operator==(const SimpleVector<T, MaxDim> &lfs,
 template <typename T, size_t MaxDim>
 bool operator!=(const SimpleVector<T, MaxDim>& lfs,
                 const SimpleVector<T, MaxDim>& rfs) {
   return !(lfs == rfs);
 }
 struct Slice {
   template <class... Integers>
   TV_HOST_DEVICE_INLINE Slice(Integers... ints) {
     static_assert(sizeof...(ints) <= 3, "slice init must smaller than 3");
     SimpleVector<int, 3> slices{int(ints)...};
     mSlices[0] = -1;

@@ -333,7 +332,7 @@ struct Slice {
     return mSlices[idx];
   }

  protected:
   int mSlices[3];
 };

@@ -372,8 +371,7 @@ struct ShapeBase : public SimpleVector<int, MaxDim> {
   }
   TV_HOST_DEVICE_INLINE size_t size() const {
     if (this->mSize == 0)
       return 0;
     size_t s = 1;
     for (int i = 0; i < int(this->mSize); ++i) {
       s *= this->mArray[i];

@@ -384,16 +382,14 @@ struct ShapeBase : public SimpleVector<int, MaxDim> {
   TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze() const {
     ShapeBase<MaxDim> shape;
     for (int i = 0; i < this->mSize; ++i) {
       if (this->mArray[i] != 1)
         shape.push_back(this->mArray[i]);
     }
     return shape;
   }
   TV_HOST_DEVICE_INLINE ShapeBase<MaxDim> squeeze(int dim) const {
     ShapeBase<MaxDim> shape;
     for (int i = 0; i < this->mSize; ++i) {
       if (i != dim || this->mArray[i] != 1)
         shape.push_back(this->mArray[i]);
     }
     return shape;
   }

@@ -479,7 +475,8 @@ TV_HOST_DEVICE_INLINE Index rowArrayIdxInv(Index index, Index *output,
   return index;
 }
 template <int N>
 struct ArrayIndexRowMajor {
   // mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
   TV_HOST_DEVICE_INLINE static unsigned run(const Shape& shape,
                                             const Shape& indexes) {

@@ -488,7 +485,8 @@ template <int N> struct ArrayIndexRowMajor {
   }
 };
 template <>
 struct ArrayIndexRowMajor<0> {
   TV_HOST_DEVICE_INLINE static unsigned run(const Shape& shape,
                                             const Shape& indexes) {
     return 0;

@@ -496,24 +494,36 @@ template <> struct ArrayIndexRowMajor<0> {
 };
 namespace detail {
 template <typename T>
 constexpr const char* simpleTypeName(T val = T());
 template <>
 constexpr const char* simpleTypeName(float val) {
   return "float32";
 }
 template <>
 constexpr const char* simpleTypeName(double val) {
   return "float64";
 }
 template <>
 constexpr const char* simpleTypeName(int val) {
   return "int32";
 }
 template <>
 constexpr const char* simpleTypeName(unsigned val) {
   return "uint32";
 }
 template <>
 constexpr const char* simpleTypeName(long val) {
   return "int64";
 }
 template <>
 constexpr const char* simpleTypeName(unsigned long val) {
   return "uint64";
 }
 };  // namespace detail
 template <typename T, int Rank = -1>
 struct TensorView {
   TV_HOST_DEVICE_INLINE TensorView() {}
   explicit TV_HOST_DEVICE_INLINE TensorView(T* ptr, Shape shape)
       : mPtr(ptr), mShape(shape) {}

@@ -526,29 +536,28 @@ template <typename T, int Rank = -1> struct TensorView {
     mShape = {int(shapes)...};
   }
   TV_HOST_DEVICE_INLINE TensorView<T, Rank>& assign(
       const TensorView<T, Rank>& tensor) {
     TV_REQUIRE(tensor.shape() == shape(), "you must provide same input size%s",
                "\n");
     T* ptr = mPtr;
     const T* other_ptr = tensor.data();
     for (size_t i = 0; i < size(); ++i)
       *(ptr++) = *(other_ptr++);
     return *this;
   }
   template <typename T1>
   TV_HOST_DEVICE_INLINE TensorView<T, Rank>& assign(
       std::initializer_list<T1> seq) {
     TV_REQUIRE(seq.size() == size(), "you must provide same input size%s",
                "\n");
     T* ptr = mPtr;
     for (const T1& s : seq)
       *(ptr++) = T(s);
     return *this;
   }
   template <class... Inds>
   TV_HOST_DEVICE_INLINE T& operator()(Inds... inds) {
 #ifdef TV_DEBUG
     int idxes[sizeof...(Inds)]{int(inds)...};
     TV_REQUIRE(sizeof...(inds) == mShape.ndim(),

@@ -610,7 +619,8 @@ template <typename T, int Rank = -1> struct TensorView {
     return mPtr[0];
   }
   template <class T1>
   TV_HOST_DEVICE_INLINE T& operator()(T1 i1) {
 #if defined TV_DEBUG
 #if defined(__CUDA_ARCH__)
     TV_DEVICE_REQUIRE(mShape.ndim() == 1,

@@ -711,7 +721,8 @@ template <typename T, int Rank = -1> struct TensorView {
     return mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4];
   }
   template <class T1>
   TV_HOST_DEVICE_INLINE const T& operator()(T1 i1) const {
 #ifdef TV_DEBUG
 #if defined(__CUDA_ARCH__)
     TV_DEVICE_REQUIRE(mShape.ndim() == 1,

@@ -843,12 +854,12 @@ template <typename T, int Rank = -1> struct TensorView {
 #endif
     return mPtr[idx];
   }*/
   TV_HOST_DEVICE_INLINE TensorView<T, Rank> operator[](
       SimpleVector<Slice> slice_vec) {
     return _subview(slice_vec);
   }
   TV_HOST_DEVICE_INLINE const TensorView<T, Rank> operator[](
       SimpleVector<Slice> slice_vec) const {
     return _subview(slice_vec);
   }
   TV_HOST_DEVICE_INLINE bool empty() const { return mPtr == nullptr; }

@@ -917,7 +928,7 @@ template <typename T, int Rank = -1> struct TensorView {
         new_shape[i] = slice_vec[i][1] - slice_vec[i][0];
         TV_ASSERT(new_shape[i] >= 0);
       } else {
         new_shape[i] = 1;  // reduce dim
       }
     }
     auto offset = rowArrayIdx(mShape, start);

@@ -952,8 +963,7 @@ template <typename T, int Rank = -1> struct TensorView {
   std::string repr() const {
     std::ostringstream ss;
     if (empty())
       return "";
     if (mShape.ndim() == 0) {
       ss << *mPtr;
       // ss << fmt::format("\nTensor: shape={}, dtype={}", mShape,

@@ -980,14 +990,12 @@ template <typename T, int Rank = -1> struct TensorView {
         print_comma = false;
       }
     }
     if (print_comma && i != this->size() - 1)
       ss << ", ";
     for (int j = 0; j < inc_count; ++j) {
       ss << "]";
     }
     if (i != this->size() - 1) {
       if (inc_count != 0)
         ss << "\n";
       for (int j = 0; j < inc_count; ++j) {
         ss << "[";
       }

@@ -1000,11 +1008,11 @@ template <typename T, int Rank = -1> struct TensorView {
     return ss.str();
   }

  protected:
   // TODO: make this function public.
   // currently this function is called unexpectedly when using subview({0, 0}).
   TV_HOST_DEVICE_INLINE TensorView<T, Rank> _subview(
       SimpleVector<Slice> slice_vec) {
     Shape new_shape;
     for (int i = 0; i < slice_vec.size(); ++i) {
       new_shape.push_back(slice_vec[i][0]);

@@ -1022,7 +1030,7 @@ protected:
       new_shape[i] = slice_vec[i][1] - slice_vec[i][0];
       TV_ASSERT(new_shape[i] >= 0);
       } else {
         new_shape[i] = 1;  // reduce dim
       }
     }
     auto offset = rowArrayIdx(mShape, start);

@@ -1041,7 +1049,8 @@ protected:
     }
     return TensorView<T, Rank>(mPtr + offset, reduced_shape);
   }
   template <typename T1>
   TV_HOST_DEVICE_INLINE Slice to_slice(T1 s) const {
     return Slice{int(s), -1, -1};
   }

@@ -1064,26 +1073,38 @@ Os &operator<<(Os &os, const TensorView<const T, Rank> &dt) {
 }
 namespace detail {
 template <typename T>
 constexpr const char* printfTypeFormat(T val = T());
 template <>
 constexpr const char* printfTypeFormat(float val) {
   return "%.2f";
 }
 template <>
 constexpr const char* printfTypeFormat(double val) {
   return "%.2f";
 }
 template <>
 constexpr const char* printfTypeFormat(int val) {
   return "%d";
 }
 template <>
 constexpr const char* printfTypeFormat(unsigned val) {
   return "%u";
 }
 template <>
 constexpr const char* printfTypeFormat(long val) {
   return "%ld";
 }
 template <>
 constexpr const char* printfTypeFormat(unsigned long val) {
   return "%lu";
 }
 };  // namespace detail
 template <typename T>
 TV_HOST_DEVICE void printTensorView(const TensorView<T> tensor,
                                     const char* format) {
   if (tensor.empty())
     return;
   if (tensor.ndim() == 0) {
     printf(format, tensor());
     printf("\n");

@@ -1108,14 +1129,12 @@ TV_HOST_DEVICE void printTensorView(const TensorView<T> tensor,
       print_comma = false;
     }
   }
   if (print_comma && i != tensor.size() - 1)
     printf(", ");
   for (int j = 0; j < inc_count; ++j) {
     printf("]");
   }
   if (i != tensor.size() - 1) {
     if (inc_count != 0)
       printf("\n");
     for (int j = 0; j < inc_count; ++j) {
       printf("[");
     }

@@ -1141,4 +1160,4 @@ TV_HOST_DEVICE void printTensorView(const T *ptr, Shape shape,
   return printTensorView(TensorView<const T>(ptr, shape), format);
 }
 }  // namespace tv
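Aside: the indexing expression quoted in the hunks, mPtr[((i1 * mShape[1] + i2) * mShape[2] + i3) * mShape[3] + i4], is Horner-style row-major flattening, which ArrayIndexRowMajor unrolls at compile time. A small sketch of the same computation as a runtime loop (a hypothetical helper, not part of tensorview.h):

    #include <cstddef>

    std::size_t row_major_offset(const int* shape, const int* idx, int ndim) {
      std::size_t offset = 0;
      for (int d = 0; d < ndim; ++d)
        offset = offset * shape[d] + idx[d];  // ((i1*s1 + i2)*s2 + i3)*s3 + i4 ...
      return offset;
    }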
mmdet3d/ops/spconv/src/indice.cc
View file @
f27d308f
...
@@ -23,61 +23,57 @@ namespace functor {
...
@@ -23,61 +23,57 @@ namespace functor {
template
<
typename
Index
,
typename
IndexGrid
,
unsigned
NDim
>
template
<
typename
Index
,
typename
IndexGrid
,
unsigned
NDim
>
struct
CreateConvIndicePairFunctor
<
tv
::
CPU
,
Index
,
IndexGrid
,
NDim
>
{
struct
CreateConvIndicePairFunctor
<
tv
::
CPU
,
Index
,
IndexGrid
,
NDim
>
{
Index
operator
()(
const
tv
::
CPU
&
d
,
tv
::
TensorView
<
const
Index
>
indicesIn
,
Index
operator
()(
const
tv
::
CPU
&
d
,
tv
::
TensorView
<
const
Index
>
indicesIn
,
tv
::
TensorView
<
Index
>
indicesOut
,
tv
::
TensorView
<
Index
>
indicesOut
,
tv
::
TensorView
<
IndexGrid
>
gridsOut
,
tv
::
TensorView
<
IndexGrid
>
gridsOut
,
tv
::
TensorView
<
Index
>
indicePairs
,
tv
::
TensorView
<
Index
>
indicePairs
,
tv
::
TensorView
<
Index
>
indiceNum
,
tv
::
TensorView
<
Index
>
indiceNum
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
kernelSize
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
kernelSize
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
stride
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
stride
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
padding
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
padding
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
dilation
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
dilation
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
outSpatialShape
,
const
tv
::
SimpleVector
<
Index
,
NDim
>
outSpatialShape
,
bool
transpose
,
bool
resetGrid
)
{
bool
transpose
,
bool
resetGrid
)
{
if
(
transpose
)
if
(
transpose
)
return
getIndicePairsDeConv
<
Index
,
IndexGrid
,
NDim
>
(
return
getIndicePairsDeConv
<
Index
,
IndexGrid
,
NDim
>
(
indicesIn
,
indicesOut
,
indicesIn
,
indicesOut
,
gridsOut
,
indicePairs
,
indiceNum
,
gridsOut
,
indicePairs
,
indiceNum
,
kernelSize
.
data
(),
stride
.
data
(),
padding
.
data
(),
dilation
.
data
(),
kernelSize
.
data
(),
stride
.
data
(),
padding
.
data
(),
dilation
.
data
(),
outSpatialShape
.
data
());
outSpatialShape
.
data
());
else
else
return
getIndicePairsConv
<
Index
,
IndexGrid
,
NDim
>
(
return
getIndicePairsConv
<
Index
,
IndexGrid
,
NDim
>
(
indicesIn
,
indicesOut
,
indicesIn
,
indicesOut
,
gridsOut
,
indicePairs
,
indiceNum
,
gridsOut
,
indicePairs
,
indiceNum
,
kernelSize
.
data
(),
stride
.
data
(),
padding
.
data
(),
dilation
.
data
(),
kernelSize
.
data
(),
stride
.
data
(),
padding
.
data
(),
dilation
.
data
(),
outSpatialShape
.
data
());
outSpatialShape
.
data
());
}
}
};
};
template <typename Index, typename IndexGrid, unsigned NDim>
struct CreateSubMIndicePairFunctor<tv::CPU, Index, IndexGrid, NDim> {
  Index operator()(const tv::CPU &d, tv::TensorView<const Index> indicesIn,
                   tv::TensorView<IndexGrid> gridsOut,
                   tv::TensorView<Index> indicePairs,
                   tv::TensorView<Index> indiceNum,
                   const tv::SimpleVector<Index, NDim> kernelSize,
                   const tv::SimpleVector<Index, NDim> stride,
                   const tv::SimpleVector<Index, NDim> padding,
                   const tv::SimpleVector<Index, NDim> dilation,
                   const tv::SimpleVector<Index, NDim> outSpatialShape,
                   bool transpose, bool resetGrid) {
    return getIndicePairsSubM<Index, IndexGrid, NDim>(
        indicesIn, gridsOut, indicePairs, indiceNum, kernelSize.data(),
        stride.data(), padding.data(), dilation.data(),
        outSpatialShape.data());
  }
};
}  // namespace functor

#define DECLARE_CPU_SPECS_INDEX_NDIM(Index, NDIM)                            \
  template struct functor::CreateConvIndicePairFunctor<tv::CPU, Index, int,  \
                                                       NDIM>;                \
  template struct functor::CreateSubMIndicePairFunctor<tv::CPU, Index, int,  \
                                                       NDIM>;

#define DECLARE_CPU_INDEX(Index)          \
  DECLARE_CPU_SPECS_INDEX_NDIM(Index, 1); \
  DECLARE_CPU_SPECS_INDEX_NDIM(Index, 2); \
  DECLARE_CPU_SPECS_INDEX_NDIM(Index, 3); \
  DECLARE_CPU_SPECS_INDEX_NDIM(Index, 4);

DECLARE_CPU_INDEX(int);
...
@@ -86,4 +82,4 @@ DECLARE_CPU_INDEX(long);
#undef DECLARE_CPU_INDEX
#undef DECLARE_CPU_SPECS_INDEX_NDIM
}  // namespace spconv
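Note: these index-pair functors are not called directly from Python; they sit behind spconv's conv modules, which build the input/output pairs the first time a layer with a given indice_key runs and reuse them afterwards. A minimal usage sketch, assuming the SparseConvTensor/SparseSequential/SubMConv3d exports of the spconv package bundled in mmdet3d.ops (shapes and the random indices below are purely illustrative; real voxel indices are unique):

import torch
from mmdet3d.ops.spconv import SparseConvTensor, SparseSequential, SubMConv3d

features = torch.randn(100, 4).cuda()                   # 100 active voxels
indices = torch.randint(0, 20, (100, 4)).int().cuda()   # (batch_idx, z, y, x)
x = SparseConvTensor(features, indices, [20, 20, 20], 2)

# The forward pass triggers CreateSubMIndicePairFunctor once; the pairs are
# cached under 'sub0' and shared by later layers with the same indice_key.
net = SparseSequential(SubMConv3d(4, 8, 3, padding=1, indice_key='sub0')).cuda()
out = net(x)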
mmdet3d/ops/spconv/src/indice_cuda.cu
...
@@ -13,16 +13,17 @@
// limitations under the License.
#include <ATen/ATen.h>
#include <spconv/indice.cu.h>
#include <spconv/indice.h>
#include <spconv/mp_helper.h>
#include <tensorview/helper_launch.h>
#include <tensorview/tensorview.h>
#include <utility/timer.h>

#include <chrono>
#include <limits>
#include <type_traits>

namespace spconv {
namespace functor {
template <typename Index, typename IndexGrid, unsigned NDim>
@@ -41,21 +42,20 @@ struct CreateConvIndicePairFunctorP1<tv::GPU, Index, IndexGrid, NDim> {
...
@@ -41,21 +42,20 @@ struct CreateConvIndicePairFunctorP1<tv::GPU, Index, IndexGrid, NDim> {
bool
transpose
)
{
bool
transpose
)
{
Index
batchSize
=
gridsOut
.
dim
(
0
);
Index
batchSize
=
gridsOut
.
dim
(
0
);
auto
numActIn
=
indicesIn
.
dim
(
0
);
auto
numActIn
=
indicesIn
.
dim
(
0
);
if
(
numActIn
==
0
)
if
(
numActIn
==
0
)
return
0
;
return
0
;
// auto timer = spconv::CudaContextTimer<>();
// auto timer = spconv::CudaContextTimer<>();
if
(
transpose
)
if
(
transpose
)
prepareDeConvIndicePairsKernel
<
Index
,
IndexGrid
,
NDim
,
4096
>
prepareDeConvIndicePairsKernel
<
Index
,
IndexGrid
,
NDim
,
4096
>
<<<
tv
::
launch
::
getBlocks
(
numActIn
),
tv
::
launch
::
CUDA_NUM_THREADS
,
0
,
<<<
tv
::
launch
::
getBlocks
(
numActIn
),
tv
::
launch
::
CUDA_NUM_THREADS
,
0
,
d
.
getStream
()
>>>
(
indicesIn
,
indicesOut
,
gridsOut
,
indicePairs
,
d
.
getStream
()
>>>
(
indicesIn
,
indicesOut
,
gridsOut
,
indicePairs
,
indiceNum
,
indicePairUnique
,
kernelSize
,
stride
,
indiceNum
,
indicePairUnique
,
kernelSize
,
stride
,
padding
,
dilation
,
outSpatialShape
);
padding
,
dilation
,
outSpatialShape
);
else
else
prepareIndicePairsKernel
<
Index
,
IndexGrid
,
NDim
,
4096
>
prepareIndicePairsKernel
<
Index
,
IndexGrid
,
NDim
,
4096
>
<<<
tv
::
launch
::
getBlocks
(
numActIn
),
tv
::
launch
::
CUDA_NUM_THREADS
,
0
,
<<<
tv
::
launch
::
getBlocks
(
numActIn
),
tv
::
launch
::
CUDA_NUM_THREADS
,
0
,
d
.
getStream
()
>>>
(
indicesIn
,
indicesOut
,
gridsOut
,
indicePairs
,
d
.
getStream
()
>>>
(
indicesIn
,
indicesOut
,
gridsOut
,
indicePairs
,
indiceNum
,
indicePairUnique
,
kernelSize
,
stride
,
indiceNum
,
indicePairUnique
,
kernelSize
,
stride
,
padding
,
dilation
,
outSpatialShape
);
padding
,
dilation
,
outSpatialShape
);
TV_CHECK_CUDA_ERR
();
TV_CHECK_CUDA_ERR
();
// std::cout << "p1 gene time " << timer.report() / 1000.0 << std::endl;
// std::cout << "p1 gene time " << timer.report() / 1000.0 << std::endl;
return
1
;
return
1
;
...
@@ -75,18 +75,17 @@ struct CreateConvIndicePairFunctorP2<tv::GPU, Index, IndexGrid, NDim> {
    Index batchSize = gridsOut.dim(0);
    auto kernelVolume = indicePairs.dim(0);
    auto numActIn = indicesIn.dim(0);
    if (numActIn == 0) return 0;
    Index numAct = indicePairUnique.dim(0) - 1;
    assignGridAndIndiceOutKernel<Index, IndexGrid, NDim>
        <<<tv::launch::getBlocks(numAct), tv::launch::CUDA_NUM_THREADS, 0,
           d.getStream()>>>(indicesOut, gridsOut, numAct, indicePairs,
                            indicePairUnique, outSpatialShape, batchSize);
    TV_CHECK_CUDA_ERR();
    assignIndicePairsKernel<Index, IndexGrid, NDim>
        <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
           d.getStream()>>>(indicesOut, gridsOut, numActIn, indicePairs,
                            indicePairUnique, outSpatialShape);
    TV_CHECK_CUDA_ERR();
    if (resetGrid) {
      resetGridKernel<Index, IndexGrid, NDim>
...
@@ -111,8 +110,7 @@ struct CreateSubMIndicePairFunctor<tv::GPU, Index, IndexGrid, NDim> {
                   const tv::SimpleVector<Index, NDim> outSpatialShape,
                   bool transpose, bool resetGrid) {
    auto numActIn = indicesIn.dim(0);
    if (numActIn == 0) return 0;
    // auto timer = spconv::CudaContextTimer<>();
    prepareSubMGridKernel<Index, IndexGrid, NDim>
        <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
...
@@ -121,38 +119,40 @@ struct CreateSubMIndicePairFunctor<tv::GPU, Index, IndexGrid, NDim> {
    getSubMIndicePairsKernel<Index, IndexGrid, NDim, 4096>
        <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
           d.getStream()>>>(indicesIn, gridsOut, indicePairs, indiceNum,
                            kernelSize, stride, padding, dilation,
                            outSpatialShape);
    TV_CHECK_CUDA_ERR();
    // std::cout << "subm gene time " << timer.report() / 1000.0 << std::endl;
    if (resetGrid) {
      resetGridSubMKernel<Index, IndexGrid, NDim>
          <<<tv::launch::getBlocks(numActIn), tv::launch::CUDA_NUM_THREADS, 0,
             d.getStream()>>>(indicesIn.data(), gridsOut, outSpatialShape,
                              numActIn);
      TV_CHECK_CUDA_ERR();
    }
    return numActIn;
  }
};
}  // namespace functor

#define DECLARE_GPU_SPECS_INDEX_NDIM(Index, NDIM)                             \
  template struct functor::CreateConvIndicePairFunctor<tv::GPU, Index, int,   \
                                                       NDIM>;                 \
  template struct functor::CreateConvIndicePairFunctorP1<tv::GPU, Index, int, \
                                                         NDIM>;               \
  template struct functor::CreateConvIndicePairFunctorP2<tv::GPU, Index, int, \
                                                         NDIM>;               \
  template struct functor::CreateSubMIndicePairFunctor<tv::GPU, Index, int,   \
                                                       NDIM>;

#define DECLARE_GPU_INDEX(Index)          \
  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 1); \
  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 2); \
  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 3); \
  DECLARE_GPU_SPECS_INDEX_NDIM(Index, 4);

DECLARE_GPU_INDEX(int);

#undef DECLARE_GPU_INDEX
#undef DECLARE_GPU_SPECS_INDEX_NDIM
}  // namespace spconv
mmdet3d/ops/spconv/src/maxpool.cc
...
@@ -62,14 +62,14 @@ struct SparseMaxPoolBackwardFunctor<tv::CPU, T, Index> {
    }
  }
};
}  // namespace functor

#define DECLARE_CPU_SPECS_T_INDEX(T, Index)                                 \
  template struct functor::SparseMaxPoolForwardFunctor<tv::CPU, T, Index>;  \
  template struct functor::SparseMaxPoolBackwardFunctor<tv::CPU, T, Index>;

#define DECLARE_CPU_SPECS(T)         \
  DECLARE_CPU_SPECS_T_INDEX(T, int); \
  DECLARE_CPU_SPECS_T_INDEX(T, long);

DECLARE_CPU_SPECS(float);
...
@@ -79,4 +79,4 @@ DECLARE_CPU_SPECS(at::Half);
#undef DECLARE_CPU_SPECS
#undef DECLARE_CPU_SPECS_T_INDEX
}  // namespace spconv
mmdet3d/ops/spconv/src/maxpool_cuda.cu
...
@@ -13,13 +13,14 @@
// limitations under the License.
#include <ATen/ATen.h>
#include <spconv/maxpool.h>
#include <spconv/mp_helper.h>
#include <tensorview/helper_kernel.cu.h>
#include <tensorview/helper_launch.h>
#include <tensorview/tensorview.h>

#include <chrono>
#include <limits>
#include <type_traits>

namespace spconv {
...
@@ -54,10 +55,11 @@ __global__ void maxPoolFwdBlockKernel(T *outFeatures, const T *inFeatures,
}

template <typename T, typename Index, int NumTLP, int NumILP>
__global__ void maxPoolFwdGenericBlockKernel(T *outFeatures,
                                             const T *inFeatures,
                                             const Index *indicesIn,
                                             const Index *indicesOut,
                                             int numHot, int numPlanes) {
  // see http://www.nvidia.com/content/GTC-2010/pdfs/2238_GTC2010.pdf.
  int ILPStrideX[NumILP];
  Index RI[NumILP];
...
@@ -160,10 +162,11 @@ __global__ void maxPoolFwdGenericKernel(T *outFeatures, const T *inFeatures,
}

template <typename T, typename Index, int NumTLP, int NumILP>
__global__ void maxPoolBwdBlockKernel(const T *outFeatures,
                                      const T *inFeatures, const T *dout,
                                      T *din, const Index *indicesIn,
                                      const Index *indicesOut, int numHot,
                                      int numPlanes) {
  // see http://www.nvidia.com/content/GTC-2010/pdfs/2238_GTC2010.pdf.
  T in, out;
  Index idxo, idxi;
...
@@ -226,10 +229,11 @@ __global__ void maxPoolBwdGenericBlockKernel(const T *outFeatures,
}

template <typename T, typename Index, int NumTLP, int NumILP, typename VecType>
__global__ void maxPoolBwdVecBlockKernel(const T *outFeatures,
                                         const T *inFeatures, const T *dout,
                                         T *din, const Index *indicesIn,
                                         const Index *indicesOut, int numHot,
                                         int numPlanes) {
  // see http://www.nvidia.com/content/GTC-2010/pdfs/2238_GTC2010.pdf.
  int ILPStrideY[NumILP];
  constexpr int vecloadFactor = sizeof(VecType) / sizeof(T);
...
@@ -255,7 +259,8 @@ maxPoolBwdVecBlockKernel(const T *outFeatures, const T *inFeatures,
          reinterpret_cast<const VecType *>(inFeatures)[idxi];
      reinterpret_cast<VecType *>(bufdo)[0] =
          reinterpret_cast<const VecType *>(dout)[idxo];
      reinterpret_cast<VecType *>(bufdi)[0] =
          reinterpret_cast<VecType *>(din)[idxi];
#pragma unroll
      for (int i = 0; i < vecloadFactor; i++) {
...
@@ -263,16 +268,18 @@ maxPoolBwdVecBlockKernel(const T *outFeatures, const T *inFeatures,
          bufdi[i] += bufdo[i];
        }
      }
      reinterpret_cast<VecType *>(din)[idxi] =
          reinterpret_cast<VecType *>(bufdi)[0];
    }
  }
}

template <typename T, typename Index, int NumTLP, int NumILP>
__global__ void maxPoolBwdGenericKernel(const T *outFeatures,
                                        const T *inFeatures, const T *dout,
                                        T *din, const Index *indicesIn,
                                        const Index *indicesOut, int numHot,
                                        int numPlanes) {
  // see http://www.nvidia.com/content/GTC-2010/pdfs/2238_GTC2010.pdf.
  int ILPStrideX[NumILP];
  Index RI[NumILP];
...
@@ -313,8 +320,7 @@ struct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {
  void operator()(const tv::GPU &d, tv::TensorView<T> outFeatures,
                  tv::TensorView<const T> inFeatures,
                  tv::TensorView<const Index> indices, int size) {
    if (size <= 0) return;
    int numPlanes = inFeatures.dim(1);
    bool notFound = true;
    constexpr int vecloadFactor = sizeof(vecload_type_t) / sizeof(T);
...
@@ -326,13 +332,14 @@ struct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {
      if (notFound) {
        if (numPlanes % NumTLP == 0) {
          if (numHotBlock >= NumTLP) {
            maxPoolFwdVecBlockKernel<T, Index, int(NumTLP), NumILP,
                                     vecload_type_t>
                <<<dim3(std::min(size / NumTLP, 512), numPlanes / NumTLP),
                   dim3(NumTLP / vecloadFactor, NumTLP / NumILP), 0,
                   d.getStream()>>>(outFeatures.data(), inFeatures.data(),
                                    indices.subview(0).data(),
                                    indices.subview(1).data(), numHotBlock,
                                    numPlanes / vecloadFactor);
            TV_CHECK_CUDA_ERR();
          }
...
@@ -340,9 +347,9 @@ struct SparseMaxPoolForwardFunctor<tv::GPU, T, Index> {
            maxPoolFwdGenericKernel<T, Index, int(NumTLP), NumILP>
                <<<dim3(1, numPlanes / NumTLP), dim3(NumTLP / NumILP, NumTLP),
                   0, d.getStream()>>>(outFeatures.data(), inFeatures.data(),
                                       indices.subview(0).data() + numHotBlock,
                                       indices.subview(1).data() + numHotBlock,
                                       size - numHotBlock, numPlanes);
            TV_CHECK_CUDA_ERR();
          }
          notFound = false;
...
@@ -387,8 +394,7 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
                  tv::TensorView<const T> inFeatures,
                  tv::TensorView<const T> dout, tv::TensorView<T> din,
                  tv::TensorView<const Index> indices, int size) {
    if (size <= 0) return;
    int numPlanes = inFeatures.dim(1);
    bool notFound = true;
    constexpr int vecloadFactor = sizeof(vecload_type_t) / sizeof(T);
...
@@ -400,14 +406,15 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
      if (notFound) {
        if (numPlanes % NumTLP == 0) {
          if (numHotBlock >= NumTLP) {
            maxPoolBwdVecBlockKernel<T, Index, int(NumTLP), NumILP,
                                     vecload_type_t>
                <<<dim3(std::min(size / NumTLP, 512), numPlanes / NumTLP),
                   dim3(NumTLP / vecloadFactor, NumTLP / NumILP), 0,
                   d.getStream()>>>(outFeatures.data(), inFeatures.data(),
                                    dout.data(), din.data(),
                                    indices.subview(0).data(),
                                    indices.subview(1).data(), numHotBlock,
                                    numPlanes / vecloadFactor);
            TV_CHECK_CUDA_ERR();
          }
...
@@ -415,10 +422,10 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
            maxPoolBwdGenericKernel<T, Index, int(NumTLP), NumILP>
                <<<dim3(1, numPlanes / NumTLP), dim3(NumTLP / NumILP, NumTLP),
                   0, d.getStream()>>>(outFeatures.data(), inFeatures.data(),
                                       dout.data(), din.data(),
                                       indices.subview(0).data() + numHotBlock,
                                       indices.subview(1).data() + numHotBlock,
                                       size - numHotBlock, numPlanes);
            TV_CHECK_CUDA_ERR();
          }
          notFound = false;
...
@@ -454,10 +461,10 @@ struct SparseMaxPoolBackwardFunctor<tv::GPU, T, Index> {
  }
};
}  // namespace functor

#define DECLARE_GPU_SPECS_T_INDEX(T, Index)                                 \
  template struct functor::SparseMaxPoolForwardFunctor<tv::GPU, T, Index>;  \
  template struct functor::SparseMaxPoolBackwardFunctor<tv::GPU, T, Index>;

#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPECS_T_INDEX(T, int);
...
@@ -468,4 +475,4 @@ DECLARE_GPU_SPECS(at::Half);
#undef DECLARE_GPU_SPECS
#undef DECLARE_GPU_SPECS_T_INDEX
}  // namespace spconv
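Note: every launcher in this file follows the same split: the first size // NumTLP * NumTLP hot indices go to a tiled kernel (vectorized when numPlanes is a multiple of NumTLP), and the remainder goes to a generic kernel. A small Python sketch of that partitioning arithmetic; the NumTLP/NumILP values and the half/int4 element sizes are illustrative stand-ins, not the tuned configuration:

# Sketch of the launch-size arithmetic used by the max-pool functors above.
def launch_plan(size, num_planes, num_tlp=64, num_ilp=4,
                sizeof_vec=16, sizeof_t=2):
    vecload_factor = sizeof_vec // sizeof_t      # elements per vector load
    num_hot_block = (size // num_tlp) * num_tlp  # multiple-of-NumTLP prefix
    plan = []
    if num_planes % num_tlp == 0 and num_hot_block >= num_tlp:
        plan.append(('vec_block_kernel',
                     (min(size // num_tlp, 512), num_planes // num_tlp),  # grid
                     (num_tlp // vecload_factor, num_tlp // num_ilp)))    # block
    if size - num_hot_block > 0:
        plan.append(('generic_kernel',
                     (1, num_planes // num_tlp),
                     (num_tlp // num_ilp, num_tlp)))
    return plan

print(launch_plan(size=1000, num_planes=128))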
mmdet3d/ops/spconv/src/reordering.cc
...
@@ -19,7 +19,8 @@ namespace spconv {
namespace functor {
template <typename T, typename Index>
struct SparseGatherFunctor<tv::CPU, T, Index> {
  void operator()(const tv::CPU &d, tv::TensorView<T> buffer,
                  tv::TensorView<const T> features,
                  tv::TensorView<const Index> indices, int size) {
    int numPlanes = features.dim(1);
    for (int i = 0; i < size; ++i) {
...
@@ -33,30 +34,29 @@ struct SparseGatherFunctor<tv::CPU, T, Index> {
template <typename T, typename Index>
struct SparseScatterAddFunctor<tv::CPU, T, Index> {
  void operator()(const tv::CPU &d, tv::TensorView<T> outFeatures,
                  tv::TensorView<const T> buffer,
                  tv::TensorView<const Index> indices, int size, bool stable) {
    int numPlanes = outFeatures.dim(1);
    const T *buf = buffer.data();
    T *out = outFeatures.data();
    for (int i = 0; i < size; ++i) {
      buf = buffer.data() + i * numPlanes;
      out = outFeatures.data() + indices[i] * numPlanes;
      for (int j = 0; j < numPlanes; ++j) {
        out[j] += buf[j];
      }
    }
  }
};
}  // namespace functor

#define DECLARE_CPU_SPECS_T_INDEX(T, Index)                             \
  template struct functor::SparseGatherFunctor<tv::CPU, T, Index>;      \
  template struct functor::SparseScatterAddFunctor<tv::CPU, T, Index>;

#define DECLARE_CPU_SPECS(T)         \
  DECLARE_CPU_SPECS_T_INDEX(T, int); \
  DECLARE_CPU_SPECS_T_INDEX(T, long);

DECLARE_CPU_SPECS(float);
...
@@ -66,4 +66,4 @@ DECLARE_CPU_SPECS(at::Half);
#undef DECLARE_CPU_SPECS
#undef DECLARE_CPU_SPECS_T_INDEX
}  // namespace spconv
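Note: functionally, these two functors are the buffer primitives of gather-GEMM-scatter sparse convolution: gather packs the feature rows named by an index list into a contiguous buffer, and scatter-add accumulates buffer rows back into the output rows. A rough torch equivalent of the CPU loops above (a sketch of the semantics only, not the code path mmdet3d actually executes):

import torch

def sparse_gather(features, indices):
    # rows of `features` picked by `indices`, like the CPU gather loop
    return features.index_select(0, indices)

def sparse_scatter_add(out_features, buffer, indices):
    # out[indices[i]] += buffer[i], like the CPU scatter-add loop
    out_features.index_add_(0, indices, buffer)

features = torch.arange(12.).reshape(4, 3)
idx = torch.tensor([2, 0, 2])
buf = sparse_gather(features, idx)
out = torch.zeros(4, 3)
sparse_scatter_add(out, buf, idx)   # row 2 accumulates twice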
mmdet3d/ops/spconv/src/reordering_cuda.cu
...
@@ -13,17 +13,18 @@
// limitations under the License.
#include <ATen/ATen.h>
#include <spconv/mp_helper.h>
#include <spconv/reordering.cu.h>
#include <spconv/reordering.h>
#include <tensorview/helper_kernel.cu.h>
#include <tensorview/helper_launch.h>
#include <tensorview/tensorview.h>
#include <utility/timer.h>

#include <chrono>
#include <limits>
#include <type_traits>

namespace spconv {
namespace functor {
template <typename T, typename Index>
...
@@ -34,8 +35,7 @@ struct SparseGatherFunctor<tv::GPU, T, Index> {
  void operator()(const tv::GPU &d, tv::TensorView<T> buffer,
                  tv::TensorView<const T> features,
                  tv::TensorView<const Index> indices, int size) {
    if (size <= 0) return;
    int numPlanes = features.dim(1);
    bool notFound = true;
    constexpr int vecloadFactor = sizeof(vecload_type_t) / sizeof(T);
...
@@ -50,8 +50,9 @@ struct SparseGatherFunctor<tv::GPU, T, Index> {
          gatherVecBlockKernel<T, Index, int(NumTLP), NumILP, vecload_type_t>
              <<<dim3(numPlanes / NumTLP, size / NumTLP),
                 dim3(NumTLP / vecloadFactor, NumTLP / NumILP), 0,
                 d.getStream()>>>(buffer.data(), features.data(),
                                  indices.data(), nHotBlock,
                                  numPlanes / vecloadFactor);
          TV_CHECK_CUDA_ERR();
        }
...
@@ -60,8 +61,9 @@ struct SparseGatherFunctor<tv::GPU, T, Index> {
              <<<dim3(1, numPlanes / NumTLP),
                 dim3(NumTLP / NumILP, NumTLP / vecloadFactor), 0,
                 d.getStream()>>>(buffer.data() + nHotBlock * numPlanes,
                                  features.data(), indices.data() + nHotBlock,
                                  size - nHotBlock, numPlanes / vecloadFactor);
          TV_CHECK_CUDA_ERR();
        }
        notFound = false;
...
@@ -89,12 +91,11 @@ struct SparseScatterAddFunctor<tv::GPU, T, Index> {
  void operator()(const tv::GPU &d, tv::TensorView<T> outFeatures,
                  tv::TensorView<const T> buffer,
                  tv::TensorView<const Index> indices, int size, bool stable) {
    if (size <= 0) return;
    int numPlanes = outFeatures.dim(1);
    bool notFound = true;
    constexpr int vecloadFactor =
        sizeof(vecload_type_t) / sizeof(T);  // important for half.
    mp_for_each<kernel_block_t>([=, &d, &outFeatures, &buffer, &indices,
                                 &notFound](auto NumTLP) {
      // constexpr int NumILP = NumTLP / (64 / (NumTLP / vecloadFactor));
...
@@ -108,8 +109,8 @@ struct SparseScatterAddFunctor<tv::GPU, T, Index> {
              <<<dim3(numPlanes / NumTLP, size / NumTLP),
                 dim3(NumTLP / vecloadFactor, NumTLP / NumILP), 0,
                 d.getStream()>>>(outFeatures.data(), buffer.data(),
                                  indices.data(), nHotBlock,
                                  numPlanes / vecloadFactor);
          TV_CHECK_CUDA_ERR();
        }
        if (size - nHotBlock > 0) {
...
@@ -137,11 +138,10 @@ struct SparseScatterAddFunctor<tv::GPU, T, Index> {
    }
  }
};
}  // namespace functor

#define DECLARE_GPU_SPECS_T_INDEX(T, Index)                             \
  template struct functor::SparseGatherFunctor<tv::GPU, T, Index>;      \
  template struct functor::SparseScatterAddFunctor<tv::GPU, T, Index>;

#define DECLARE_GPU_SPECS(T) DECLARE_GPU_SPECS_T_INDEX(T, int);
...
@@ -152,4 +152,4 @@ DECLARE_GPU_SPECS(at::Half);
#undef DECLARE_GPU_SPECS
#undef DECLARE_GPU_SPECS_T_INDEX
}  // namespace spconv
tests/test_heads.py
...
@@ -170,3 +170,87 @@ def test_parta2_rpnhead_getboxes():
    assert result_list[0]['labels_3d'].shape == torch.Size([512])
    assert result_list[0]['cls_preds'].shape == torch.Size([512, 3])
    assert result_list[0]['boxes_3d'].shape == torch.Size([512, 7])


def test_vote_head():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    from mmdet3d.models.dense_heads import VoteHead
    bbox_head_cfg = dict(
        num_classes=10,
        bbox_coder=dict(
            type='PartialBinBasedBBoxCoder',
            num_sizes=10,
            num_dir_bins=5,
            with_rot=True,
            mean_sizes=[[2.114256, 1.620300, 0.927272],
                        [0.791118, 1.279516, 0.718182],
                        [0.923508, 1.867419, 0.845495],
                        [0.591958, 0.552978, 0.827272],
                        [0.699104, 0.454178, 0.75625],
                        [0.69519, 1.346299, 0.736364],
                        [0.528526, 1.002642, 1.172878],
                        [0.500618, 0.632163, 0.683424],
                        [0.404671, 1.071108, 1.688889],
                        [0.76584, 1.398258, 0.472728]]),
        vote_moudule_cfg=dict(
            in_channels=64,
            vote_per_seed=1,
            gt_per_seed=3,
            conv_channels=(64, 64),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            norm_feats=True,
            vote_loss=dict(
                type='ChamferDistance',
                mode='l1',
                reduction='none',
                loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(
            num_point=256,
            radius=0.3,
            num_sample=16,
            mlp_channels=[64, 32, 32, 32],
            use_xyz=True,
            normalize_xyz=True),
        feat_channels=(64, 64),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        objectness_loss=dict(
            type='CrossEntropyLoss',
            class_weight=[0.2, 0.8],
            reduction='sum',
            loss_weight=5.0),
        center_loss=dict(
            type='ChamferDistance',
            mode='l2',
            reduction='sum',
            loss_src_weight=10.0,
            loss_dst_weight=10.0),
        dir_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
        semantic_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0))

    train_cfg = dict(
        pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote')

    self = VoteHead(train_cfg=train_cfg, **bbox_head_cfg).cuda()
    fp_xyz = [torch.rand([2, 64, 3], dtype=torch.float32).cuda()]
    fp_features = [torch.rand([2, 64, 64], dtype=torch.float32).cuda()]
    fp_indices = [torch.randint(0, 128, [2, 64]).cuda()]

    input_dict = dict(
        fp_xyz=fp_xyz, fp_features=fp_features, fp_indices=fp_indices)

    # test forward
    ret_dict = self(input_dict, 'vote')
    assert ret_dict['center'].shape == torch.Size([2, 256, 3])
    assert ret_dict['obj_scores'].shape == torch.Size([2, 256, 2])
    assert ret_dict['size_res'].shape == torch.Size([2, 256, 10, 3])
    assert ret_dict['dir_res'].shape == torch.Size([2, 256, 5])
tests/test_losses.py  0 → 100644
import pytest
import torch


def test_chamfer_disrance():
    from mmdet3d.models.losses import ChamferDistance, chamfer_distance

    with pytest.raises(AssertionError):
        # test invalid mode
        ChamferDistance(mode='smoothl1')
        # test invalid type of reduction
        ChamferDistance(mode='l2', reduction=None)

    self = ChamferDistance(
        mode='l2', reduction='sum', loss_src_weight=1.0, loss_dst_weight=1.0)
    source = torch.tensor([[[-0.9888, 0.9683, -0.8494],
                            [-6.4536, 4.5146, 1.6861],
                            [2.0482, 5.6936, -1.4701],
                            [-0.5173, 5.6472, 2.1748],
                            [-2.8010, 5.4423, -1.2158],
                            [2.4018, 2.4389, -0.2403],
                            [-2.8811, 3.8486, 1.4750],
                            [-0.2031, 3.8969, -1.5245],
                            [1.3827, 4.9295, 1.1537],
                            [-2.6961, 2.2621, -1.0976]],
                           [[0.3692, 1.8409, -1.4983],
                            [1.9995, 6.3602, 0.1798],
                            [-2.1317, 4.6011, -0.7028],
                            [2.4158, 3.1482, 0.3169],
                            [-0.5836, 3.6250, -1.2650],
                            [-1.9862, 1.6182, -1.4901],
                            [2.5992, 1.2847, -0.8471],
                            [-0.3467, 5.3681, -1.4755],
                            [-0.8576, 3.3400, -1.7399],
                            [2.7447, 4.6349, 0.1994]]])

    target = torch.tensor([[[-0.4758, 1.0094, -0.8645],
                            [-0.3130, 0.8564, -0.9061],
                            [-0.1560, 2.0394, -0.8936],
                            [-0.3685, 1.6467, -0.8271],
                            [-0.2740, 2.2212, -0.7980]],
                           [[1.4856, 2.5299, -1.0047],
                            [2.3262, 3.3065, -0.9475],
                            [2.4593, 2.5870, -0.9423],
                            [0.0000, 0.0000, 0.0000],
                            [0.0000, 0.0000, 0.0000]]])

    loss_source, loss_target, indices1, indices2 = self(
        source, target, return_indices=True)

    assert torch.allclose(loss_source, torch.tensor(219.5936))
    assert torch.allclose(loss_target, torch.tensor(22.3705))
    assert (indices1 == indices1.new_tensor(
        [[0, 4, 4, 4, 4, 2, 4, 4, 4, 3],
         [0, 1, 0, 1, 0, 4, 2, 0, 0, 1]])).all()
    assert (indices2 == indices2.new_tensor(
        [[0, 0, 0, 0, 0], [0, 3, 6, 0, 0]])).all()

    loss_source, loss_target, indices1, indices2 = chamfer_distance(
        source, target, reduction='sum')

    assert torch.allclose(loss_source, torch.tensor(219.5936))
    assert torch.allclose(loss_target, torch.tensor(22.3705))
    assert (indices1 == indices1.new_tensor(
        [[0, 4, 4, 4, 4, 2, 4, 4, 4, 3],
         [0, 1, 0, 1, 0, 4, 2, 0, 0, 1]])).all()
    assert (indices2 == indices2.new_tensor(
        [[0, 0, 0, 0, 0], [0, 3, 6, 0, 0]])).all()
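Note: the values asserted above follow the usual sum-reduced squared-L2 Chamfer definition: each source point contributes its squared distance to the nearest target point, and symmetrically for each target point. A standalone sketch of that definition with torch.cdist (an illustration of the math, not the mmdet3d implementation):

import torch

def chamfer_l2_sum(src, dst):
    # src: (B, N, 3), dst: (B, M, 3)
    dist = torch.cdist(src, dst) ** 2     # (B, N, M) squared pair distances
    loss_src, idx_src = dist.min(dim=2)   # nearest dst for each src point
    loss_dst, idx_dst = dist.min(dim=1)   # nearest src for each dst point
    return loss_src.sum(), loss_dst.sum(), idx_src, idx_dst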
tests/test_nms.py  0 → 100644
import torch


def test_aligned_3d_nms():
    from mmdet3d.core.post_processing import aligned_3d_nms

    boxes = torch.tensor([[1.2261, 0.6679, -1.2678, 2.6547, 1.0428, 0.1000],
                          [5.0919, 0.6512, 0.7238, 5.4821, 1.2451, 2.1095],
                          [6.8392, -1.2205, 0.8570, 7.6920, 0.3220, 3.2223],
                          [3.6900, -0.4235, -1.0380, 4.4415, 0.2671, -0.1442],
                          [4.8071, -1.4311, 0.7004, 5.5788, -0.6837, 1.2487],
                          [2.1807, -1.5811, -1.1289, 3.0151, -0.1346, -0.5351],
                          [4.4631, -4.2588, -1.1403, 5.3012, -3.4463, -0.3212],
                          [4.7607, -3.3311, 0.5993, 5.2976, -2.7874, 1.2273],
                          [3.1265, 0.7113, -0.0296, 3.8944, 1.3532, 0.9785],
                          [5.5828, -3.5350, 1.0105, 8.2841, -0.0405, 3.3614],
                          [3.0003, -2.1099, -1.0608, 5.3423, 0.0328, 0.6252],
                          [2.7148, 0.6082, -1.1738, 3.6995, 1.2375, -0.0209],
                          [4.9263, -0.2152, 0.2889, 5.6963, 0.3416, 1.3471],
                          [5.0713, 1.3459, -0.2598, 5.6278, 1.9300, 1.2835],
                          [4.5985, -2.3996, -0.3393, 5.2705, -1.7306, 0.5698],
                          [4.1386, 0.5658, 0.0422, 4.8937, 1.1983, 0.9911],
                          [2.7694, -1.9822, -1.0637, 4.0691, 0.3575, -0.1393],
                          [4.6464, -3.0123, -1.0694, 5.1421, -2.4450, -0.3758],
                          [3.4754, 0.4443, -1.1282, 4.6727, 1.3786, 0.2550],
                          [2.5905, -0.3504, -1.1202, 3.1599, 0.1153, -0.3036],
                          [4.1336, -3.4813, 1.1477, 6.2091, -0.8776, 2.6757],
                          [3.9966, 0.2069, -1.1148, 5.0841, 1.0525, -0.0648],
                          [4.3216, -1.8647, 0.4733, 6.2069, 0.6671, 3.3363],
                          [4.7683, 0.4286, -0.0500, 5.5642, 1.2906, 0.8902],
                          [1.7337, 0.7625, -1.0058, 3.0675, 1.3617, 0.3849],
                          [4.7193, -3.3687, -0.9635, 5.1633, -2.7656, 1.1001],
                          [4.4704, -2.7744, -1.1127, 5.0971, -2.0228, -0.3150],
                          [2.7027, 0.6122, -0.9169, 3.3083, 1.2117, 0.6129],
                          [4.8789, -2.0025, 0.8385, 5.5214, -1.3668, 1.3552],
                          [3.7856, -1.7582, -0.1738, 5.3373, -0.6300, 0.5558]])

    scores = torch.tensor([
        3.6414e-03, 2.2901e-02, 2.7576e-04, 1.2238e-02, 5.9310e-04,
        1.2659e-01, 2.4104e-02, 5.0742e-03, 2.3581e-03, 2.0946e-07,
        8.8039e-01, 1.9127e-01, 5.0469e-05, 9.3638e-03, 3.0663e-03,
        9.4350e-03, 5.3380e-02, 1.7895e-01, 2.0048e-01, 1.1294e-03,
        3.0304e-08, 2.0237e-01, 1.0894e-08, 6.7972e-02, 6.7156e-01,
        9.3986e-04, 7.9470e-01, 3.9736e-01, 1.8000e-04, 7.9151e-04
    ])
    cls = torch.tensor([
        8, 8, 8, 3, 3, 1, 3, 3, 7, 8, 0, 6, 7, 8, 3, 7, 2, 7, 6, 3, 8, 6, 6,
        7, 6, 8, 7, 6, 3, 1
    ])
    pick = aligned_3d_nms(boxes, scores, cls, 0.25)
    expected_pick = torch.tensor([
        10, 26, 24, 27, 21, 18, 17, 5, 23, 16, 6, 1, 3, 15, 13, 7, 0, 14, 8,
        19, 25, 29, 4, 2, 28, 12, 9, 20, 22
    ])
    assert torch.all(pick == expected_pick)
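Note: aligned_3d_nms consumes axis-aligned boxes in (x1, y1, z1, x2, y2, z2) form, so the suppression criterion is a plain 3D intersection-over-union. A sketch of that overlap computation under the same corner convention (an illustrative helper, not the op itself):

import torch

def aligned_3d_iou(box, others):
    # box: (6,), others: (K, 6), both as (x1, y1, z1, x2, y2, z2)
    lt = torch.max(box[:3], others[:, :3])   # intersection min corner
    rb = torch.min(box[3:], others[:, 3:])   # intersection max corner
    whd = (rb - lt).clamp(min=0)             # intersection extents
    inter = whd.prod(dim=1)
    vol1 = (box[3:] - box[:3]).prod()
    vol2 = (others[:, 3:] - others[:, :3]).prod(dim=1)
    return inter / (vol1 + vol2 - inter)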
tests/test_parta2_bbox_head.py  0 → 100644
import pytest
import torch
from mmcv import Config
from torch.nn import BatchNorm1d, ReLU

from mmdet3d.core.bbox.samplers import IoUNegPiecewiseSampler
from mmdet3d.models import PartA2BboxHead
from mmdet3d.ops import make_sparse_convmodule
from mmdet3d.ops.spconv.conv import SubMConv3d


def test_loss():
    self = PartA2BboxHead(
        num_classes=3,
        seg_in_channels=16,
        part_in_channels=4,
        seg_conv_channels=[64, 64],
        part_conv_channels=[64, 64],
        merge_conv_channels=[128, 128],
        down_conv_channels=[128, 256],
        shared_fc_channels=[256, 512, 512, 512],
        cls_channels=[256, 256],
        reg_channels=[256, 256])

    cls_score = torch.Tensor([[-3.6810], [-3.9413], [-5.3971], [-17.1281],
                              [-5.9434], [-6.2251]])
    bbox_pred = torch.Tensor(
        [[-6.3016e-03, -5.2294e-03, -1.2793e-02, -1.0602e-02, -7.4086e-04,
          9.2471e-03, 7.3514e-03],
         [-1.1975e-02, -1.1578e-02, -3.1219e-02, 2.7754e-02, 6.9775e-03,
          9.4042e-04, 9.0472e-04],
         [3.7539e-03, -9.1897e-03, -5.3666e-03, -1.0380e-05, 4.3467e-03,
          4.2470e-03, 1.8355e-03],
         [-7.6093e-02, -1.2497e-01, -9.2942e-02, 2.1404e-02, 2.3750e-02,
          1.0365e-01, -1.3042e-02],
         [2.7577e-03, -1.1514e-02, -1.1097e-02, -2.4946e-03, 2.3268e-03,
          1.6797e-03, -1.4076e-03],
         [3.9635e-03, -7.8551e-03, -3.5125e-03, 2.1229e-04, 9.7042e-03,
          1.7499e-03, -5.1254e-03]])
    rois = torch.Tensor(
        [[0.0000, 13.3711, -12.5483, -1.9306, 1.7027, 4.2836, 1.4283, -1.1499],
         [0.0000, 19.2472, -7.2655, -10.6641, 3.3078, 83.1976, 29.3337, 2.4501],
         [0.0000, 13.8012, -10.9791, -3.0617, 0.2504, 1.2518, 0.8807, 3.1034],
         [0.0000, 16.2736, -9.0284, -2.0494, 8.2697, 31.2336, 9.1006, 1.9208],
         [0.0000, 10.4462, -13.6879, -3.1869, 7.3366, 0.3518, 1.7199, -0.7225],
         [0.0000, 11.3374, -13.6671, -3.2332, 4.9934, 0.3750, 1.6033, -0.9665]])
    labels = torch.Tensor([0.7100, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000])
    bbox_targets = torch.Tensor(
        [[0.0598, 0.0243, -0.0984, -0.0454, 0.0066, 0.1114, 0.1714]])
    pos_gt_bboxes = torch.Tensor(
        [[13.6686, -12.5586, -2.1553, 1.6271, 4.3119, 1.5966, 2.1631]])
    reg_mask = torch.Tensor([1, 0, 0, 0, 0, 0])
    label_weights = torch.Tensor(
        [0.0078, 0.0078, 0.0078, 0.0078, 0.0078, 0.0078])
    bbox_weights = torch.Tensor([1., 0., 0., 0., 0., 0.])

    loss = self.loss(cls_score, bbox_pred, rois, labels, bbox_targets,
                     pos_gt_bboxes, reg_mask, label_weights, bbox_weights)

    expected_loss_cls = torch.Tensor([
        2.0579e-02, 1.5005e-04, 3.5252e-05, 0.0000e+00, 2.0433e-05, 1.5422e-05
    ])
    expected_loss_bbox = torch.as_tensor(0.0622)
    expected_loss_corner = torch.Tensor([0.1379])

    assert torch.allclose(loss['loss_cls'], expected_loss_cls, 1e-3)
    assert torch.allclose(loss['loss_bbox'], expected_loss_bbox, 1e-3)
    assert torch.allclose(loss['loss_corner'], expected_loss_corner, 1e-3)
def test_get_targets():
    self = PartA2BboxHead(
        num_classes=3,
        seg_in_channels=16,
        part_in_channels=4,
        seg_conv_channels=[64, 64],
        part_conv_channels=[64, 64],
        merge_conv_channels=[128, 128],
        down_conv_channels=[128, 256],
        shared_fc_channels=[256, 512, 512, 512],
        cls_channels=[256, 256],
        reg_channels=[256, 256])

    sampling_result = IoUNegPiecewiseSampler(
        1,
        pos_fraction=0.55,
        neg_piece_fractions=[0.8, 0.2],
        neg_iou_piece_thrs=[0.55, 0.1],
        return_iou=True)
    sampling_result.pos_bboxes = torch.Tensor(
        [[8.1517, 0.0384, -1.9496, 1.5271, 4.1131, 1.4879, 1.2076]])
    sampling_result.pos_gt_bboxes = torch.Tensor(
        [[7.8417, -0.1405, -1.9652, 1.6122, 3.2838, 1.5331, -2.0835]])
    sampling_result.iou = torch.Tensor([
        6.7787e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 1.2839e-01, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        7.0261e-04, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        5.8915e-02, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 5.6628e-06,
        5.0271e-02, 0.0000e+00, 1.9608e-01, 0.0000e+00, 0.0000e+00,
        2.3519e-01, 1.6589e-02, 0.0000e+00, 1.0162e-01, 2.1634e-02,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 5.6326e-02, 1.3810e-01, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        4.5455e-02, 0.0000e+00, 1.0929e-03, 0.0000e+00, 8.8191e-02,
        1.1012e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.6236e-01,
        0.0000e+00, 1.1342e-01, 1.0636e-01, 9.9803e-02, 5.7394e-02,
        0.0000e+00, 1.6773e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 6.3464e-03, 0.0000e+00,
        2.7977e-01, 0.0000e+00, 3.1252e-01, 2.1642e-01, 2.2945e-01,
        0.0000e+00, 1.8297e-01, 0.0000e+00, 2.1908e-01, 1.1661e-01,
        1.3513e-01, 1.5898e-01, 7.4368e-03, 1.2523e-01, 1.4735e-04,
        0.0000e+00, 0.0000e+00, 0.0000e+00, 1.0948e-01, 2.5889e-01,
        4.4585e-04, 8.6483e-02, 1.6376e-01, 0.0000e+00, 2.2894e-01,
        2.7489e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 1.8334e-01,
        1.0193e-01, 2.3389e-01, 1.1035e-01, 3.3700e-01, 1.4397e-01,
        1.0379e-01, 0.0000e+00, 1.1226e-01, 0.0000e+00, 0.0000e+00,
        1.6201e-01, 0.0000e+00, 1.3569e-01
    ])

    rcnn_train_cfg = Config({
        'assigner': [{
            'type': 'MaxIoUAssigner',
            'iou_calculator': {
                'type': 'BboxOverlaps3D',
                'coordinate': 'lidar'
            },
            'pos_iou_thr': 0.55,
            'neg_iou_thr': 0.55,
            'min_pos_iou': 0.55,
            'ignore_iof_thr': -1
        }, {
            'type': 'MaxIoUAssigner',
            'iou_calculator': {
                'type': 'BboxOverlaps3D',
                'coordinate': 'lidar'
            },
            'pos_iou_thr': 0.55,
            'neg_iou_thr': 0.55,
            'min_pos_iou': 0.55,
            'ignore_iof_thr': -1
        }, {
            'type': 'MaxIoUAssigner',
            'iou_calculator': {
                'type': 'BboxOverlaps3D',
                'coordinate': 'lidar'
            },
            'pos_iou_thr': 0.55,
            'neg_iou_thr': 0.55,
            'min_pos_iou': 0.55,
            'ignore_iof_thr': -1
        }],
        'sampler': {
            'type': 'IoUNegPiecewiseSampler',
            'num': 128,
            'pos_fraction': 0.55,
            'neg_piece_fractions': [0.8, 0.2],
            'neg_iou_piece_thrs': [0.55, 0.1],
            'neg_pos_ub': -1,
            'add_gt_as_proposals': False,
            'return_iou': True
        },
        'cls_pos_thr': 0.75,
        'cls_neg_thr': 0.25
    })

    label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights, bbox_weights \
        = self.get_targets([sampling_result], rcnn_train_cfg)

    expected_label = torch.Tensor([
        0.8557, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0595, 0.0000, 0.1250,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0178, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0498, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.1740, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000
    ])
    expected_bbox_targets = torch.Tensor(
        [[0.0805, 0.0130, 0.0047, 0.0542, -0.2252, 0.0299, -0.1495]])
    expected_pos_gt_bboxes = torch.Tensor(
        [[7.8417, -0.1405, -1.9652, 1.6122, 3.2838, 1.5331, -2.0835]])
    # 1 positive followed by 127 negatives; uniform 1/128 label weights
    expected_reg_mask = torch.LongTensor([1] + [0] * 127)
    expected_label_weights = torch.Tensor([0.0078] * 128)
    expected_bbox_weights = torch.Tensor([1.] + [0.] * 127)

    assert torch.allclose(label, expected_label, 1e-2)
    assert torch.allclose(bbox_targets, expected_bbox_targets, 1e-2)
    assert torch.allclose(pos_gt_bboxes, expected_pos_gt_bboxes)
    assert torch.all(reg_mask == expected_reg_mask)
    assert torch.allclose(label_weights, expected_label_weights, 1e-2)
    assert torch.allclose(bbox_weights, expected_bbox_weights)
def test_get_bboxes():
    if not torch.cuda.is_available():
        pytest.skip()
    self = PartA2BboxHead(
        num_classes=3,
        seg_in_channels=16,
        part_in_channels=4,
        seg_conv_channels=[64, 64],
        part_conv_channels=[64, 64],
        merge_conv_channels=[128, 128],
        down_conv_channels=[128, 256],
        shared_fc_channels=[256, 512, 512, 512],
        cls_channels=[256, 256],
        reg_channels=[256, 256])

    rois = torch.Tensor(
        [[0.0000e+00, 5.6284e+01, 2.5712e+01, -1.3196e+00, 1.5943e+00,
          3.7509e+00, 1.4969e+00, 1.2105e-03],
         [0.0000e+00, 5.4685e+01, 2.9132e+01, -1.9178e+00, 1.6337e+00,
          4.1116e+00, 1.5472e+00, -1.7312e+00],
         [0.0000e+00, 5.5927e+01, 2.5830e+01, -1.4099e+00, 1.5958e+00,
          3.8861e+00, 1.4911e+00, -2.9276e+00],
         [0.0000e+00, 5.6306e+01, 2.6310e+01, -1.3729e+00, 1.5893e+00,
          3.7448e+00, 1.4924e+00, 1.6071e-01],
         [0.0000e+00, 3.1633e+01, -5.8557e+00, -1.2541e+00, 1.6517e+00,
          4.1829e+00, 1.5593e+00, -1.6037e+00],
         [0.0000e+00, 3.1789e+01, -5.5308e+00, -1.3012e+00, 1.6412e+00,
          4.1070e+00, 1.5487e+00, -1.6517e+00]]).cuda()
    cls_score = torch.Tensor([[-2.2061], [-2.1121], [-1.4478], [-2.9614],
                              [-0.1761], [0.7357]]).cuda()
    bbox_pred = torch.Tensor(
        [[-4.7917e-02, -1.6504e-02, -2.2340e-02, 5.1296e-03, -2.0984e-02,
          1.0598e-02, -1.1907e-01],
         [-1.6261e-02, -5.4005e-02, 6.2480e-03, 1.5496e-03, -1.3285e-02,
          8.1482e-03, -2.2707e-03],
         [-3.9423e-02, 2.0151e-02, -2.1138e-02, -1.1845e-03, -1.5343e-02,
          5.7208e-03, 8.5646e-03],
         [6.3104e-02, -3.9307e-02, 2.3005e-02, -7.0528e-03, -9.2637e-05,
          2.2656e-02, 1.6358e-02],
         [-1.4864e-03, 5.6840e-02, 5.8247e-03, -3.5541e-03, -4.9658e-03,
          2.5036e-03, 3.0302e-02],
         [-4.3259e-02, -1.9963e-02, 3.5004e-02, 3.7546e-03, 1.0876e-02,
          -3.9637e-04, 2.0445e-02]]).cuda()
    class_labels = [torch.Tensor([2, 2, 2, 2, 2, 2]).cuda()]
    class_pred = [
        torch.Tensor([[1.0877e-05, 1.0318e-05, 2.6599e-01],
                      [1.3105e-05, 1.1904e-05, 2.4432e-01],
                      [1.4530e-05, 1.4619e-05, 2.4395e-01],
                      [1.3251e-05, 1.3038e-05, 2.3703e-01],
                      [2.9156e-05, 2.5521e-05, 2.2826e-01],
                      [3.1665e-05, 2.9054e-05, 2.2077e-01]]).cuda()
    ]
    cfg = Config(
        dict(
            use_rotate_nms=True,
            use_raw_score=True,
            nms_thr=0.01,
            score_thr=0.1))
    result_list = self.get_bboxes(rois, cls_score, bbox_pred, class_labels,
                                  class_pred, None, cfg)
    selected_bboxes, selected_scores, selected_label_preds = result_list[0]

    expected_selected_bboxes = torch.Tensor(
        [[56.2170, 25.9074, -1.3610, 1.6025, 3.6730, 1.5128, -0.1179],
         [54.6521, 28.8846, -1.9145, 1.6362, 4.0573, 1.5599, -1.7335],
         [31.6179, -5.6004, -1.2470, 1.6458, 4.1622, 1.5632, -1.5734]]).cuda()
    expected_selected_scores = torch.Tensor([-2.2061, -2.1121, -0.1761]).cuda()
    expected_selected_label_preds = torch.Tensor([2., 2., 2.]).cuda()
    assert torch.allclose(selected_bboxes, expected_selected_bboxes, 1e-3)
    assert torch.allclose(selected_scores, expected_selected_scores, 1e-3)
    assert torch.allclose(selected_label_preds, expected_selected_label_preds)
def test_multi_class_nms():
    if not torch.cuda.is_available():
        pytest.skip()
    self = PartA2BboxHead(
        num_classes=3,
        seg_in_channels=16,
        part_in_channels=4,
        seg_conv_channels=[64, 64],
        part_conv_channels=[64, 64],
        merge_conv_channels=[128, 128],
        down_conv_channels=[128, 256],
        shared_fc_channels=[256, 512, 512, 512],
        cls_channels=[256, 256],
        reg_channels=[256, 256])
    box_probs = torch.Tensor([[1.0877e-05, 1.0318e-05, 2.6599e-01],
                              [1.3105e-05, 1.1904e-05, 2.4432e-01],
                              [1.4530e-05, 1.4619e-05, 2.4395e-01],
                              [1.3251e-05, 1.3038e-05, 2.3703e-01],
                              [2.9156e-05, 2.5521e-05, 2.2826e-01],
                              [3.1665e-05, 2.9054e-05, 2.2077e-01],
                              [5.5738e-06, 6.2453e-06, 2.1978e-01],
                              [9.0193e-06, 9.2154e-06, 2.1418e-01],
                              [1.4004e-05, 1.3209e-05, 2.1316e-01],
                              [7.9210e-06, 8.1767e-06, 2.1304e-01]]).cuda()
    box_preds = torch.Tensor(
        [[5.6217e+01, 2.5908e+01, -1.3611e+00, 1.6025e+00, 3.6730e+00,
          1.5129e+00, -1.1786e-01],
         [5.4653e+01, 2.8885e+01, -1.9145e+00, 1.6362e+00, 4.0574e+00,
          1.5599e+00, -1.7335e+00],
         [5.5809e+01, 2.5686e+01, -1.4457e+00, 1.5939e+00, 3.8270e+00,
          1.4997e+00, -2.9191e+00],
         [5.6107e+01, 2.6082e+01, -1.3557e+00, 1.5782e+00, 3.7444e+00,
          1.5266e+00, 1.7707e-01],
         [3.1618e+01, -5.6004e+00, -1.2470e+00, 1.6459e+00, 4.1622e+00,
          1.5632e+00, -1.5734e+00],
         [3.1605e+01, -5.6342e+00, -1.2467e+00, 1.6474e+00, 4.1519e+00,
          1.5481e+00, -1.6313e+00],
         [5.6211e+01, 2.7294e+01, -1.5350e+00, 1.5422e+00, 3.7733e+00,
          1.5140e+00, 9.5846e-02],
         [5.5907e+01, 2.7155e+01, -1.4712e+00, 1.5416e+00, 3.7611e+00,
          1.5142e+00, -5.2059e-02],
         [5.4000e+01, 3.0585e+01, -1.6874e+00, 1.6495e+00, 4.0376e+00,
          1.5554e+00, -1.7900e+00],
         [5.6007e+01, 2.6300e+01, -1.3945e+00, 1.5716e+00, 3.7064e+00,
          1.4715e+00, -2.9639e+00]]).cuda()
    selected = self.multi_class_nms(box_probs, box_preds, 0.1, 0.001)
    expected_selected = torch.Tensor([0, 1, 4, 8]).cuda()
    assert torch.all(selected == expected_selected)
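
A rough sketch of what a multi-class NMS of this shape typically does — threshold and suppress per class, then pool the surviving indices — with `nms_bev(boxes, scores, thr)` as a hypothetical stand-in for the rotated BEV NMS op:

import torch

def multi_class_nms_sketch(box_probs, box_preds, score_thr, nms_thr, nms_bev):
    # Run score filtering and NMS independently for each class,
    # then merge the surviving box indices across classes.
    selected = []
    for cls in range(box_probs.shape[1]):
        scores = box_probs[:, cls]
        mask = scores >= score_thr              # per-class score filter
        if mask.any():
            idx = torch.nonzero(mask).view(-1)
            keep = nms_bev(box_preds[idx], scores[idx], nms_thr)
            selected.append(idx[keep])
    if not selected:
        return box_probs.new_empty(0)
    return torch.unique(torch.cat(selected))    # sorted, deduplicated indices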
def test_make_sparse_convmodule():
    # Note: the three invalid-order cases are asserted in separate
    # pytest.raises blocks; a single block would stop at the first raise
    # and silently skip the remaining two calls.
    with pytest.raises(AssertionError):
        # assert invalid order setting
        make_sparse_convmodule(
            in_channels=4,
            out_channels=8,
            kernel_size=3,
            indice_key='rcnn_part2',
            norm_cfg=dict(type='BN1d'),
            order=('norm', 'act', 'conv', 'norm'))
    with pytest.raises(AssertionError):
        # assert invalid type of order
        make_sparse_convmodule(
            in_channels=4,
            out_channels=8,
            kernel_size=3,
            indice_key='rcnn_part2',
            norm_cfg=dict(type='BN1d'),
            order=['norm', 'conv'])
    with pytest.raises(AssertionError):
        # assert invalid elements of order
        make_sparse_convmodule(
            in_channels=4,
            out_channels=8,
            kernel_size=3,
            indice_key='rcnn_part2',
            norm_cfg=dict(type='BN1d'),
            order=('conv', 'normal', 'activate'))

    sparse_convmodule = make_sparse_convmodule(
        in_channels=4,
        out_channels=64,
        kernel_size=3,
        padding=1,
        indice_key='rcnn_part0',
        norm_cfg=dict(type='BN1d', eps=0.001, momentum=0.01))
    assert isinstance(sparse_convmodule[0], SubMConv3d)
    assert isinstance(sparse_convmodule[1], BatchNorm1d)
    assert isinstance(sparse_convmodule[2], ReLU)
    assert sparse_convmodule[1].num_features == 64
    assert sparse_convmodule[1].eps == 0.001
    assert sparse_convmodule[1].affine is True
    assert sparse_convmodule[1].track_running_stats is True
    assert sparse_convmodule[2].inplace is True

    pre_act = make_sparse_convmodule(
        in_channels=4,
        out_channels=8,
        kernel_size=3,
        indice_key='rcnn_part1',
        norm_cfg=dict(type='BN1d'),
        order=('norm', 'act', 'conv'))
    assert isinstance(pre_act[0], BatchNorm1d)
    assert isinstance(pre_act[1], ReLU)
    assert isinstance(pre_act[2], SubMConv3d)
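
The `order` tuple controls how the conv, norm, and activation layers are stacked; only a length-3 permutation of ('conv', 'norm', 'act') is accepted, which is what the three `pytest.raises` cases above probe. A rough sketch of the assembly under those assumptions — the real helper builds its norm layer from `norm_cfg`, whereas `BatchNorm1d` is hard-coded here:

import torch.nn as nn
from mmdet3d.ops import spconv

def make_sparse_convmodule_sketch(in_channels, out_channels, kernel_size,
                                  indice_key, order=('conv', 'norm', 'act')):
    # Reject anything that is not a permutation of the three expected tokens.
    assert isinstance(order, tuple) and len(order) == 3
    assert set(order) == {'conv', 'norm', 'act'}
    # One layer per token, stacked in the requested order.
    layers = {
        'conv': spconv.SubMConv3d(in_channels, out_channels, kernel_size,
                                  indice_key=indice_key),
        'norm': nn.BatchNorm1d(out_channels, eps=0.001, momentum=0.01),
        'act': nn.ReLU(inplace=True),
    }
    return spconv.SparseSequential(*[layers[name] for name in order])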
tests/test_roiaware_pool3d.py
View file @ f27d308f

 import pytest
 import torch
-from mmdet3d.ops.roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_cpu,
-                                         points_in_boxes_gpu)
+from mmdet3d.ops.roiaware_pool3d import (RoIAwarePool3d, points_in_boxes_batch,
+                                         points_in_boxes_cpu,
+                                         points_in_boxes_gpu)
...
@@ -83,3 +84,29 @@ def test_points_in_boxes_cpu():
         dtype=torch.int32)
     assert point_indices.shape == torch.Size([2, 15])
     assert (point_indices == expected_point_indices).all()
+
+
+def test_points_in_boxes_batch():
+    if not torch.cuda.is_available():
+        pytest.skip('test requires GPU and torch+cuda')
+    # boxes (m, 7) with bottom center in lidar coordinate
+    boxes = torch.tensor(
+        [[[1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 0.3],
+          [-10.0, 23.0, 16.0, 10, 20, 20, 0.5]]],
+        dtype=torch.float32).cuda()
+    # points (n, 3) in lidar coordinate
+    pts = torch.tensor(
+        [[[1, 2, 3.3], [1.2, 2.5, 3.0], [0.8, 2.1, 3.5], [1.6, 2.6, 3.6],
+          [0.8, 1.2, 3.9], [-9.2, 21.0, 18.2], [3.8, 7.9, 6.3],
+          [4.7, 3.5, -12.2], [3.8, 7.6, -2], [-10.6, -12.9, -20],
+          [-16, -18, 9], [-21.3, -52, -5], [0, 0, 0], [6, 7, 8],
+          [-2, -3, -4]]],
+        dtype=torch.float32).cuda()
+    point_indices = points_in_boxes_batch(points=pts, boxes=boxes)
+    expected_point_indices = torch.tensor(
+        [[[1, 0], [1, 0], [1, 0], [1, 0], [1, 0], [0, 1], [0, 0], [0, 0],
+          [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]],
+        dtype=torch.int32).cuda()
+    assert point_indices.shape == torch.Size([1, 15, 2])
+    assert (point_indices == expected_point_indices).all()
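
The membership test behind these ops is simple to state: shift the point into the box frame, undo the yaw rotation about z, and compare against the half-extents, with z measured from the bottom face per the bottom-center convention noted in the test. A pure-PyTorch reference sketch for a single point and box (exact boundary and sign conventions of the CUDA op may differ):

import torch

def point_in_box(pt, box):
    # box = (x, y, z, dx, dy, dz, yaw), bottom-center origin, yaw about z.
    local = pt - box[:3]                           # shift into the box frame
    c, s = torch.cos(-box[6]), torch.sin(-box[6])  # undo the box yaw
    x = local[0] * c - local[1] * s
    y = local[0] * s + local[1] * c
    return bool((x.abs() < box[3] / 2) & (y.abs() < box[4] / 2)
                & (local[2] >= 0) & (local[2] <= box[5]))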
tests/test_voting_module.py → tests/test_vote_module.py
View file @ f27d308f

 import torch

-def test_voting_module():
-    from mmdet3d.ops import VoteModule
+def test_vote_module():
+    from mmdet3d.models.model_utils import VoteModule

-    self = VoteModule(vote_per_seed=3, in_channels=8)
+    vote_loss = dict(
+        type='ChamferDistance',
+        mode='l1',
+        reduction='none',
+        loss_dst_weight=10.0)
+    self = VoteModule(vote_per_seed=3, in_channels=8, vote_loss=vote_loss)
     seed_xyz = torch.rand([2, 64, 3], dtype=torch.float32)  # (b, npoints, 3)
     seed_features = torch.rand(
...
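
The diff above is truncated, so for context a hedged usage sketch of the relocated module; the (b, in_channels, npoints) feature layout and the return value are assumptions, not confirmed by this page:

import torch
from mmdet3d.models.model_utils import VoteModule

self = VoteModule(vote_per_seed=3, in_channels=8)
seed_xyz = torch.rand([2, 64, 3])        # (b, npoints, 3)
seed_features = torch.rand([2, 8, 64])   # assumed (b, in_channels, npoints)
# Each seed casts vote_per_seed votes, so 64 seeds yield 192 votes per sample.
ret = self(seed_xyz, seed_features)      # assumed: vote points plus vote features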
tools/data_converter/create_gt_database.py
View file @ f27d308f

...
@@ -8,8 +8,8 @@ from mmcv import track_iter_progress
 from pycocotools.coco import COCO

 import mmdet3d.core.bbox.box_np_ops as box_np_ops
-from mmdet3d.core.evaluation.bbox_overlaps import bbox_overlaps
 from mmdet3d.datasets import build_dataset
+from mmdet.core.evaluation.bbox_overlaps import bbox_overlaps
 from mmdet.ops import roi_align
...