Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
aec41c7f
Commit
aec41c7f
authored
Apr 27, 2020
by
zhangwenwei
Browse files
Merge master
parents
49f06039
4eca6606
Changes
62
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1257 additions
and
687 deletions
+1257
-687
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp
+56
-54
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu
+102
-90
mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d.cpp
mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d.cpp
+123
-100
mmdet3d/ops/spconv/include/spconv/fused_spconv_ops.h
mmdet3d/ops/spconv/include/spconv/fused_spconv_ops.h
+34
-27
mmdet3d/ops/spconv/include/spconv/pool_ops.h
mmdet3d/ops/spconv/include/spconv/pool_ops.h
+9
-8
mmdet3d/ops/spconv/include/spconv/spconv_ops.h
mmdet3d/ops/spconv/include/spconv/spconv_ops.h
+162
-131
mmdet3d/ops/spconv/include/torch_utils.h
mmdet3d/ops/spconv/include/torch_utils.h
+34
-33
mmdet3d/ops/voxel/src/scatter_points_cpu.cpp
mmdet3d/ops/voxel/src/scatter_points_cpu.cpp
+101
-110
mmdet3d/ops/voxel/src/scatter_points_cuda.cu
mmdet3d/ops/voxel/src/scatter_points_cuda.cu
+6
-6
mmdet3d/ops/voxel/src/voxelization.h
mmdet3d/ops/voxel/src/voxelization.h
+4
-4
mmdet3d/ops/voxel/src/voxelization_cpu.cpp
mmdet3d/ops/voxel/src/voxelization_cpu.cpp
+78
-115
mmdet3d/ops/voxel/src/voxelization_cuda.cu
mmdet3d/ops/voxel/src/voxelization_cuda.cu
+6
-6
mmdet3d/utils/__init__.py
mmdet3d/utils/__init__.py
+3
-2
tests/test_anchor.py
tests/test_anchor.py
+149
-0
tests/test_config.py
tests/test_config.py
+29
-1
tools/create_data.py
tools/create_data.py
+20
-0
tools/data_converter/scannet_converter.py
tools/data_converter/scannet_converter.py
+31
-0
tools/data_converter/scannet_data_utils.py
tools/data_converter/scannet_data_utils.py
+82
-0
tools/data_converter/sunrgbd_converter.py
tools/data_converter/sunrgbd_converter.py
+36
-0
tools/data_converter/sunrgbd_data_utils.py
tools/data_converter/sunrgbd_data_utils.py
+192
-0
No files found.
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp
View file @
aec41c7f
//Modified from
//https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//Points in boxes cpu
//Written by Shaoshuai Shi
//All Rights Reserved 2019.
//
Modified from
//
https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//
Points in boxes cpu
//
Written by Shaoshuai Shi
//
All Rights Reserved 2019.
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
// #define DEBUG
inline
void
lidar_to_local_coords_cpu
(
float
shift_x
,
float
shift_y
,
float
rz
,
float
&
local_x
,
float
&
local_y
){
// should rotate pi/2 + alpha to translate LiDAR to local
float
rot_angle
=
rz
+
M_PI
/
2
;
float
cosa
=
cos
(
rot_angle
),
sina
=
sin
(
rot_angle
);
local_x
=
shift_x
*
cosa
+
shift_y
*
(
-
sina
);
local_y
=
shift_x
*
sina
+
shift_y
*
cosa
;
inline
void
lidar_to_local_coords_cpu
(
float
shift_x
,
float
shift_y
,
float
rz
,
float
&
local_x
,
float
&
local_y
)
{
// should rotate pi/2 + alpha to translate LiDAR to local
float
rot_angle
=
rz
+
M_PI
/
2
;
float
cosa
=
cos
(
rot_angle
),
sina
=
sin
(
rot_angle
);
local_x
=
shift_x
*
cosa
+
shift_y
*
(
-
sina
);
local_y
=
shift_x
*
sina
+
shift_y
*
cosa
;
}
inline
int
check_pt_in_box3d_cpu
(
const
float
*
pt
,
const
float
*
box3d
,
float
&
local_x
,
float
&
local_y
){
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the bottom center
float
x
=
pt
[
0
],
y
=
pt
[
1
],
z
=
pt
[
2
];
float
cx
=
box3d
[
0
],
cy
=
box3d
[
1
],
cz
=
box3d
[
2
];
float
w
=
box3d
[
3
],
l
=
box3d
[
4
],
h
=
box3d
[
5
],
rz
=
box3d
[
6
];
cz
+=
h
/
2.0
;
// shift to the center since cz in box3d is the bottom center
if
(
fabsf
(
z
-
cz
)
>
h
/
2.0
)
return
0
;
lidar_to_local_coords_cpu
(
x
-
cx
,
y
-
cy
,
rz
,
local_x
,
local_y
);
float
in_flag
=
(
local_x
>
-
l
/
2.0
)
&
(
local_x
<
l
/
2.0
)
&
(
local_y
>
-
w
/
2.0
)
&
(
local_y
<
w
/
2.0
);
return
in_flag
;
inline
int
check_pt_in_box3d_cpu
(
const
float
*
pt
,
const
float
*
box3d
,
float
&
local_x
,
float
&
local_y
)
{
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
// bottom center
float
x
=
pt
[
0
],
y
=
pt
[
1
],
z
=
pt
[
2
];
float
cx
=
box3d
[
0
],
cy
=
box3d
[
1
],
cz
=
box3d
[
2
];
float
w
=
box3d
[
3
],
l
=
box3d
[
4
],
h
=
box3d
[
5
],
rz
=
box3d
[
6
];
cz
+=
h
/
2.0
;
// shift to the center since cz in box3d is the bottom center
if
(
fabsf
(
z
-
cz
)
>
h
/
2.0
)
return
0
;
lidar_to_local_coords_cpu
(
x
-
cx
,
y
-
cy
,
rz
,
local_x
,
local_y
);
float
in_flag
=
(
local_x
>
-
l
/
2.0
)
&
(
local_x
<
l
/
2.0
)
&
(
local_y
>
-
w
/
2.0
)
&
(
local_y
<
w
/
2.0
);
return
in_flag
;
}
int
points_in_boxes_cpu
(
at
::
Tensor
boxes_tensor
,
at
::
Tensor
pts_tensor
,
at
::
Tensor
pts_indices_tensor
){
// params boxes: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the
bottom center, each box DO NOT overlaps
// params pts: (npoints, 3) [x, y, z]
in LiDAR coordinate
// params pts_indices: (N, npoints)
CHECK_CONTIGUOUS
(
boxes_tensor
);
CHECK_CONTIGUOUS
(
pts_tensor
);
CHECK_CONTIGUOUS
(
pts_indices_tensor
);
int
boxes_num
=
boxes_tensor
.
size
(
0
);
int
pts_num
=
pts_tensor
.
size
(
0
);
const
float
*
boxes
=
boxes_tensor
.
data
<
float
>
();
const
float
*
pts
=
pts_tensor
.
data
<
float
>
();
int
*
pts_indices
=
pts_indices_tensor
.
data
<
int
>
();
float
local_x
=
0
,
local_y
=
0
;
for
(
int
i
=
0
;
i
<
boxes_num
;
i
++
){
for
(
int
j
=
0
;
j
<
pts_num
;
j
++
){
int
cur_in_flag
=
check_pt_in_box3d_cpu
(
pts
+
j
*
3
,
boxes
+
i
*
7
,
local_x
,
local_y
);
pt
s
_in
dices
[
i
*
pts_num
+
j
]
=
cur_in_flag
;
}
int
points_in_boxes_cpu
(
at
::
Tensor
boxes_tensor
,
at
::
Tensor
pts_tensor
,
at
::
Tensor
pts_indices_tensor
)
{
// params boxes: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the
//
bottom center, each box DO NOT overlaps
params pts: (npoints, 3) [x, y, z]
//
in LiDAR coordinate
params pts_indices: (N, npoints)
CHECK_CONTIGUOUS
(
boxes_tensor
);
CHECK_CONTIGUOUS
(
pts_tensor
);
CHECK_CONTIGUOUS
(
pts_indices_tensor
);
int
boxes_num
=
boxes_tensor
.
size
(
0
);
int
pts_num
=
pts_tensor
.
size
(
0
);
const
float
*
boxes
=
boxes_tensor
.
data
_ptr
<
float
>
();
const
float
*
pts
=
pts_tensor
.
data
_ptr
<
float
>
();
int
*
pts_indices
=
pts_indices_tensor
.
data
_ptr
<
int
>
();
float
local_x
=
0
,
local_y
=
0
;
for
(
int
i
=
0
;
i
<
boxes_num
;
i
++
)
{
for
(
int
j
=
0
;
j
<
pts_num
;
j
++
)
{
int
cur_in_flag
=
check_
pt_in
_box3d_cpu
(
pts
+
j
*
3
,
boxes
+
i
*
7
,
local_x
,
local_y
)
;
pts_indices
[
i
*
pts_num
+
j
]
=
cur_in_flag
;
}
}
return
1
;
return
1
;
}
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu
View file @
aec41c7f
//Modified from
//https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//Points in boxes gpu
//Written by Shaoshuai Shi
//All Rights Reserved 2019.
//
Modified from
//
https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//
Points in boxes gpu
//
Written by Shaoshuai Shi
//
All Rights Reserved 2019.
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#define THREADS_PER_BLOCK 256
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
#define CHECK_CUDA(x) \
TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
// #define DEBUG
__device__
inline
void
lidar_to_local_coords
(
float
shift_x
,
float
shift_y
,
float
rz
,
float
&
local_x
,
float
&
local_y
){
// should rotate pi/2 + alpha to translate LiDAR to local
float
rot_angle
=
rz
+
M_PI
/
2
;
float
cosa
=
cos
(
rot_angle
),
sina
=
sin
(
rot_angle
);
local_x
=
shift_x
*
cosa
+
shift_y
*
(
-
sina
);
local_y
=
shift_x
*
sina
+
shift_y
*
cosa
;
__device__
inline
void
lidar_to_local_coords
(
float
shift_x
,
float
shift_y
,
float
rz
,
float
&
local_x
,
float
&
local_y
)
{
// should rotate pi/2 + alpha to translate LiDAR to local
float
rot_angle
=
rz
+
M_PI
/
2
;
float
cosa
=
cos
(
rot_angle
),
sina
=
sin
(
rot_angle
);
local_x
=
shift_x
*
cosa
+
shift_y
*
(
-
sina
);
local_y
=
shift_x
*
sina
+
shift_y
*
cosa
;
}
__device__
inline
int
check_pt_in_box3d
(
const
float
*
pt
,
const
float
*
box3d
,
float
&
local_x
,
float
&
local_y
){
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the bottom center
float
x
=
pt
[
0
],
y
=
pt
[
1
],
z
=
pt
[
2
];
float
cx
=
box3d
[
0
],
cy
=
box3d
[
1
],
cz
=
box3d
[
2
];
float
w
=
box3d
[
3
],
l
=
box3d
[
4
],
h
=
box3d
[
5
],
rz
=
box3d
[
6
];
cz
+=
h
/
2.0
;
// shift to the center since cz in box3d is the bottom center
if
(
fabsf
(
z
-
cz
)
>
h
/
2.0
)
return
0
;
lidar_to_local_coords
(
x
-
cx
,
y
-
cy
,
rz
,
local_x
,
local_y
);
float
in_flag
=
(
local_x
>
-
l
/
2.0
)
&
(
local_x
<
l
/
2.0
)
&
(
local_y
>
-
w
/
2.0
)
&
(
local_y
<
w
/
2.0
);
return
in_flag
;
__device__
inline
int
check_pt_in_box3d
(
const
float
*
pt
,
const
float
*
box3d
,
float
&
local_x
,
float
&
local_y
)
{
// param pt: (x, y, z)
// param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
// bottom center
float
x
=
pt
[
0
],
y
=
pt
[
1
],
z
=
pt
[
2
];
float
cx
=
box3d
[
0
],
cy
=
box3d
[
1
],
cz
=
box3d
[
2
];
float
w
=
box3d
[
3
],
l
=
box3d
[
4
],
h
=
box3d
[
5
],
rz
=
box3d
[
6
];
cz
+=
h
/
2.0
;
// shift to the center since cz in box3d is the bottom center
if
(
fabsf
(
z
-
cz
)
>
h
/
2.0
)
return
0
;
lidar_to_local_coords
(
x
-
cx
,
y
-
cy
,
rz
,
local_x
,
local_y
);
float
in_flag
=
(
local_x
>
-
l
/
2.0
)
&
(
local_x
<
l
/
2.0
)
&
(
local_y
>
-
w
/
2.0
)
&
(
local_y
<
w
/
2.0
);
return
in_flag
;
}
__global__
void
points_in_boxes_kernel
(
int
batch_size
,
int
boxes_num
,
int
pts_num
,
const
float
*
boxes
,
const
float
*
pts
,
int
*
box_idx_of_points
){
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the bottom center, each box DO NOT overlaps
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
// params boxes_idx_of_points: (B, npoints), default -1
int
bs_idx
=
blockIdx
.
y
;
int
pt_idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
bs_idx
>=
batch_size
||
pt_idx
>=
pts_num
)
return
;
boxes
+=
bs_idx
*
boxes_num
*
7
;
pts
+=
bs_idx
*
pts_num
*
3
+
pt_idx
*
3
;
box_idx_of_points
+=
bs_idx
*
pts_num
+
pt_idx
;
float
local_x
=
0
,
local_y
=
0
;
int
cur_in_flag
=
0
;
for
(
int
k
=
0
;
k
<
boxes_num
;
k
++
){
cur_in_flag
=
check_pt_in_box3d
(
pts
,
boxes
+
k
*
7
,
local_x
,
local_y
);
if
(
cur_in_flag
){
box_idx_of_points
[
0
]
=
k
;
break
;
}
__global__
void
points_in_boxes_kernel
(
int
batch_size
,
int
boxes_num
,
int
pts_num
,
const
float
*
boxes
,
const
float
*
pts
,
int
*
box_idx_of_points
)
{
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
// the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x,
// y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default
// -1
int
bs_idx
=
blockIdx
.
y
;
int
pt_idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
bs_idx
>=
batch_size
||
pt_idx
>=
pts_num
)
return
;
boxes
+=
bs_idx
*
boxes_num
*
7
;
pts
+=
bs_idx
*
pts_num
*
3
+
pt_idx
*
3
;
box_idx_of_points
+=
bs_idx
*
pts_num
+
pt_idx
;
float
local_x
=
0
,
local_y
=
0
;
int
cur_in_flag
=
0
;
for
(
int
k
=
0
;
k
<
boxes_num
;
k
++
)
{
cur_in_flag
=
check_pt_in_box3d
(
pts
,
boxes
+
k
*
7
,
local_x
,
local_y
);
if
(
cur_in_flag
)
{
box_idx_of_points
[
0
]
=
k
;
break
;
}
}
}
void
points_in_boxes_launcher
(
int
batch_size
,
int
boxes_num
,
int
pts_num
,
const
float
*
boxes
,
const
float
*
pts
,
int
*
box_idx_of_points
){
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the bottom center, each box DO NOT overlaps
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
// params boxes_idx_of_points: (B, npoints), default -1
cudaError_t
err
;
dim3
blocks
(
DIVUP
(
pts_num
,
THREADS_PER_BLOCK
),
batch_size
);
dim3
threads
(
THREADS_PER_BLOCK
);
points_in_boxes_kernel
<<<
blocks
,
threads
>>>
(
batch_size
,
boxes_num
,
pts_num
,
boxes
,
pts
,
box_idx_of_points
);
err
=
cudaGetLastError
();
if
(
cudaSuccess
!=
err
)
{
fprintf
(
stderr
,
"CUDA kernel failed : %s
\n
"
,
cudaGetErrorString
(
err
));
exit
(
-
1
);
}
void
points_in_boxes_launcher
(
int
batch_size
,
int
boxes_num
,
int
pts_num
,
const
float
*
boxes
,
const
float
*
pts
,
int
*
box_idx_of_points
)
{
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
// the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x,
// y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default
// -1
cudaError_t
err
;
dim3
blocks
(
DIVUP
(
pts_num
,
THREADS_PER_BLOCK
),
batch_size
);
dim3
threads
(
THREADS_PER_BLOCK
);
points_in_boxes_kernel
<<<
blocks
,
threads
>>>
(
batch_size
,
boxes_num
,
pts_num
,
boxes
,
pts
,
box_idx_of_points
);
err
=
cudaGetLastError
();
if
(
cudaSuccess
!=
err
)
{
fprintf
(
stderr
,
"CUDA kernel failed : %s
\n
"
,
cudaGetErrorString
(
err
));
exit
(
-
1
);
}
#ifdef DEBUG
cudaDeviceSynchronize
();
// for using printf in kernel function
cudaDeviceSynchronize
();
// for using printf in kernel function
#endif
}
int
points_in_boxes_gpu
(
at
::
Tensor
boxes_tensor
,
at
::
Tensor
pts_tensor
,
at
::
Tensor
box_idx_of_points_tensor
){
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the bottom center, each box DO NOT overlaps
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
// params boxes_idx_of_points: (B, npoints), default -1
int
points_in_boxes_gpu
(
at
::
Tensor
boxes_tensor
,
at
::
Tensor
pts_tensor
,
at
::
Tensor
box_idx_of_points_tensor
)
{
// params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
// the bottom center, each box DO NOT overlaps params pts: (B, npoints, 3) [x,
// y, z] in LiDAR coordinate params boxes_idx_of_points: (B, npoints), default
// -1
CHECK_INPUT
(
boxes_tensor
);
CHECK_INPUT
(
pts_tensor
);
CHECK_INPUT
(
box_idx_of_points_tensor
);
CHECK_INPUT
(
boxes_tensor
);
CHECK_INPUT
(
pts_tensor
);
CHECK_INPUT
(
box_idx_of_points_tensor
);
int
batch_size
=
boxes_tensor
.
size
(
0
);
int
boxes_num
=
boxes_tensor
.
size
(
1
);
int
pts_num
=
pts_tensor
.
size
(
1
);
int
batch_size
=
boxes_tensor
.
size
(
0
);
int
boxes_num
=
boxes_tensor
.
size
(
1
);
int
pts_num
=
pts_tensor
.
size
(
1
);
const
float
*
boxes
=
boxes_tensor
.
data
<
float
>
();
const
float
*
pts
=
pts_tensor
.
data
<
float
>
();
int
*
box_idx_of_points
=
box_idx_of_points_tensor
.
data
<
int
>
();
const
float
*
boxes
=
boxes_tensor
.
data
_ptr
<
float
>
();
const
float
*
pts
=
pts_tensor
.
data
_ptr
<
float
>
();
int
*
box_idx_of_points
=
box_idx_of_points_tensor
.
data
_ptr
<
int
>
();
points_in_boxes_launcher
(
batch_size
,
boxes_num
,
pts_num
,
boxes
,
pts
,
box_idx_of_points
);
points_in_boxes_launcher
(
batch_size
,
boxes_num
,
pts_num
,
boxes
,
pts
,
box_idx_of_points
);
return
1
;
return
1
;
}
mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d.cpp
View file @
aec41c7f
//Modified from
//https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//RoI-aware point cloud feature pooling
//Written by Shaoshuai Shi
//All Rights Reserved 2019.
//
Modified from
//
https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
//
RoI-aware point cloud feature pooling
//
Written by Shaoshuai Shi
//
All Rights Reserved 2019.
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <assert.h>
#include <torch/extension.h>
#include <torch/serialize/tensor.h>
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
void
roiaware_pool3d_launcher
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
int
out_z
,
const
float
*
rois
,
const
float
*
pts
,
const
float
*
pts_feature
,
int
*
argmax
,
int
*
pts_idx_of_voxels
,
float
*
pooled_features
,
int
pool_method
);
void
roiaware_pool3d_backward_launcher
(
int
boxes_num
,
int
out_x
,
int
out_y
,
int
out_z
,
int
channels
,
int
max_pts_each_voxel
,
const
int
*
pts_idx_of_voxels
,
const
int
*
argmax
,
const
float
*
grad_out
,
float
*
grad_in
,
int
pool_method
);
int
roiaware_pool3d_gpu
(
at
::
Tensor
rois
,
at
::
Tensor
pts
,
at
::
Tensor
pts_feature
,
at
::
Tensor
argmax
,
at
::
Tensor
pts_idx_of_voxels
,
at
::
Tensor
pooled_features
,
int
pool_method
);
int
roiaware_pool3d_gpu_backward
(
at
::
Tensor
pts_idx_of_voxels
,
at
::
Tensor
argmax
,
at
::
Tensor
grad_out
,
at
::
Tensor
grad_in
,
int
pool_method
);
int
points_in_boxes_cpu
(
at
::
Tensor
boxes_tensor
,
at
::
Tensor
pts_tensor
,
at
::
Tensor
pts_indices_tensor
);
int
points_in_boxes_gpu
(
at
::
Tensor
boxes_tensor
,
at
::
Tensor
pts_tensor
,
at
::
Tensor
box_idx_of_points_tensor
);
int
roiaware_pool3d_gpu
(
at
::
Tensor
rois
,
at
::
Tensor
pts
,
at
::
Tensor
pts_feature
,
at
::
Tensor
argmax
,
at
::
Tensor
pts_idx_of_voxels
,
at
::
Tensor
pooled_features
,
int
pool_method
){
// params rois: (N, 7) [x, y, z, w, l, h, ry] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_feature: (npoints, C)
// params argmax: (N, out_x, out_y, out_z, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params pooled_features: (N, out_x, out_y, out_z, C)
// params pool_method: 0: max_pool 1: avg_pool
CHECK_INPUT
(
rois
);
CHECK_INPUT
(
pts
);
CHECK_INPUT
(
pts_feature
);
CHECK_INPUT
(
argmax
);
CHECK_INPUT
(
pts_idx_of_voxels
);
CHECK_INPUT
(
pooled_features
);
int
boxes_num
=
rois
.
size
(
0
);
int
pts_num
=
pts
.
size
(
0
);
int
channels
=
pts_feature
.
size
(
1
);
int
max_pts_each_voxel
=
pts_idx_of_voxels
.
size
(
4
);
// index 0 is the counter
int
out_x
=
pts_idx_of_voxels
.
size
(
1
);
int
out_y
=
pts_idx_of_voxels
.
size
(
2
);
int
out_z
=
pts_idx_of_voxels
.
size
(
3
);
assert
((
out_x
<
256
)
&&
(
out_y
<
256
)
&&
(
out_z
<
256
));
// we encode index with 8bit
const
float
*
rois_data
=
rois
.
data
<
float
>
();
const
float
*
pts_data
=
pts
.
data
<
float
>
();
const
float
*
pts_feature_data
=
pts_feature
.
data
<
float
>
();
int
*
argmax_data
=
argmax
.
data
<
int
>
();
int
*
pts_idx_of_voxels_data
=
pts_idx_of_voxels
.
data
<
int
>
();
float
*
pooled_features_data
=
pooled_features
.
data
<
float
>
();
roiaware_pool3d_launcher
(
boxes_num
,
pts_num
,
channels
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
rois_data
,
pts_data
,
pts_feature_data
,
argmax_data
,
pts_idx_of_voxels_data
,
pooled_features_data
,
pool_method
);
return
1
;
#define CHECK_CUDA(x) \
TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
void
roiaware_pool3d_launcher
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
int
out_z
,
const
float
*
rois
,
const
float
*
pts
,
const
float
*
pts_feature
,
int
*
argmax
,
int
*
pts_idx_of_voxels
,
float
*
pooled_features
,
int
pool_method
);
void
roiaware_pool3d_backward_launcher
(
int
boxes_num
,
int
out_x
,
int
out_y
,
int
out_z
,
int
channels
,
int
max_pts_each_voxel
,
const
int
*
pts_idx_of_voxels
,
const
int
*
argmax
,
const
float
*
grad_out
,
float
*
grad_in
,
int
pool_method
);
int
roiaware_pool3d_gpu
(
at
::
Tensor
rois
,
at
::
Tensor
pts
,
at
::
Tensor
pts_feature
,
at
::
Tensor
argmax
,
at
::
Tensor
pts_idx_of_voxels
,
at
::
Tensor
pooled_features
,
int
pool_method
);
int
roiaware_pool3d_gpu_backward
(
at
::
Tensor
pts_idx_of_voxels
,
at
::
Tensor
argmax
,
at
::
Tensor
grad_out
,
at
::
Tensor
grad_in
,
int
pool_method
);
int
points_in_boxes_cpu
(
at
::
Tensor
boxes_tensor
,
at
::
Tensor
pts_tensor
,
at
::
Tensor
pts_indices_tensor
);
int
points_in_boxes_gpu
(
at
::
Tensor
boxes_tensor
,
at
::
Tensor
pts_tensor
,
at
::
Tensor
box_idx_of_points_tensor
);
int
roiaware_pool3d_gpu
(
at
::
Tensor
rois
,
at
::
Tensor
pts
,
at
::
Tensor
pts_feature
,
at
::
Tensor
argmax
,
at
::
Tensor
pts_idx_of_voxels
,
at
::
Tensor
pooled_features
,
int
pool_method
)
{
// params rois: (N, 7) [x, y, z, w, l, h, ry] in LiDAR coordinate
// params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
// params pts_feature: (npoints, C)
// params argmax: (N, out_x, out_y, out_z, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params pooled_features: (N, out_x, out_y, out_z, C)
// params pool_method: 0: max_pool 1: avg_pool
CHECK_INPUT
(
rois
);
CHECK_INPUT
(
pts
);
CHECK_INPUT
(
pts_feature
);
CHECK_INPUT
(
argmax
);
CHECK_INPUT
(
pts_idx_of_voxels
);
CHECK_INPUT
(
pooled_features
);
int
boxes_num
=
rois
.
size
(
0
);
int
pts_num
=
pts
.
size
(
0
);
int
channels
=
pts_feature
.
size
(
1
);
int
max_pts_each_voxel
=
pts_idx_of_voxels
.
size
(
4
);
// index 0 is the counter
int
out_x
=
pts_idx_of_voxels
.
size
(
1
);
int
out_y
=
pts_idx_of_voxels
.
size
(
2
);
int
out_z
=
pts_idx_of_voxels
.
size
(
3
);
assert
((
out_x
<
256
)
&&
(
out_y
<
256
)
&&
(
out_z
<
256
));
// we encode index with 8bit
const
float
*
rois_data
=
rois
.
data_ptr
<
float
>
();
const
float
*
pts_data
=
pts
.
data_ptr
<
float
>
();
const
float
*
pts_feature_data
=
pts_feature
.
data_ptr
<
float
>
();
int
*
argmax_data
=
argmax
.
data_ptr
<
int
>
();
int
*
pts_idx_of_voxels_data
=
pts_idx_of_voxels
.
data_ptr
<
int
>
();
float
*
pooled_features_data
=
pooled_features
.
data_ptr
<
float
>
();
roiaware_pool3d_launcher
(
boxes_num
,
pts_num
,
channels
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
rois_data
,
pts_data
,
pts_feature_data
,
argmax_data
,
pts_idx_of_voxels_data
,
pooled_features_data
,
pool_method
);
return
1
;
}
int
roiaware_pool3d_gpu_backward
(
at
::
Tensor
pts_idx_of_voxels
,
at
::
Tensor
argmax
,
at
::
Tensor
grad_out
,
at
::
Tensor
grad_in
,
int
pool_method
){
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
// params pool_method: 0: max_pool 1: avg_pool
CHECK_INPUT
(
pts_idx_of_voxels
);
CHECK_INPUT
(
argmax
);
CHECK_INPUT
(
grad_out
);
CHECK_INPUT
(
grad_in
);
int
boxes_num
=
pts_idx_of_voxels
.
size
(
0
);
int
out_x
=
pts_idx_of_voxels
.
size
(
1
);
int
out_y
=
pts_idx_of_voxels
.
size
(
2
);
int
out_z
=
pts_idx_of_voxels
.
size
(
3
);
int
max_pts_each_voxel
=
pts_idx_of_voxels
.
size
(
4
);
// index 0 is the counter
int
channels
=
grad_out
.
size
(
4
);
const
int
*
pts_idx_of_voxels_data
=
pts_idx_of_voxels
.
data
<
int
>
();
const
int
*
argmax_data
=
argmax
.
data
<
int
>
();
const
float
*
grad_out_data
=
grad_out
.
data
<
float
>
();
float
*
grad_in_data
=
grad_in
.
data
<
float
>
();
roiaware_pool3d_backward_launcher
(
boxes_num
,
out_x
,
out_y
,
out_z
,
channels
,
max_pts_each_voxel
,
pts_idx_of_voxels_data
,
argmax_data
,
grad_out_data
,
grad_in_data
,
pool_method
);
return
1
;
int
roiaware_pool3d_gpu_backward
(
at
::
Tensor
pts_idx_of_voxels
,
at
::
Tensor
argmax
,
at
::
Tensor
grad_out
,
at
::
Tensor
grad_in
,
int
pool_method
)
{
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
// params pool_method: 0: max_pool 1: avg_pool
CHECK_INPUT
(
pts_idx_of_voxels
);
CHECK_INPUT
(
argmax
);
CHECK_INPUT
(
grad_out
);
CHECK_INPUT
(
grad_in
);
int
boxes_num
=
pts_idx_of_voxels
.
size
(
0
);
int
out_x
=
pts_idx_of_voxels
.
size
(
1
);
int
out_y
=
pts_idx_of_voxels
.
size
(
2
);
int
out_z
=
pts_idx_of_voxels
.
size
(
3
);
int
max_pts_each_voxel
=
pts_idx_of_voxels
.
size
(
4
);
// index 0 is the counter
int
channels
=
grad_out
.
size
(
4
);
const
int
*
pts_idx_of_voxels_data
=
pts_idx_of_voxels
.
data_ptr
<
int
>
();
const
int
*
argmax_data
=
argmax
.
data_ptr
<
int
>
();
const
float
*
grad_out_data
=
grad_out
.
data_ptr
<
float
>
();
float
*
grad_in_data
=
grad_in
.
data_ptr
<
float
>
();
roiaware_pool3d_backward_launcher
(
boxes_num
,
out_x
,
out_y
,
out_z
,
channels
,
max_pts_each_voxel
,
pts_idx_of_voxels_data
,
argmax_data
,
grad_out_data
,
grad_in_data
,
pool_method
);
return
1
;
}
PYBIND11_MODULE
(
TORCH_EXTENSION_NAME
,
m
)
{
m
.
def
(
"forward"
,
&
roiaware_pool3d_gpu
,
"roiaware pool3d forward (CUDA)"
);
m
.
def
(
"backward"
,
&
roiaware_pool3d_gpu_backward
,
"roiaware pool3d backward (CUDA)"
);
m
.
def
(
"points_in_boxes_gpu"
,
&
points_in_boxes_gpu
,
"points_in_boxes_gpu forward (CUDA)"
);
m
.
def
(
"points_in_boxes_cpu"
,
&
points_in_boxes_cpu
,
"points_in_boxes_cpu forward (CPU)"
);
m
.
def
(
"forward"
,
&
roiaware_pool3d_gpu
,
"roiaware pool3d forward (CUDA)"
);
m
.
def
(
"backward"
,
&
roiaware_pool3d_gpu_backward
,
"roiaware pool3d backward (CUDA)"
);
m
.
def
(
"points_in_boxes_gpu"
,
&
points_in_boxes_gpu
,
"points_in_boxes_gpu forward (CUDA)"
);
m
.
def
(
"points_in_boxes_cpu"
,
&
points_in_boxes_cpu
,
"points_in_boxes_cpu forward (CPU)"
);
}
mmdet3d/ops/spconv/include/spconv/fused_spconv_ops.h
View file @
aec41c7f
...
...
@@ -26,9 +26,10 @@ namespace spconv {
// torch.jit's doc says only support int64, so we need to convert to int32.
template
<
typename
T
>
torch
::
Tensor
fusedIndiceConvBatchNorm
(
torch
::
Tensor
features
,
torch
::
Tensor
filters
,
torch
::
Tensor
bias
,
torch
::
Tensor
indicePairs
,
torch
::
Tensor
indiceNum
,
int64_t
numActOut
,
int64_t
_inverse
,
int64_t
_subM
)
{
torch
::
Tensor
fusedIndiceConvBatchNorm
(
torch
::
Tensor
features
,
torch
::
Tensor
filters
,
torch
::
Tensor
bias
,
torch
::
Tensor
indicePairs
,
torch
::
Tensor
indiceNum
,
int64_t
numActOut
,
int64_t
_inverse
,
int64_t
_subM
)
{
bool
subM
=
_subM
!=
0
;
bool
inverse
=
_inverse
!=
0
;
auto
device
=
features
.
device
().
type
();
...
...
@@ -37,13 +38,16 @@ torch::Tensor fusedIndiceConvBatchNorm(torch::Tensor features, torch::Tensor fil
auto
numInPlanes
=
features
.
size
(
1
);
auto
numOutPlanes
=
filters
.
size
(
ndim
+
1
);
auto
indicePairNumCpu
=
indiceNum
.
to
({
torch
::
kCPU
});
auto
indicePairMaxSizeIter
=
std
::
max_element
(
indicePairNumCpu
.
data
<
int
>
(),
indicePairNumCpu
.
data
<
int
>
()
+
kernelVolume
);
int
indicePairMaxOffset
=
indicePairMaxSizeIter
-
indicePairNumCpu
.
data
<
int
>
();
auto
indicePairMaxSizeIter
=
std
::
max_element
(
indicePairNumCpu
.
data_ptr
<
int
>
(),
indicePairNumCpu
.
data_ptr
<
int
>
()
+
kernelVolume
);
int
indicePairMaxOffset
=
indicePairMaxSizeIter
-
indicePairNumCpu
.
data_ptr
<
int
>
();
int
indicePairMaxSize
=
*
indicePairMaxSizeIter
;
/*if (_subM){
std::vector<int> indicePairNumVec(indicePairNumCpu.data<int>(), indicePairNumCpu.data<int>() + kernelVolume);
std::vector<int> indicePairNumVec(indicePairNumCpu.data_ptr<int>(),
indicePairNumCpu.data_ptr<int>() + kernelVolume);
indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset);
auto indicePairVecMaxSizeIter = std::max_element(
...
...
@@ -56,46 +60,49 @@ torch::Tensor fusedIndiceConvBatchNorm(torch::Tensor features, torch::Tensor fil
// auto indicePairOptions =
// torch::TensorOptions().dtype(torch::kInt64).device(indicePairs.device());
torch
::
Tensor
output
=
torch
::
zeros
({
numActOut
,
numOutPlanes
},
options
).
copy_
(
bias
);
torch
::
Tensor
inputBuffer
=
torch
::
zeros
({
indicePairMaxSize
,
numInPlanes
},
options
);
torch
::
Tensor
output
=
torch
::
zeros
({
numActOut
,
numOutPlanes
},
options
).
copy_
(
bias
);
torch
::
Tensor
inputBuffer
=
torch
::
zeros
({
indicePairMaxSize
,
numInPlanes
},
options
);
torch
::
Tensor
outputBuffer
=
torch
::
zeros
({
indicePairMaxSize
,
numOutPlanes
},
options
);
filters
=
filters
.
view
({
-
1
,
numInPlanes
,
numOutPlanes
});
if
(
subM
)
{
// the center index of subm conv don't need gather and scatter
// add.
if
(
subM
)
{
// the center index of subm conv don't need gather and scatter
// add.
torch
::
mm_out
(
output
,
features
,
filters
[
indicePairMaxOffset
]);
}
double
totalGatherTime
=
0
;
double
totalGEMMTime
=
0
;
double
totalSAddTime
=
0
;
for
(
int
i
=
0
;
i
<
kernelVolume
;
++
i
)
{
auto
nHot
=
indicePairNumCpu
.
data
<
int
>
()[
i
];
auto
nHot
=
indicePairNumCpu
.
data
_ptr
<
int
>
()[
i
];
if
(
nHot
<=
0
||
(
subM
&&
i
==
indicePairMaxOffset
))
{
continue
;
}
// auto timer = spconv::CudaContextTimer<>();
auto
outputBufferBlob
=
torch
::
from_blob
(
outputBuffer
.
data
<
T
>
(),
{
nHot
,
numOutPlanes
},
options
);
auto
inputBufferBlob
=
torch
::
from_blob
(
inputBuffer
.
data
<
T
>
(),
{
nHot
,
numInPlanes
},
options
);
auto
outputBufferBlob
=
torch
::
from_blob
(
outputBuffer
.
data_ptr
<
T
>
(),
{
nHot
,
numOutPlanes
},
options
);
auto
inputBufferBlob
=
torch
::
from_blob
(
inputBuffer
.
data_ptr
<
T
>
(),
{
nHot
,
numInPlanes
},
options
);
if
(
device
==
torch
::
kCPU
)
{
functor
::
SparseGatherFunctor
<
tv
::
CPU
,
T
,
int
>
gatherFtor
;
gatherFtor
(
tv
::
CPU
(),
tv
::
torch2tv
<
T
>
(
inputBuffer
),
tv
::
torch2tv
<
const
T
>
(
features
),
tv
::
torch2tv
<
const
int
>
(
indicePairs
).
subview
(
i
,
inverse
),
nHot
);
tv
::
torch2tv
<
const
int
>
(
indicePairs
).
subview
(
i
,
inverse
),
nHot
);
}
else
{
functor
::
SparseGatherFunctor
<
tv
::
GPU
,
T
,
int
>
gatherFtor
;
gatherFtor
(
tv
::
TorchGPU
(),
tv
::
torch2tv
<
T
>
(
inputBuffer
),
tv
::
torch2tv
<
const
T
>
(
features
),
tv
::
torch2tv
<
const
int
>
(
indicePairs
).
subview
(
i
,
inverse
),
nHot
);
tv
::
torch2tv
<
const
int
>
(
indicePairs
).
subview
(
i
,
inverse
),
nHot
);
TV_CHECK_CUDA_ERR
();
/* slower than SparseGatherFunctor, may due to int->long conversion
auto indicePairLong = indicePairs[i][inverse].to(torch::kInt64);
auto indicePairBlob = torch::from_blob(indicePairLong.data<long>(), {nHot},
indicePairOptions);
torch::index_select_out(inputBufferBlob, features, 0,
indicePairBlob);*/
auto indicePairBlob = torch::from_blob(indicePairLong.data_ptr<long>(),
{nHot}, indicePairOptions); torch::index_select_out(inputBufferBlob,
features, 0, indicePairBlob);*/
}
// totalGatherTime += timer.report() / 1000.0;
torch
::
mm_out
(
outputBufferBlob
,
inputBufferBlob
,
filters
[
i
]);
...
...
@@ -105,14 +112,14 @@ torch::Tensor fusedIndiceConvBatchNorm(torch::Tensor features, torch::Tensor fil
functor
::
SparseScatterAddFunctor
<
tv
::
CPU
,
T
,
int
>
scatterFtor
;
scatterFtor
(
tv
::
CPU
(),
tv
::
torch2tv
<
T
>
(
output
),
tv
::
torch2tv
<
const
T
>
(
outputBuffer
),
tv
::
torch2tv
<
const
int
>
(
indicePairs
).
subview
(
i
,
!
inverse
),
nHot
,
true
);
tv
::
torch2tv
<
const
int
>
(
indicePairs
).
subview
(
i
,
!
inverse
),
nHot
,
true
);
}
else
{
functor
::
SparseScatterAddFunctor
<
tv
::
GPU
,
T
,
int
>
scatterFtor
;
scatterFtor
(
tv
::
TorchGPU
(),
tv
::
torch2tv
<
T
>
(
output
),
tv
::
torch2tv
<
const
T
>
(
outputBuffer
),
tv
::
torch2tv
<
const
int
>
(
indicePairs
).
subview
(
i
,
!
inverse
),
nHot
,
true
);
tv
::
torch2tv
<
const
int
>
(
indicePairs
).
subview
(
i
,
!
inverse
),
nHot
,
true
);
TV_CHECK_CUDA_ERR
();
}
// totalSAddTime += timer.report() / 1000.0;
...
...
@@ -122,6 +129,6 @@ torch::Tensor fusedIndiceConvBatchNorm(torch::Tensor features, torch::Tensor fil
// std::cout << "scatteradd time " << totalSAddTime << std::endl;
return
output
;
}
}
// namespace spconv
}
// namespace spconv
#endif
mmdet3d/ops/spconv/include/spconv/pool_ops.h
View file @
aec41c7f
...
...
@@ -24,7 +24,7 @@
namespace
spconv
{
template
<
typename
T
>
torch
::
Tensor
indiceMaxPool
(
torch
::
Tensor
features
,
torch
::
Tensor
indicePairs
,
torch
::
Tensor
indiceNum
,
int64_t
numAct
)
{
torch
::
Tensor
indiceNum
,
int64_t
numAct
)
{
auto
device
=
features
.
device
().
type
();
auto
kernelVolume
=
indicePairs
.
size
(
0
);
auto
numInPlanes
=
features
.
size
(
1
);
...
...
@@ -34,7 +34,7 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
torch
::
Tensor
output
=
torch
::
zeros
({
numAct
,
numInPlanes
},
options
);
double
totalTime
=
0
;
for
(
int
i
=
0
;
i
<
kernelVolume
;
++
i
)
{
auto
nHot
=
indicePairNumCpu
.
data
<
int
>
()[
i
];
auto
nHot
=
indicePairNumCpu
.
data
_ptr
<
int
>
()[
i
];
if
(
nHot
<=
0
)
{
continue
;
}
...
...
@@ -59,18 +59,19 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
template
<
typename
T
>
torch
::
Tensor
indiceMaxPoolBackward
(
torch
::
Tensor
features
,
torch
::
Tensor
outFeatures
,
torch
::
Tensor
outGrad
,
torch
::
Tensor
indicePairs
,
torch
::
Tensor
indiceNum
)
{
torch
::
Tensor
outFeatures
,
torch
::
Tensor
outGrad
,
torch
::
Tensor
indicePairs
,
torch
::
Tensor
indiceNum
)
{
auto
device
=
features
.
device
().
type
();
auto
numInPlanes
=
features
.
size
(
1
);
auto
indicePairNumCpu
=
indiceNum
.
to
({
torch
::
kCPU
});
auto
options
=
torch
::
TensorOptions
().
dtype
(
features
.
dtype
()).
device
(
features
.
device
());
torch
::
Tensor
inputGrad
=
torch
::
zeros
(
features
.
sizes
(),
options
);
auto
kernelVolume
=
indicePairs
.
size
(
0
);
auto
kernelVolume
=
indicePairs
.
size
(
0
);
for
(
int
i
=
0
;
i
<
kernelVolume
;
++
i
)
{
auto
nHot
=
indicePairNumCpu
.
data
<
int
>
()[
i
];
auto
nHot
=
indicePairNumCpu
.
data
_ptr
<
int
>
()[
i
];
if
(
nHot
<=
0
)
{
continue
;
}
...
...
@@ -92,6 +93,6 @@ torch::Tensor indiceMaxPoolBackward(torch::Tensor features,
return
inputGrad
;
}
}
// namespace spconv
}
// namespace spconv
#endif
mmdet3d/ops/spconv/include/spconv/spconv_ops.h
View file @
aec41c7f
This diff is collapsed.
Click to expand it.
mmdet3d/ops/spconv/include/torch_utils.h
View file @
aec41c7f
...
...
@@ -13,48 +13,49 @@
// limitations under the License.
#pragma once
#include <tensorview/tensorview.h>
#include <torch/script.h>
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include <tensorview/tensorview.h>
#include <torch/script.h>
namespace
tv
{
struct
TorchGPU
:
public
tv
::
GPU
{
struct
TorchGPU
:
public
tv
::
GPU
{
virtual
cudaStream_t
getStream
()
const
override
{
return
at
::
cuda
::
getCurrentCUDAStream
();
}
};
template
<
typename
T
>
void
check_torch_dtype
(
const
torch
::
Tensor
&
tensor
)
{
template
<
typename
T
>
void
check_torch_dtype
(
const
torch
::
Tensor
&
tensor
)
{
switch
(
tensor
.
type
().
scalarType
())
{
case
at
::
ScalarType
::
Double
:
{
auto
val
=
std
::
is_same
<
std
::
remove_const_t
<
T
>
,
double
>::
value
;
TV_ASSERT_RT_ERR
(
val
,
"error"
);
break
;
}
case
at
::
ScalarType
::
Float
:
{
auto
val
=
std
::
is_same
<
std
::
remove_const_t
<
T
>
,
float
>::
value
;
TV_ASSERT_RT_ERR
(
val
,
"error"
);
break
;
}
case
at
::
ScalarType
::
Int
:
{
auto
val
=
std
::
is_same
<
std
::
remove_const_t
<
T
>
,
int
>::
value
;
TV_ASSERT_RT_ERR
(
val
,
"error"
);
break
;
}
case
at
::
ScalarType
::
Half
:
{
auto
val
=
std
::
is_same
<
std
::
remove_const_t
<
T
>
,
at
::
Half
>::
value
;
TV_ASSERT_RT_ERR
(
val
,
"error"
);
break
;
}
case
at
::
ScalarType
::
Long
:
{
auto
val
=
std
::
is_same
<
std
::
remove_const_t
<
T
>
,
long
>::
value
;
TV_ASSERT_RT_ERR
(
val
,
"error"
);
break
;
}
default:
TV_ASSERT_RT_ERR
(
false
,
"error"
);
case
at
::
ScalarType
::
Double
:
{
auto
val
=
std
::
is_same
<
std
::
remove_const_t
<
T
>
,
double
>::
value
;
TV_ASSERT_RT_ERR
(
val
,
"error"
);
break
;
}
case
at
::
ScalarType
::
Float
:
{
auto
val
=
std
::
is_same
<
std
::
remove_const_t
<
T
>
,
float
>::
value
;
TV_ASSERT_RT_ERR
(
val
,
"error"
);
break
;
}
case
at
::
ScalarType
::
Int
:
{
auto
val
=
std
::
is_same
<
std
::
remove_const_t
<
T
>
,
int
>::
value
;
TV_ASSERT_RT_ERR
(
val
,
"error"
);
break
;
}
case
at
::
ScalarType
::
Half
:
{
auto
val
=
std
::
is_same
<
std
::
remove_const_t
<
T
>
,
at
::
Half
>::
value
;
TV_ASSERT_RT_ERR
(
val
,
"error"
);
break
;
}
case
at
::
ScalarType
::
Long
:
{
auto
val
=
std
::
is_same
<
std
::
remove_const_t
<
T
>
,
long
>::
value
;
TV_ASSERT_RT_ERR
(
val
,
"error"
);
break
;
}
default:
TV_ASSERT_RT_ERR
(
false
,
"error"
);
}
}
...
...
@@ -65,6 +66,6 @@ tv::TensorView<T> torch2tv(const torch::Tensor &tensor) {
for
(
auto
i
:
tensor
.
sizes
())
{
shape
.
push_back
(
i
);
}
return
tv
::
TensorView
<
T
>
(
tensor
.
data
<
std
::
remove_const_t
<
T
>>
(),
shape
);
return
tv
::
TensorView
<
T
>
(
tensor
.
data
_ptr
<
std
::
remove_const_t
<
T
>>
(),
shape
);
}
}
// namespace tv
}
// namespace tv
mmdet3d/ops/voxel/src/scatter_points_cpu.cpp
View file @
aec41c7f
#include <torch/extension.h>
#include <ATen/TensorUtils.h>
#include <torch/extension.h>
// #include "voxelization.h"
namespace
{
template
<
typename
T_int
>
void
determin_max_points_kernel
(
torch
::
TensorAccessor
<
T_int
,
2
>
coor
,
torch
::
TensorAccessor
<
T_int
,
1
>
point_to_voxelidx
,
torch
::
TensorAccessor
<
T_int
,
1
>
num_points_per_voxel
,
torch
::
TensorAccessor
<
T_int
,
3
>
coor_to_voxelidx
,
int
&
voxel_num
,
int
&
max_points
,
const
int
num_points
)
{
int
voxelidx
,
num
;
for
(
int
i
=
0
;
i
<
num_points
;
++
i
)
{
if
(
coor
[
i
][
0
]
==
-
1
)
continue
;
voxelidx
=
coor_to_voxelidx
[
coor
[
i
][
0
]][
coor
[
i
][
1
]][
coor
[
i
][
2
]];
// record voxel
if
(
voxelidx
==
-
1
)
{
voxelidx
=
voxel_num
;
voxel_num
+=
1
;
coor_to_voxelidx
[
coor
[
i
][
0
]][
coor
[
i
][
1
]][
coor
[
i
][
2
]]
=
voxelidx
;
}
// put points into voxel
num
=
num_points_per_voxel
[
voxelidx
];
point_to_voxelidx
[
i
]
=
num
;
num_points_per_voxel
[
voxelidx
]
+=
1
;
// update max points per voxel
max_points
=
std
::
max
(
max_points
,
num
+
1
);
void
determin_max_points_kernel
(
torch
::
TensorAccessor
<
T_int
,
2
>
coor
,
torch
::
TensorAccessor
<
T_int
,
1
>
point_to_voxelidx
,
torch
::
TensorAccessor
<
T_int
,
1
>
num_points_per_voxel
,
torch
::
TensorAccessor
<
T_int
,
3
>
coor_to_voxelidx
,
int
&
voxel_num
,
int
&
max_points
,
const
int
num_points
)
{
int
voxelidx
,
num
;
for
(
int
i
=
0
;
i
<
num_points
;
++
i
)
{
if
(
coor
[
i
][
0
]
==
-
1
)
continue
;
voxelidx
=
coor_to_voxelidx
[
coor
[
i
][
0
]][
coor
[
i
][
1
]][
coor
[
i
][
2
]];
// record voxel
if
(
voxelidx
==
-
1
)
{
voxelidx
=
voxel_num
;
voxel_num
+=
1
;
coor_to_voxelidx
[
coor
[
i
][
0
]][
coor
[
i
][
1
]][
coor
[
i
][
2
]]
=
voxelidx
;
}
return
;
}
// put points into voxel
num
=
num_points_per_voxel
[
voxelidx
];
point_to_voxelidx
[
i
]
=
num
;
num_points_per_voxel
[
voxelidx
]
+=
1
;
// update max points per voxel
max_points
=
std
::
max
(
max_points
,
num
+
1
);
}
return
;
}
template
<
typename
T
,
typename
T_int
>
void
scatter_point_to_voxel_kernel
(
const
torch
::
TensorAccessor
<
T
,
2
>
points
,
torch
::
TensorAccessor
<
T_int
,
2
>
coor
,
torch
::
TensorAccessor
<
T_int
,
1
>
point_to_voxelidx
,
torch
::
TensorAccessor
<
T_int
,
3
>
coor_to_voxelidx
,
torch
::
TensorAccessor
<
T
,
3
>
voxels
,
torch
::
TensorAccessor
<
T_int
,
2
>
voxel_coors
,
const
int
num_features
,
const
int
num_points
,
const
int
NDim
){
for
(
int
i
=
0
;
i
<
num_points
;
++
i
)
{
int
num
=
point_to_voxelidx
[
i
];
int
voxelidx
=
coor_to_voxelidx
[
coor
[
i
][
0
]][
coor
[
i
][
1
]][
coor
[
i
][
2
]];
for
(
int
k
=
0
;
k
<
num_features
;
++
k
)
{
voxels
[
voxelidx
][
num
][
k
]
=
points
[
i
][
k
];
}
for
(
int
k
=
0
;
k
<
NDim
;
++
k
)
{
voxel_coors
[
voxelidx
][
k
]
=
coor
[
i
][
k
];
}
const
torch
::
TensorAccessor
<
T
,
2
>
points
,
torch
::
TensorAccessor
<
T_int
,
2
>
coor
,
torch
::
TensorAccessor
<
T_int
,
1
>
point_to_voxelidx
,
torch
::
TensorAccessor
<
T_int
,
3
>
coor_to_voxelidx
,
torch
::
TensorAccessor
<
T
,
3
>
voxels
,
torch
::
TensorAccessor
<
T_int
,
2
>
voxel_coors
,
const
int
num_features
,
const
int
num_points
,
const
int
NDim
)
{
for
(
int
i
=
0
;
i
<
num_points
;
++
i
)
{
int
num
=
point_to_voxelidx
[
i
];
int
voxelidx
=
coor_to_voxelidx
[
coor
[
i
][
0
]][
coor
[
i
][
1
]][
coor
[
i
][
2
]];
for
(
int
k
=
0
;
k
<
num_features
;
++
k
)
{
voxels
[
voxelidx
][
num
][
k
]
=
points
[
i
][
k
];
}
for
(
int
k
=
0
;
k
<
NDim
;
++
k
)
{
voxel_coors
[
voxelidx
][
k
]
=
coor
[
i
][
k
];
}
}
}
}
// namespace
}
// namespace
namespace
voxelization
{
std
::
vector
<
at
::
Tensor
>
dynamic_point_to_voxel_cpu
(
const
at
::
Tensor
&
points
,
const
at
::
Tensor
&
voxel_mapping
,
const
std
::
vector
<
float
>
voxel_size
,
const
std
::
vector
<
float
>
coors_range
)
{
// current version tooks about 0.02s_0.03s for one frame on cpu
// check device
AT_ASSERTM
(
points
.
device
().
is_cpu
(),
"points must be a CPU tensor"
);
const
int
NDim
=
voxel_mapping
.
size
(
1
);
const
int
num_points
=
points
.
size
(
0
);
const
int
num_features
=
points
.
size
(
1
);
std
::
vector
<
int
>
grid_size
(
NDim
);
for
(
int
i
=
0
;
i
<
NDim
;
++
i
)
{
grid_size
[
i
]
=
round
((
coors_range
[
NDim
+
i
]
-
coors_range
[
i
])
/
voxel_size
[
i
]);
}
at
::
Tensor
num_points_per_voxel
=
at
::
zeros
({
num_points
,},
voxel_mapping
.
options
());
at
::
Tensor
coor_to_voxelidx
=
-
at
::
ones
({
grid_size
[
2
],
grid_size
[
1
],
grid_size
[
0
]},
voxel_mapping
.
options
());
at
::
Tensor
point_to_voxelidx
=
-
at
::
ones
({
num_points
,},
voxel_mapping
.
options
());
int
voxel_num
=
0
;
int
max_points
=
0
;
AT_DISPATCH_ALL_TYPES
(
voxel_mapping
.
type
(),
"determin_max_point"
,
[
&
]
{
determin_max_points_kernel
<
scalar_t
>
(
voxel_mapping
.
accessor
<
scalar_t
,
2
>
(),
point_to_voxelidx
.
accessor
<
scalar_t
,
1
>
(),
num_points_per_voxel
.
accessor
<
scalar_t
,
1
>
(),
coor_to_voxelidx
.
accessor
<
scalar_t
,
3
>
(),
voxel_num
,
max_points
,
num_points
);
});
at
::
Tensor
voxels
=
at
::
zeros
({
voxel_num
,
max_points
,
num_features
},
points
.
options
());
at
::
Tensor
voxel_coors
=
at
::
zeros
({
voxel_num
,
NDim
},
points
.
options
().
dtype
(
at
::
kInt
));
AT_DISPATCH_ALL_TYPES
(
points
.
type
(),
"scatter_point_to_voxel"
,
[
&
]
{
scatter_point_to_voxel_kernel
<
scalar_t
,
int
>
(
points
.
accessor
<
scalar_t
,
2
>
(),
voxel_mapping
.
accessor
<
int
,
2
>
(),
point_to_voxelidx
.
accessor
<
int
,
1
>
(),
coor_to_voxelidx
.
accessor
<
int
,
3
>
(),
voxels
.
accessor
<
scalar_t
,
3
>
(),
voxel_coors
.
accessor
<
int
,
2
>
(),
num_features
,
num_points
,
NDim
);
});
at
::
Tensor
num_points_per_voxel_out
=
num_points_per_voxel
.
slice
(
/*dim=*/
0
,
/*start=*/
0
,
/*end=*/
voxel_num
);
return
{
voxels
,
voxel_coors
,
num_points_per_voxel_out
};
const
at
::
Tensor
&
points
,
const
at
::
Tensor
&
voxel_mapping
,
const
std
::
vector
<
float
>
voxel_size
,
const
std
::
vector
<
float
>
coors_range
)
{
// current version tooks about 0.02s_0.03s for one frame on cpu
// check device
AT_ASSERTM
(
points
.
device
().
is_cpu
(),
"points must be a CPU tensor"
);
const
int
NDim
=
voxel_mapping
.
size
(
1
);
const
int
num_points
=
points
.
size
(
0
);
const
int
num_features
=
points
.
size
(
1
);
std
::
vector
<
int
>
grid_size
(
NDim
);
for
(
int
i
=
0
;
i
<
NDim
;
++
i
)
{
grid_size
[
i
]
=
round
((
coors_range
[
NDim
+
i
]
-
coors_range
[
i
])
/
voxel_size
[
i
]);
}
at
::
Tensor
num_points_per_voxel
=
at
::
zeros
(
{
num_points
,
},
voxel_mapping
.
options
());
at
::
Tensor
coor_to_voxelidx
=
-
at
::
ones
(
{
grid_size
[
2
],
grid_size
[
1
],
grid_size
[
0
]},
voxel_mapping
.
options
());
at
::
Tensor
point_to_voxelidx
=
-
at
::
ones
(
{
num_points
,
},
voxel_mapping
.
options
());
int
voxel_num
=
0
;
int
max_points
=
0
;
AT_DISPATCH_ALL_TYPES
(
voxel_mapping
.
scalar_type
(),
"determin_max_point"
,
[
&
]
{
determin_max_points_kernel
<
scalar_t
>
(
voxel_mapping
.
accessor
<
scalar_t
,
2
>
(),
point_to_voxelidx
.
accessor
<
scalar_t
,
1
>
(),
num_points_per_voxel
.
accessor
<
scalar_t
,
1
>
(),
coor_to_voxelidx
.
accessor
<
scalar_t
,
3
>
(),
voxel_num
,
max_points
,
num_points
);
});
at
::
Tensor
voxels
=
at
::
zeros
({
voxel_num
,
max_points
,
num_features
},
points
.
options
());
at
::
Tensor
voxel_coors
=
at
::
zeros
({
voxel_num
,
NDim
},
points
.
options
().
dtype
(
at
::
kInt
));
AT_DISPATCH_ALL_TYPES
(
points
.
scalar_type
(),
"scatter_point_to_voxel"
,
[
&
]
{
scatter_point_to_voxel_kernel
<
scalar_t
,
int
>
(
points
.
accessor
<
scalar_t
,
2
>
(),
voxel_mapping
.
accessor
<
int
,
2
>
(),
point_to_voxelidx
.
accessor
<
int
,
1
>
(),
coor_to_voxelidx
.
accessor
<
int
,
3
>
(),
voxels
.
accessor
<
scalar_t
,
3
>
(),
voxel_coors
.
accessor
<
int
,
2
>
(),
num_features
,
num_points
,
NDim
);
});
at
::
Tensor
num_points_per_voxel_out
=
num_points_per_voxel
.
slice
(
/*dim=*/
0
,
/*start=*/
0
,
/*end=*/
voxel_num
);
return
{
voxels
,
voxel_coors
,
num_points_per_voxel_out
};
}
}
}
// namespace voxelization
mmdet3d/ops/voxel/src/scatter_points_cuda.cu
View file @
aec41c7f
...
...
@@ -6,7 +6,7 @@
#include <ATen/cuda/CUDAApplyUtils.cuh>
#define CHECK_CUDA(x) \
TORCH_CHECK(x.
typ
e().is_cuda(), #x " must be a CUDA tensor")
TORCH_CHECK(x.
devic
e().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \
...
...
@@ -177,7 +177,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
dim3
threads
(
threadsPerBlock
);
cudaStream_t
map_stream
=
at
::
cuda
::
getCurrentCUDAStream
();
AT_DISPATCH_ALL_TYPES
(
voxel_mapping
.
type
(),
"determin_duplicate"
,
([
&
]
{
voxel_mapping
.
scalar_
type
(),
"determin_duplicate"
,
([
&
]
{
point_to_voxelidx_kernel
<
int
><<<
blocks
,
threads
,
0
,
map_stream
>>>
(
voxel_mapping
.
data_ptr
<
int
>
(),
point_to_voxelidx
.
data_ptr
<
int
>
(),
point_to_pointidx
.
data_ptr
<
int
>
(),
num_points
,
NDim
);
...
...
@@ -203,7 +203,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
voxel_mapping
.
options
());
// must be zero from the begining
cudaStream_t
logic_stream
=
at
::
cuda
::
getCurrentCUDAStream
();
AT_DISPATCH_ALL_TYPES
(
voxel_mapping
.
type
(),
"determin_duplicate"
,
([
&
]
{
voxel_mapping
.
scalar_
type
(),
"determin_duplicate"
,
([
&
]
{
determin_voxel_num
<
int
><<<
1
,
1
,
0
,
logic_stream
>>>
(
voxel_mapping
.
data_ptr
<
int
>
(),
num_points_per_voxel
.
data_ptr
<
int
>
(),
point_to_voxelidx
.
data_ptr
<
int
>
(),
...
...
@@ -228,7 +228,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
dim3
cp_threads
(
threadsPerBlock
,
4
);
cudaStream_t
cp_stream
=
at
::
cuda
::
getCurrentCUDAStream
();
AT_DISPATCH_ALL_TYPES
(
points
.
type
(),
"scatter_point_to_voxel"
,
([
&
]
{
points
.
scalar_
type
(),
"scatter_point_to_voxel"
,
([
&
]
{
scatter_point_to_voxel_kernel
<
float
,
int
>
<<<
blocks
,
cp_threads
,
0
,
cp_stream
>>>
(
points
.
data_ptr
<
float
>
(),
voxel_mapping
.
data_ptr
<
int
>
(),
...
...
@@ -265,8 +265,8 @@ void dynamic_point_to_voxel_backward_gpu(at::Tensor& grad_input_points,
dim3
blocks
(
col_blocks
);
dim3
cp_threads
(
threadsPerBlock
,
4
);
cudaStream_t
cp_stream
=
at
::
cuda
::
getCurrentCUDAStream
();
AT_DISPATCH_ALL_TYPES
(
grad_input_points
.
type
(),
"scatter_point_to_voxel"
,
([
&
]
{
AT_DISPATCH_ALL_TYPES
(
grad_input_points
.
scalar_type
()
,
"scatter_point_to_voxel"
,
([
&
]
{
map_voxel_to_point_kernel
<
float
,
int
>
<<<
blocks
,
cp_threads
,
0
,
cp_stream
>>>
(
grad_input_points
.
data_ptr
<
float
>
(),
...
...
mmdet3d/ops/voxel/src/voxelization.h
View file @
aec41c7f
...
...
@@ -49,7 +49,7 @@ inline int hard_voxelize(const at::Tensor& points, at::Tensor& voxels,
const
std
::
vector
<
float
>
coors_range
,
const
int
max_points
,
const
int
max_voxels
,
const
int
NDim
=
3
)
{
if
(
points
.
typ
e
().
is_cuda
())
{
if
(
points
.
devic
e
().
is_cuda
())
{
#ifdef WITH_CUDA
return
hard_voxelize_gpu
(
points
,
voxels
,
coors
,
num_points_per_voxel
,
voxel_size
,
coors_range
,
max_points
,
max_voxels
,
...
...
@@ -67,7 +67,7 @@ inline void dynamic_voxelize(const at::Tensor& points, at::Tensor& coors,
const
std
::
vector
<
float
>
voxel_size
,
const
std
::
vector
<
float
>
coors_range
,
const
int
NDim
=
3
)
{
if
(
points
.
typ
e
().
is_cuda
())
{
if
(
points
.
devic
e
().
is_cuda
())
{
#ifdef WITH_CUDA
return
dynamic_voxelize_gpu
(
points
,
coors
,
voxel_size
,
coors_range
,
NDim
);
#else
...
...
@@ -80,7 +80,7 @@ inline void dynamic_voxelize(const at::Tensor& points, at::Tensor& coors,
inline
std
::
vector
<
torch
::
Tensor
>
dynamic_point_to_voxel_forward
(
const
at
::
Tensor
&
points
,
const
at
::
Tensor
&
voxel_mapping
,
const
std
::
vector
<
float
>
voxel_size
,
const
std
::
vector
<
float
>
coors_range
)
{
if
(
points
.
typ
e
().
is_cuda
())
{
if
(
points
.
devic
e
().
is_cuda
())
{
#ifdef WITH_CUDA
return
dynamic_point_to_voxel_forward_gpu
(
points
,
voxel_mapping
,
voxel_size
,
coors_range
);
...
...
@@ -95,7 +95,7 @@ inline std::vector<torch::Tensor> dynamic_point_to_voxel_forward(
inline
void
dynamic_point_to_voxel_backward
(
at
::
Tensor
&
grad_input_points
,
const
at
::
Tensor
&
grad_output_voxels
,
const
at
::
Tensor
&
point_to_voxelidx
,
const
at
::
Tensor
&
coor_to_voxelidx
)
{
if
(
grad_input_points
.
typ
e
().
is_cuda
())
{
if
(
grad_input_points
.
devic
e
().
is_cuda
())
{
#ifdef WITH_CUDA
return
dynamic_point_to_voxel_backward_gpu
(
grad_input_points
,
grad_output_voxels
,
point_to_voxelidx
,
...
...
mmdet3d/ops/voxel/src/voxelization_cpu.cpp
View file @
aec41c7f
#include <torch/extension.h>
#include <ATen/TensorUtils.h>
#include <torch/extension.h>
// #include "voxelization.h"
namespace
{
template
<
typename
T
,
typename
T_int
>
void
dynamic_voxelize_kernel
(
const
torch
::
TensorAccessor
<
T
,
2
>
points
,
void
dynamic_voxelize_kernel
(
const
torch
::
TensorAccessor
<
T
,
2
>
points
,
torch
::
TensorAccessor
<
T_int
,
2
>
coors
,
const
std
::
vector
<
float
>
voxel_size
,
const
std
::
vector
<
float
>
coors_range
,
const
std
::
vector
<
int
>
grid_size
,
const
int
num_points
,
const
int
num_features
,
const
int
NDim
)
{
const
int
num_points
,
const
int
num_features
,
const
int
NDim
)
{
const
int
ndim_minus_1
=
NDim
-
1
;
bool
failed
=
false
;
int
coor
[
NDim
];
...
...
@@ -44,56 +40,42 @@ void dynamic_voxelize_kernel(const torch::TensorAccessor<T,2> points,
return
;
}
template
<
typename
T
,
typename
T_int
>
void
hard_voxelize_kernel
(
const
torch
::
TensorAccessor
<
T
,
2
>
points
,
torch
::
TensorAccessor
<
T
,
3
>
voxels
,
torch
::
TensorAccessor
<
T_int
,
2
>
coors
,
torch
::
TensorAccessor
<
T_int
,
1
>
num_points_per_voxel
,
torch
::
TensorAccessor
<
T_int
,
3
>
coor_to_voxelidx
,
int
&
voxel_num
,
const
std
::
vector
<
float
>
voxel_size
,
void
hard_voxelize_kernel
(
const
torch
::
TensorAccessor
<
T
,
2
>
points
,
torch
::
TensorAccessor
<
T
,
3
>
voxels
,
torch
::
TensorAccessor
<
T_int
,
2
>
coors
,
torch
::
TensorAccessor
<
T_int
,
1
>
num_points_per_voxel
,
torch
::
TensorAccessor
<
T_int
,
3
>
coor_to_voxelidx
,
int
&
voxel_num
,
const
std
::
vector
<
float
>
voxel_size
,
const
std
::
vector
<
float
>
coors_range
,
const
std
::
vector
<
int
>
grid_size
,
const
int
max_points
,
const
int
max_voxels
,
const
int
num_points
,
const
int
num_features
,
const
int
NDim
)
{
const
int
max_points
,
const
int
max_voxels
,
const
int
num_points
,
const
int
num_features
,
const
int
NDim
)
{
// declare a temp coors
at
::
Tensor
temp_coors
=
at
::
zeros
({
num_points
,
NDim
},
at
::
TensorOptions
().
dtype
(
at
::
kInt
).
device
(
at
::
kCPU
));
at
::
Tensor
temp_coors
=
at
::
zeros
(
{
num_points
,
NDim
},
at
::
TensorOptions
().
dtype
(
at
::
kInt
).
device
(
at
::
kCPU
));
// First use dynamic voxelization to get coors,
// then check max points/voxels constraints
dynamic_voxelize_kernel
<
T
,
int
>
(
points
,
temp_coors
.
accessor
<
int
,
2
>
(),
voxel_size
,
coors_range
,
grid_size
,
num_points
,
num_features
,
NDim
);
dynamic_voxelize_kernel
<
T
,
int
>
(
points
,
temp_coors
.
accessor
<
int
,
2
>
(),
voxel_size
,
coors_range
,
grid_size
,
num_points
,
num_features
,
NDim
);
int
voxelidx
,
num
;
auto
coor
=
temp_coors
.
accessor
<
int
,
2
>
();
auto
coor
=
temp_coors
.
accessor
<
int
,
2
>
();
for
(
int
i
=
0
;
i
<
num_points
;
++
i
)
{
// T_int* coor = temp_coors.data_ptr<int>() + i * NDim;
if
(
coor
[
i
][
0
]
==
-
1
)
continue
;
if
(
coor
[
i
][
0
]
==
-
1
)
continue
;
voxelidx
=
coor_to_voxelidx
[
coor
[
i
][
0
]][
coor
[
i
][
1
]][
coor
[
i
][
2
]];
// record voxel
if
(
voxelidx
==
-
1
)
{
voxelidx
=
voxel_num
;
if
(
max_voxels
!=
-
1
&&
voxel_num
>=
max_voxels
)
break
;
if
(
max_voxels
!=
-
1
&&
voxel_num
>=
max_voxels
)
break
;
voxel_num
+=
1
;
coor_to_voxelidx
[
coor
[
i
][
0
]][
coor
[
i
][
1
]][
coor
[
i
][
2
]]
=
voxelidx
;
...
...
@@ -116,93 +98,74 @@ void hard_voxelize_kernel(const torch::TensorAccessor<T,2> points,
return
;
}
}
// namespace
}
// namespace
namespace
voxelization
{
int
hard_voxelize_cpu
(
const
at
::
Tensor
&
points
,
at
::
Tensor
&
voxels
,
at
::
Tensor
&
coors
,
at
::
Tensor
&
num_points_per_voxel
,
const
std
::
vector
<
float
>
voxel_size
,
const
std
::
vector
<
float
>
coors_range
,
const
int
max_points
,
const
int
max_voxels
,
const
int
NDim
=
3
)
{
// current version tooks about 0.02s_0.03s for one frame on cpu
// check device
AT_ASSERTM
(
points
.
device
().
is_cpu
(),
"points must be a CPU tensor"
);
std
::
vector
<
int
>
grid_size
(
NDim
);
const
int
num_points
=
points
.
size
(
0
);
const
int
num_features
=
points
.
size
(
1
);
for
(
int
i
=
0
;
i
<
NDim
;
++
i
)
{
grid_size
[
i
]
=
round
((
coors_range
[
NDim
+
i
]
-
coors_range
[
i
])
/
voxel_size
[
i
]);
}
int
hard_voxelize_cpu
(
const
at
::
Tensor
&
points
,
at
::
Tensor
&
voxels
,
at
::
Tensor
&
coors
,
at
::
Tensor
&
num_points_per_voxel
,
const
std
::
vector
<
float
>
voxel_size
,
const
std
::
vector
<
float
>
coors_range
,
const
int
max_points
,
const
int
max_voxels
,
const
int
NDim
=
3
)
{
// current version tooks about 0.02s_0.03s for one frame on cpu
// check device
AT_ASSERTM
(
points
.
device
().
is_cpu
(),
"points must be a CPU tensor"
);
std
::
vector
<
int
>
grid_size
(
NDim
);
const
int
num_points
=
points
.
size
(
0
);
const
int
num_features
=
points
.
size
(
1
);
for
(
int
i
=
0
;
i
<
NDim
;
++
i
)
{
grid_size
[
i
]
=
round
((
coors_range
[
NDim
+
i
]
-
coors_range
[
i
])
/
voxel_size
[
i
]);
}
// coors, num_points_per_voxel, coor_to_voxelidx are int Tensor
//printf("cpu coor_to_voxelidx size: [%d, %d, %d]\n", grid_size[2], grid_size[1], grid_size[0]);
at
::
Tensor
coor_to_voxelidx
=
-
at
::
ones
({
grid_size
[
2
],
grid_size
[
1
],
grid_size
[
0
]},
coors
.
options
());
// coors, num_points_per_voxel, coor_to_voxelidx are int Tensor
// printf("cpu coor_to_voxelidx size: [%d, %d, %d]\n", grid_size[2],
// grid_size[1], grid_size[0]);
at
::
Tensor
coor_to_voxelidx
=
-
at
::
ones
({
grid_size
[
2
],
grid_size
[
1
],
grid_size
[
0
]},
coors
.
options
());
int
voxel_num
=
0
;
AT_DISPATCH_FLOATING_TYPES_AND_HALF
(
points
.
type
(),
"hard_voxelize_forward"
,
[
&
]
{
int
voxel_num
=
0
;
AT_DISPATCH_FLOATING_TYPES_AND_HALF
(
points
.
scalar_type
(),
"hard_voxelize_forward"
,
[
&
]
{
hard_voxelize_kernel
<
scalar_t
,
int
>
(
points
.
accessor
<
scalar_t
,
2
>
(),
voxels
.
accessor
<
scalar_t
,
3
>
(),
coors
.
accessor
<
int
,
2
>
(),
num_points_per_voxel
.
accessor
<
int
,
1
>
(),
coor_to_voxelidx
.
accessor
<
int
,
3
>
(),
voxel_num
,
voxel_size
,
coors_range
,
grid_size
,
max_points
,
max_voxels
,
num_points
,
num_features
,
NDim
);
});
return
voxel_num
;
points
.
accessor
<
scalar_t
,
2
>
(),
voxels
.
accessor
<
scalar_t
,
3
>
(),
coors
.
accessor
<
int
,
2
>
(),
num_points_per_voxel
.
accessor
<
int
,
1
>
(),
coor_to_voxelidx
.
accessor
<
int
,
3
>
(),
voxel_num
,
voxel_size
,
coors_range
,
grid_size
,
max_points
,
max_voxels
,
num_points
,
num_features
,
NDim
);
});
return
voxel_num
;
}
void
dynamic_voxelize_cpu
(
const
at
::
Tensor
&
points
,
at
::
Tensor
&
coors
,
const
std
::
vector
<
float
>
voxel_size
,
const
std
::
vector
<
float
>
coors_range
,
const
int
NDim
=
3
)
{
// check device
AT_ASSERTM
(
points
.
device
().
is_cpu
(),
"points must be a CPU tensor"
);
void
dynamic_voxelize_cpu
(
const
at
::
Tensor
&
points
,
at
::
Tensor
&
coors
,
const
std
::
vector
<
float
>
voxel_size
,
const
std
::
vector
<
float
>
coors_range
,
const
int
NDim
=
3
)
{
// check device
AT_ASSERTM
(
points
.
device
().
is_cpu
(),
"points must be a CPU tensor"
);
std
::
vector
<
int
>
grid_size
(
NDim
);
const
int
num_points
=
points
.
size
(
0
);
const
int
num_features
=
points
.
size
(
1
);
std
::
vector
<
int
>
grid_size
(
NDim
);
const
int
num_points
=
points
.
size
(
0
);
const
int
num_features
=
points
.
size
(
1
);
for
(
int
i
=
0
;
i
<
NDim
;
++
i
)
{
grid_size
[
i
]
=
round
((
coors_range
[
NDim
+
i
]
-
coors_range
[
i
])
/
voxel_size
[
i
]);
}
for
(
int
i
=
0
;
i
<
NDim
;
++
i
)
{
grid_size
[
i
]
=
round
((
coors_range
[
NDim
+
i
]
-
coors_range
[
i
])
/
voxel_size
[
i
]);
}
// coors, num_points_per_voxel, coor_to_voxelidx are int Tensor
AT_DISPATCH_FLOATING_TYPES_AND_HALF
(
points
.
type
(),
"hard_voxelize_forward"
,
[
&
]
{
// coors, num_points_per_voxel, coor_to_voxelidx are int Tensor
AT_DISPATCH_FLOATING_TYPES_AND_HALF
(
points
.
scalar_type
(),
"hard_voxelize_forward"
,
[
&
]
{
dynamic_voxelize_kernel
<
scalar_t
,
int
>
(
points
.
accessor
<
scalar_t
,
2
>
(),
coors
.
accessor
<
int
,
2
>
(),
voxel_size
,
coors_range
,
grid_size
,
num_points
,
num_features
,
NDim
);
});
return
;
}
points
.
accessor
<
scalar_t
,
2
>
(),
coors
.
accessor
<
int
,
2
>
(),
voxel_size
,
coors_range
,
grid_size
,
num_points
,
num_features
,
NDim
);
});
return
;
}
}
// namespace voxelization
mmdet3d/ops/voxel/src/voxelization_cuda.cu
View file @
aec41c7f
...
...
@@ -6,7 +6,7 @@
#include <ATen/cuda/CUDAApplyUtils.cuh>
#define CHECK_CUDA(x) \
TORCH_CHECK(x.
typ
e().is_cuda(), #x " must be a CUDA tensor")
TORCH_CHECK(x.
devic
e().is_cuda(), #x " must be a CUDA tensor")
#define CHECK_CONTIGUOUS(x) \
TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
#define CHECK_INPUT(x) \
...
...
@@ -219,7 +219,7 @@ int hard_voxelize_gpu(const at::Tensor& points, at::Tensor& voxels,
// 1. link point to corresponding voxel coors
AT_DISPATCH_ALL_TYPES
(
points
.
type
(),
"hard_voxelize_kernel"
,
([
&
]
{
points
.
scalar_
type
(),
"hard_voxelize_kernel"
,
([
&
]
{
dynamic_voxelize_kernel
<
scalar_t
,
int
>
<<<
grid
,
block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
points
.
contiguous
().
data_ptr
<
scalar_t
>
(),
...
...
@@ -247,7 +247,7 @@ int hard_voxelize_gpu(const at::Tensor& points, at::Tensor& voxels,
dim3
map_grid
(
std
::
min
(
at
::
cuda
::
ATenCeilDiv
(
num_points
,
512
),
4096
));
dim3
map_block
(
512
);
AT_DISPATCH_ALL_TYPES
(
temp_coors
.
type
(),
"determin_duplicate"
,
([
&
]
{
temp_coors
.
scalar_
type
(),
"determin_duplicate"
,
([
&
]
{
point_to_voxelidx_kernel
<
int
>
<<<
map_grid
,
map_block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
temp_coors
.
contiguous
().
data_ptr
<
int
>
(),
...
...
@@ -272,7 +272,7 @@ int hard_voxelize_gpu(const at::Tensor& points, at::Tensor& voxels,
points
.
options
().
dtype
(
at
::
kInt
));
// must be zero from the begining
AT_DISPATCH_ALL_TYPES
(
temp_coors
.
type
(),
"determin_duplicate"
,
([
&
]
{
temp_coors
.
scalar_
type
(),
"determin_duplicate"
,
([
&
]
{
determin_voxel_num
<
int
><<<
1
,
1
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
num_points_per_voxel
.
contiguous
().
data_ptr
<
int
>
(),
point_to_voxelidx
.
contiguous
().
data_ptr
<
int
>
(),
...
...
@@ -290,7 +290,7 @@ int hard_voxelize_gpu(const at::Tensor& points, at::Tensor& voxels,
dim3
cp_grid
(
std
::
min
(
at
::
cuda
::
ATenCeilDiv
(
pts_output_size
,
512
),
4096
));
dim3
cp_block
(
512
);
AT_DISPATCH_ALL_TYPES
(
points
.
type
(),
"assign_point_to_voxel"
,
([
&
]
{
points
.
scalar_
type
(),
"assign_point_to_voxel"
,
([
&
]
{
assign_point_to_voxel
<
float
,
int
>
<<<
cp_grid
,
cp_block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
pts_output_size
,
points
.
contiguous
().
data_ptr
<
float
>
(),
...
...
@@ -308,7 +308,7 @@ int hard_voxelize_gpu(const at::Tensor& points, at::Tensor& voxels,
std
::
min
(
at
::
cuda
::
ATenCeilDiv
(
coors_output_size
,
512
),
4096
));
dim3
coors_cp_block
(
512
);
AT_DISPATCH_ALL_TYPES
(
points
.
type
(),
"assign_point_to_voxel"
,
([
&
]
{
points
.
scalar_
type
(),
"assign_point_to_voxel"
,
([
&
]
{
assign_voxel_coors
<
float
,
int
><<<
coors_cp_grid
,
coors_cp_block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
coors_output_size
,
temp_coors
.
contiguous
().
data_ptr
<
int
>
(),
...
...
mmdet3d/utils/__init__.py
View file @
aec41c7f
from
mmdet.utils
import
(
Registry
,
build_from_cfg
,
get_model_complexity_info
,
get_root_logger
,
print_log
)
from
mmcv.utils
import
Registry
,
build_from_cfg
from
mmdet.utils
import
get_model_complexity_info
,
get_root_logger
,
print_log
from
.collect_env
import
collect_env
__all__
=
[
...
...
tests/test_anchor.py
0 → 100644
View file @
aec41c7f
"""
CommandLine:
pytest tests/test_anchor.py
xdoctest tests/test_anchor.py zero
"""
import
torch
def
test_aligned_anchor_generator
():
from
mmdet3d.core.anchor
import
build_anchor_generator
if
torch
.
cuda
.
is_available
():
device
=
'cuda'
else
:
device
=
'cpu'
anchor_generator_cfg
=
dict
(
type
=
'AlignedAnchor3DRangeGenerator'
,
ranges
=
[[
-
51.2
,
-
51.2
,
-
1.80
,
51.2
,
51.2
,
-
1.80
]],
strides
=
[
1
,
2
,
4
],
sizes
=
[
[
0.8660
,
2.5981
,
1.
],
# 1.5/sqrt(3)
[
0.5774
,
1.7321
,
1.
],
# 1/sqrt(3)
[
1.
,
1.
,
1.
],
[
0.4
,
0.4
,
1
],
],
custom_values
=
[
0
,
0
],
rotations
=
[
0
,
1.57
],
size_per_range
=
False
,
reshape_out
=
True
)
featmap_sizes
=
[(
256
,
256
),
(
128
,
128
),
(
64
,
64
)]
anchor_generator
=
build_anchor_generator
(
anchor_generator_cfg
)
assert
anchor_generator
.
num_base_anchors
==
8
# check base anchors
expected_grid_anchors
=
[
torch
.
tensor
([[
-
51.0000
,
-
51.0000
,
-
1.8000
,
0.8660
,
2.5981
,
1.0000
,
0.0000
,
0.0000
,
0.0000
],
[
-
51.0000
,
-
51.0000
,
-
1.8000
,
0.4000
,
0.4000
,
1.0000
,
1.5700
,
0.0000
,
0.0000
],
[
-
50.6000
,
-
51.0000
,
-
1.8000
,
0.4000
,
0.4000
,
1.0000
,
0.0000
,
0.0000
,
0.0000
],
[
-
50.2000
,
-
51.0000
,
-
1.8000
,
1.0000
,
1.0000
,
1.0000
,
1.5700
,
0.0000
,
0.0000
],
[
-
49.8000
,
-
51.0000
,
-
1.8000
,
1.0000
,
1.0000
,
1.0000
,
0.0000
,
0.0000
,
0.0000
],
[
-
49.4000
,
-
51.0000
,
-
1.8000
,
0.5774
,
1.7321
,
1.0000
,
1.5700
,
0.0000
,
0.0000
],
[
-
49.0000
,
-
51.0000
,
-
1.8000
,
0.5774
,
1.7321
,
1.0000
,
0.0000
,
0.0000
,
0.0000
],
[
-
48.6000
,
-
51.0000
,
-
1.8000
,
0.8660
,
2.5981
,
1.0000
,
1.5700
,
0.0000
,
0.0000
]],
device
=
device
),
torch
.
tensor
([[
-
50.8000
,
-
50.8000
,
-
1.8000
,
1.7320
,
5.1962
,
2.0000
,
0.0000
,
0.0000
,
0.0000
],
[
-
50.8000
,
-
50.8000
,
-
1.8000
,
0.8000
,
0.8000
,
2.0000
,
1.5700
,
0.0000
,
0.0000
],
[
-
50.0000
,
-
50.8000
,
-
1.8000
,
0.8000
,
0.8000
,
2.0000
,
0.0000
,
0.0000
,
0.0000
],
[
-
49.2000
,
-
50.8000
,
-
1.8000
,
2.0000
,
2.0000
,
2.0000
,
1.5700
,
0.0000
,
0.0000
],
[
-
48.4000
,
-
50.8000
,
-
1.8000
,
2.0000
,
2.0000
,
2.0000
,
0.0000
,
0.0000
,
0.0000
],
[
-
47.6000
,
-
50.8000
,
-
1.8000
,
1.1548
,
3.4642
,
2.0000
,
1.5700
,
0.0000
,
0.0000
],
[
-
46.8000
,
-
50.8000
,
-
1.8000
,
1.1548
,
3.4642
,
2.0000
,
0.0000
,
0.0000
,
0.0000
],
[
-
46.0000
,
-
50.8000
,
-
1.8000
,
1.7320
,
5.1962
,
2.0000
,
1.5700
,
0.0000
,
0.0000
]],
device
=
device
),
torch
.
tensor
([[
-
50.4000
,
-
50.4000
,
-
1.8000
,
3.4640
,
10.3924
,
4.0000
,
0.0000
,
0.0000
,
0.0000
],
[
-
50.4000
,
-
50.4000
,
-
1.8000
,
1.6000
,
1.6000
,
4.0000
,
1.5700
,
0.0000
,
0.0000
],
[
-
48.8000
,
-
50.4000
,
-
1.8000
,
1.6000
,
1.6000
,
4.0000
,
0.0000
,
0.0000
,
0.0000
],
[
-
47.2000
,
-
50.4000
,
-
1.8000
,
4.0000
,
4.0000
,
4.0000
,
1.5700
,
0.0000
,
0.0000
],
[
-
45.6000
,
-
50.4000
,
-
1.8000
,
4.0000
,
4.0000
,
4.0000
,
0.0000
,
0.0000
,
0.0000
],
[
-
44.0000
,
-
50.4000
,
-
1.8000
,
2.3096
,
6.9284
,
4.0000
,
1.5700
,
0.0000
,
0.0000
],
[
-
42.4000
,
-
50.4000
,
-
1.8000
,
2.3096
,
6.9284
,
4.0000
,
0.0000
,
0.0000
,
0.0000
],
[
-
40.8000
,
-
50.4000
,
-
1.8000
,
3.4640
,
10.3924
,
4.0000
,
1.5700
,
0.0000
,
0.0000
]],
device
=
device
)
]
multi_level_anchors
=
anchor_generator
.
grid_anchors
(
featmap_sizes
,
device
=
device
)
expected_multi_level_shapes
=
[
torch
.
Size
([
524288
,
9
]),
torch
.
Size
([
131072
,
9
]),
torch
.
Size
([
32768
,
9
])
]
for
i
,
single_level_anchor
in
enumerate
(
multi_level_anchors
):
assert
single_level_anchor
.
shape
==
expected_multi_level_shapes
[
i
]
# set [:56:7] thus it could cover 8 (len(size) * len(rotations))
# anchors on 8 location
assert
single_level_anchor
[:
56
:
7
].
allclose
(
expected_grid_anchors
[
i
])
tests/test_config.py
View file @
aec41c7f
...
...
@@ -70,6 +70,34 @@ def test_config_build_detector():
# _check_bbox_head(head_config, detector.bbox_head)
def
test_config_build_pipeline
():
"""
Test that all detection models defined in the configs can be initialized.
"""
from
mmcv
import
Config
from
mmdet3d.datasets.pipelines
import
Compose
config_dpath
=
_get_config_directory
()
print
(
'Found config_dpath = {!r}'
.
format
(
config_dpath
))
# Other configs needs database sampler.
config_names
=
[
'nus/hv_pointpillars_secfpn_sbn-all_4x8_20e_nus-3d.py'
,
]
print
(
'Using {} config files'
.
format
(
len
(
config_names
)))
for
config_fname
in
config_names
:
config_fpath
=
join
(
config_dpath
,
config_fname
)
config_mod
=
Config
.
fromfile
(
config_fpath
)
# build train_pipeline
train_pipeline
=
Compose
(
config_mod
.
train_pipeline
)
test_pipeline
=
Compose
(
config_mod
.
test_pipeline
)
assert
train_pipeline
is
not
None
assert
test_pipeline
is
not
None
def
test_config_data_pipeline
():
"""
Test whether the data pipeline is valid and can process corner cases.
...
...
@@ -77,7 +105,7 @@ def test_config_data_pipeline():
xdoctest -m tests/test_config.py test_config_build_data_pipeline
"""
from
mmcv
import
Config
from
mmdet.datasets.pipelines
import
Compose
from
mmdet
3d
.datasets.pipelines
import
Compose
import
numpy
as
np
config_dpath
=
_get_config_directory
()
...
...
tools/create_data.py
View file @
aec41c7f
...
...
@@ -3,6 +3,8 @@ import os.path as osp
import
tools.data_converter.kitti_converter
as
kitti
import
tools.data_converter.nuscenes_converter
as
nuscenes_converter
import
tools.data_converter.scannet_converter
as
scannet
import
tools.data_converter.sunrgbd_converter
as
sunrgbd
from
tools.data_converter.create_gt_database
import
create_groundtruth_database
...
...
@@ -43,6 +45,14 @@ def nuscenes_data_prep(root_path,
'{}/{}_infos_train.pkl'
.
format
(
out_dir
,
info_prefix
))
def
scannet_data_prep
(
root_path
,
info_prefix
,
out_dir
):
scannet
.
create_scannet_info_file
(
root_path
,
info_prefix
,
out_dir
)
def
sunrgbd_data_prep
(
root_path
,
info_prefix
,
out_dir
):
sunrgbd
.
create_sunrgbd_info_file
(
root_path
,
info_prefix
,
out_dir
)
parser
=
argparse
.
ArgumentParser
(
description
=
'Data converter arg parser'
)
parser
.
add_argument
(
'dataset'
,
metavar
=
'kitti'
,
help
=
'name of the dataset'
)
parser
.
add_argument
(
...
...
@@ -104,3 +114,13 @@ if __name__ == '__main__':
dataset_name
=
'NuScenesDataset'
,
out_dir
=
args
.
out_dir
,
max_sweeps
=
args
.
max_sweeps
)
elif
args
.
dataset
==
'scannet'
:
scannet_data_prep
(
root_path
=
args
.
root_path
,
info_prefix
=
args
.
extra_tag
,
out_dir
=
args
.
out_dir
)
elif
args
.
dataset
==
'sunrgbd'
:
sunrgbd_data_prep
(
root_path
=
args
.
root_path
,
info_prefix
=
args
.
extra_tag
,
out_dir
=
args
.
out_dir
)
tools/data_converter/scannet_converter.py
0 → 100644
View file @
aec41c7f
import
os
import
pickle
from
pathlib
import
Path
from
tools.data_converter.scannet_data_utils
import
ScanNetData
def
create_scannet_info_file
(
data_path
,
pkl_prefix
=
'scannet'
,
save_path
=
None
):
assert
os
.
path
.
exists
(
data_path
)
if
save_path
is
None
:
save_path
=
Path
(
data_path
)
else
:
save_path
=
Path
(
save_path
)
assert
os
.
path
.
exists
(
save_path
)
train_filename
=
save_path
/
f
'
{
pkl_prefix
}
_infos_train.pkl'
val_filename
=
save_path
/
f
'
{
pkl_prefix
}
_infos_val.pkl'
train_dataset
=
ScanNetData
(
root_path
=
data_path
,
split
=
'train'
)
val_dataset
=
ScanNetData
(
root_path
=
data_path
,
split
=
'val'
)
scannet_infos_train
=
train_dataset
.
get_scannet_infos
(
has_label
=
True
)
with
open
(
train_filename
,
'wb'
)
as
f
:
pickle
.
dump
(
scannet_infos_train
,
f
)
print
(
'Scannet info train file is saved to %s'
%
train_filename
)
scannet_infos_val
=
val_dataset
.
get_scannet_infos
(
has_label
=
True
)
with
open
(
val_filename
,
'wb'
)
as
f
:
pickle
.
dump
(
scannet_infos_val
,
f
)
print
(
'Scannet info val file is saved to %s'
%
val_filename
)
if
__name__
==
'__main__'
:
create_scannet_info_file
(
data_path
=
'./data/scannet'
,
save_path
=
'./data/scannet'
)
tools/data_converter/scannet_data_utils.py
0 → 100644
View file @
aec41c7f
import
os
import
numpy
as
np
class
ScanNetData
(
object
):
''' Load and parse object data '''
def
__init__
(
self
,
root_path
,
split
=
'train'
):
self
.
root_dir
=
root_path
self
.
split
=
split
self
.
split_dir
=
os
.
path
.
join
(
root_path
)
self
.
classes
=
[
'cabinet'
,
'bed'
,
'chair'
,
'sofa'
,
'table'
,
'door'
,
'window'
,
'bookshelf'
,
'picture'
,
'counter'
,
'desk'
,
'curtain'
,
'refrigerator'
,
'showercurtrain'
,
'toilet'
,
'sink'
,
'bathtub'
,
'garbagebin'
]
self
.
cat2label
=
{
cat
:
self
.
classes
.
index
(
cat
)
for
cat
in
self
.
classes
}
self
.
label2cat
=
{
self
.
cat2label
[
t
]:
t
for
t
in
self
.
cat2label
}
self
.
cat_ids
=
np
.
array
(
[
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
14
,
16
,
24
,
28
,
33
,
34
,
36
,
39
])
self
.
cat_ids2class
=
{
nyu40id
:
i
for
i
,
nyu40id
in
enumerate
(
list
(
self
.
cat_ids
))
}
assert
split
in
[
'train'
,
'val'
,
'test'
]
split_dir
=
os
.
path
.
join
(
self
.
root_dir
,
'meta_data'
,
'scannetv2_%s.txt'
%
split
)
self
.
sample_id_list
=
[
x
.
strip
()
for
x
in
open
(
split_dir
).
readlines
()
]
if
os
.
path
.
exists
(
split_dir
)
else
None
def
__len__
(
self
):
return
len
(
self
.
sample_id_list
)
def
get_box_label
(
self
,
idx
):
box_file
=
os
.
path
.
join
(
self
.
root_dir
,
'scannet_train_instance_data'
,
'%s_bbox.npy'
%
idx
)
assert
os
.
path
.
exists
(
box_file
)
return
np
.
load
(
box_file
)
def
get_scannet_infos
(
self
,
num_workers
=
4
,
has_label
=
True
,
sample_id_list
=
None
):
import
concurrent.futures
as
futures
def
process_single_scene
(
sample_idx
):
print
(
'%s sample_idx: %s'
%
(
self
.
split
,
sample_idx
))
info
=
dict
()
pc_info
=
{
'num_features'
:
6
,
'lidar_idx'
:
sample_idx
}
info
[
'point_cloud'
]
=
pc_info
if
has_label
:
annotations
=
{}
boxes_with_classes
=
self
.
get_box_label
(
sample_idx
)
# k, 6 + class
annotations
[
'gt_num'
]
=
boxes_with_classes
.
shape
[
0
]
if
annotations
[
'gt_num'
]
!=
0
:
minmax_boxes3d
=
boxes_with_classes
[:,
:
-
1
]
# k, 6
classes
=
boxes_with_classes
[:,
-
1
]
# k, 1
annotations
[
'name'
]
=
np
.
array
([
self
.
label2cat
[
self
.
cat_ids2class
[
classes
[
i
]]]
for
i
in
range
(
annotations
[
'gt_num'
])
])
annotations
[
'location'
]
=
minmax_boxes3d
[:,
:
3
]
annotations
[
'dimensions'
]
=
minmax_boxes3d
[:,
3
:
6
]
annotations
[
'gt_boxes_upright_depth'
]
=
minmax_boxes3d
annotations
[
'index'
]
=
np
.
arange
(
annotations
[
'gt_num'
],
dtype
=
np
.
int32
)
annotations
[
'class'
]
=
np
.
array
([
self
.
cat_ids2class
[
classes
[
i
]]
for
i
in
range
(
annotations
[
'gt_num'
])
])
info
[
'annos'
]
=
annotations
return
info
sample_id_list
=
sample_id_list
if
sample_id_list
is
not
None
\
else
self
.
sample_id_list
with
futures
.
ThreadPoolExecutor
(
num_workers
)
as
executor
:
infos
=
executor
.
map
(
process_single_scene
,
sample_id_list
)
return
list
(
infos
)
tools/data_converter/sunrgbd_converter.py
0 → 100644
View file @
aec41c7f
import
os
import
pickle
from
pathlib
import
Path
from
tools.data_converter.sunrgbd_data_utils
import
SUNRGBDData
def
create_sunrgbd_info_file
(
data_path
,
pkl_prefix
=
'sunrgbd'
,
save_path
=
None
,
use_v1
=
False
):
assert
os
.
path
.
exists
(
data_path
)
if
save_path
is
None
:
save_path
=
Path
(
data_path
)
else
:
save_path
=
Path
(
save_path
)
assert
os
.
path
.
exists
(
save_path
)
train_filename
=
save_path
/
f
'
{
pkl_prefix
}
_infos_train.pkl'
val_filename
=
save_path
/
f
'
{
pkl_prefix
}
_infos_val.pkl'
train_dataset
=
SUNRGBDData
(
root_path
=
data_path
,
split
=
'train'
,
use_v1
=
use_v1
)
val_dataset
=
SUNRGBDData
(
root_path
=
data_path
,
split
=
'val'
,
use_v1
=
use_v1
)
sunrgbd_infos_train
=
train_dataset
.
get_sunrgbd_infos
(
has_label
=
True
)
with
open
(
train_filename
,
'wb'
)
as
f
:
pickle
.
dump
(
sunrgbd_infos_train
,
f
)
print
(
'Sunrgbd info train file is saved to %s'
%
train_filename
)
sunrgbd_infos_val
=
val_dataset
.
get_sunrgbd_infos
(
has_label
=
True
)
with
open
(
val_filename
,
'wb'
)
as
f
:
pickle
.
dump
(
sunrgbd_infos_val
,
f
)
print
(
'Sunrgbd info val file is saved to %s'
%
val_filename
)
if
__name__
==
'__main__'
:
create_sunrgbd_info_file
(
data_path
=
'./data/sunrgbd/sunrgbd_trainval'
,
save_path
=
'./data/sunrgbd'
)
tools/data_converter/sunrgbd_data_utils.py
0 → 100644
View file @
aec41c7f
import
os
import
cv2
import
numpy
as
np
import
scipy.io
as
sio
def
random_sampling
(
pc
,
num_sample
,
replace
=
None
,
return_choices
=
False
):
""" Input is NxC, output is num_samplexC
"""
if
replace
is
None
:
replace
=
(
pc
.
shape
[
0
]
<
num_sample
)
choices
=
np
.
random
.
choice
(
pc
.
shape
[
0
],
num_sample
,
replace
=
replace
)
if
return_choices
:
return
pc
[
choices
],
choices
else
:
return
pc
[
choices
]
class
SUNRGBDInstance
(
object
):
def
__init__
(
self
,
line
):
data
=
line
.
split
(
' '
)
data
[
1
:]
=
[
float
(
x
)
for
x
in
data
[
1
:]]
self
.
classname
=
data
[
0
]
self
.
xmin
=
data
[
1
]
self
.
ymin
=
data
[
2
]
self
.
xmax
=
data
[
1
]
+
data
[
3
]
self
.
ymax
=
data
[
2
]
+
data
[
4
]
self
.
box2d
=
np
.
array
([
self
.
xmin
,
self
.
ymin
,
self
.
xmax
,
self
.
ymax
])
self
.
centroid
=
np
.
array
([
data
[
5
],
data
[
6
],
data
[
7
]])
self
.
w
=
data
[
8
]
self
.
l
=
data
[
9
]
# noqa: E741
self
.
h
=
data
[
10
]
self
.
orientation
=
np
.
zeros
((
3
,
))
self
.
orientation
[
0
]
=
data
[
11
]
self
.
orientation
[
1
]
=
data
[
12
]
self
.
heading_angle
=
-
1
*
np
.
arctan2
(
self
.
orientation
[
1
],
self
.
orientation
[
0
])
self
.
box3d
=
np
.
concatenate
([
self
.
centroid
,
np
.
array
([
self
.
l
*
2
,
self
.
w
*
2
,
self
.
h
*
2
,
self
.
heading_angle
])
])
class
SUNRGBDData
(
object
):
''' Load and parse object data '''
def
__init__
(
self
,
root_path
,
split
=
'train'
,
use_v1
=
False
):
self
.
root_dir
=
root_path
self
.
split
=
split
self
.
split_dir
=
os
.
path
.
join
(
root_path
)
self
.
classes
=
[
'bed'
,
'table'
,
'sofa'
,
'chair'
,
'toilet'
,
'desk'
,
'dresser'
,
'night_stand'
,
'bookshelf'
,
'bathtub'
]
self
.
cat2label
=
{
cat
:
self
.
classes
.
index
(
cat
)
for
cat
in
self
.
classes
}
self
.
label2cat
=
{
label
:
self
.
classes
[
label
]
for
label
in
range
(
len
(
self
.
classes
))
}
assert
split
in
[
'train'
,
'val'
,
'test'
]
split_dir
=
os
.
path
.
join
(
self
.
root_dir
,
'%s_data_idx.txt'
%
split
)
self
.
sample_id_list
=
[
int
(
x
.
strip
())
for
x
in
open
(
split_dir
).
readlines
()
]
if
os
.
path
.
exists
(
split_dir
)
else
None
self
.
image_dir
=
os
.
path
.
join
(
self
.
split_dir
,
'image'
)
self
.
calib_dir
=
os
.
path
.
join
(
self
.
split_dir
,
'calib'
)
self
.
depth_dir
=
os
.
path
.
join
(
self
.
split_dir
,
'depth'
)
if
use_v1
:
self
.
label_dir
=
os
.
path
.
join
(
self
.
split_dir
,
'label_v1'
)
else
:
self
.
label_dir
=
os
.
path
.
join
(
self
.
split_dir
,
'label'
)
def
__len__
(
self
):
return
len
(
self
.
sample_id_list
)
def
get_image
(
self
,
idx
):
img_filename
=
os
.
path
.
join
(
self
.
image_dir
,
'%06d.jpg'
%
(
idx
))
return
cv2
.
imread
(
img_filename
)
def
get_image_shape
(
self
,
idx
):
image
=
self
.
get_image
(
idx
)
return
np
.
array
(
image
.
shape
[:
2
],
dtype
=
np
.
int32
)
def
get_depth
(
self
,
idx
):
depth_filename
=
os
.
path
.
join
(
self
.
depth_dir
,
'%06d.mat'
%
(
idx
))
depth
=
sio
.
loadmat
(
depth_filename
)[
'instance'
]
return
depth
def
get_calibration
(
self
,
idx
):
calib_filepath
=
os
.
path
.
join
(
self
.
calib_dir
,
'%06d.txt'
%
(
idx
))
lines
=
[
line
.
rstrip
()
for
line
in
open
(
calib_filepath
)]
Rt
=
np
.
array
([
float
(
x
)
for
x
in
lines
[
0
].
split
(
' '
)])
Rt
=
np
.
reshape
(
Rt
,
(
3
,
3
),
order
=
'F'
)
K
=
np
.
array
([
float
(
x
)
for
x
in
lines
[
1
].
split
(
' '
)])
return
K
,
Rt
def
get_label_objects
(
self
,
idx
):
label_filename
=
os
.
path
.
join
(
self
.
label_dir
,
'%06d.txt'
%
(
idx
))
lines
=
[
line
.
rstrip
()
for
line
in
open
(
label_filename
)]
objects
=
[
SUNRGBDInstance
(
line
)
for
line
in
lines
]
return
objects
def
get_sunrgbd_infos
(
self
,
num_workers
=
4
,
has_label
=
True
,
sample_id_list
=
None
):
import
concurrent.futures
as
futures
def
process_single_scene
(
sample_idx
):
print
(
'%s sample_idx: %s'
%
(
self
.
split
,
sample_idx
))
# convert depth to points
SAMPLE_NUM
=
50000
pc_upright_depth
=
self
.
get_depth
(
sample_idx
)
# TODO : sample points in loading process and test
pc_upright_depth_subsampled
=
random_sampling
(
pc_upright_depth
,
SAMPLE_NUM
)
np
.
savez_compressed
(
os
.
path
.
join
(
self
.
root_dir
,
'lidar'
,
'%06d.npz'
%
sample_idx
),
pc
=
pc_upright_depth_subsampled
)
info
=
dict
()
pc_info
=
{
'num_features'
:
6
,
'lidar_idx'
:
sample_idx
}
info
[
'point_cloud'
]
=
pc_info
img_name
=
os
.
path
.
join
(
self
.
image_dir
,
'%06d.jpg'
%
(
sample_idx
))
img_path
=
os
.
path
.
join
(
self
.
image_dir
,
img_name
)
image_info
=
{
'image_idx'
:
sample_idx
,
'image_shape'
:
self
.
get_image_shape
(
sample_idx
),
'image_path'
:
img_path
}
info
[
'image'
]
=
image_info
K
,
Rt
=
self
.
get_calibration
(
sample_idx
)
calib_info
=
{
'K'
:
K
,
'Rt'
:
Rt
}
info
[
'calib'
]
=
calib_info
if
has_label
:
obj_list
=
self
.
get_label_objects
(
sample_idx
)
annotations
=
{}
annotations
[
'gt_num'
]
=
len
([
obj
.
classname
for
obj
in
obj_list
if
obj
.
classname
in
self
.
cat2label
.
keys
()
])
if
annotations
[
'gt_num'
]
!=
0
:
annotations
[
'name'
]
=
np
.
array
([
obj
.
classname
for
obj
in
obj_list
if
obj
.
classname
in
self
.
cat2label
.
keys
()
])
annotations
[
'bbox'
]
=
np
.
concatenate
([
obj
.
box2d
.
reshape
(
1
,
4
)
for
obj
in
obj_list
if
obj
.
classname
in
self
.
cat2label
.
keys
()
],
axis
=
0
)
annotations
[
'location'
]
=
np
.
concatenate
([
obj
.
centroid
.
reshape
(
1
,
3
)
for
obj
in
obj_list
if
obj
.
classname
in
self
.
cat2label
.
keys
()
],
axis
=
0
)
annotations
[
'dimensions'
]
=
2
*
np
.
array
([
[
obj
.
l
,
obj
.
h
,
obj
.
w
]
for
obj
in
obj_list
if
obj
.
classname
in
self
.
cat2label
.
keys
()
])
# lhw(depth) format
annotations
[
'rotation_y'
]
=
np
.
array
([
obj
.
heading_angle
for
obj
in
obj_list
if
obj
.
classname
in
self
.
cat2label
.
keys
()
])
annotations
[
'index'
]
=
np
.
arange
(
len
(
obj_list
),
dtype
=
np
.
int32
)
annotations
[
'class'
]
=
np
.
array
([
self
.
cat2label
[
obj
.
classname
]
for
obj
in
obj_list
if
obj
.
classname
in
self
.
cat2label
.
keys
()
])
annotations
[
'gt_boxes_upright_depth'
]
=
np
.
stack
(
[
obj
.
box3d
for
obj
in
obj_list
if
obj
.
classname
in
self
.
cat2label
.
keys
()
],
axis
=
0
)
# (K,8)
info
[
'annos'
]
=
annotations
return
info
lidar_save_dir
=
os
.
path
.
join
(
self
.
root_dir
,
'lidar'
)
if
not
os
.
path
.
exists
(
lidar_save_dir
):
os
.
mkdir
(
lidar_save_dir
)
sample_id_list
=
sample_id_list
if
\
sample_id_list
is
not
None
else
self
.
sample_id_list
with
futures
.
ThreadPoolExecutor
(
num_workers
)
as
executor
:
infos
=
executor
.
map
(
process_single_scene
,
sample_id_list
)
return
list
(
infos
)
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment