OpenDAS / mmdetection3d · Commits · aec41c7f
"vscode:/vscode.git/clone" did not exist on "45c726c2c43f44081817ae2e05edc2ff1ed86e3a"
Commit aec41c7f, authored Apr 27, 2020 by zhangwenwei
Merge master
Parents: 49f06039, 4eca6606
Changes: 62. Showing 20 changed files with 1257 additions and 687 deletions.
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp    +56  -54
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu    +102 -90
mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d.cpp        +123 -100
mmdet3d/ops/spconv/include/spconv/fused_spconv_ops.h       +34  -27
mmdet3d/ops/spconv/include/spconv/pool_ops.h               +9   -8
mmdet3d/ops/spconv/include/spconv/spconv_ops.h             +162 -131
mmdet3d/ops/spconv/include/torch_utils.h                   +34  -33
mmdet3d/ops/voxel/src/scatter_points_cpu.cpp               +101 -110
mmdet3d/ops/voxel/src/scatter_points_cuda.cu               +6   -6
mmdet3d/ops/voxel/src/voxelization.h                       +4   -4
mmdet3d/ops/voxel/src/voxelization_cpu.cpp                 +78  -115
mmdet3d/ops/voxel/src/voxelization_cuda.cu                 +6   -6
mmdet3d/utils/__init__.py                                  +3   -2
tests/test_anchor.py                                       +149 -0
tests/test_config.py                                       +29  -1
tools/create_data.py                                       +20  -0
tools/data_converter/scannet_converter.py                  +31  -0
tools/data_converter/scannet_data_utils.py                 +82  -0
tools/data_converter/sunrgbd_converter.py                  +36  -0
tools/data_converter/sunrgbd_data_utils.py                 +192 -0
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cpu.cpp

// Modified from
// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
// Points in boxes cpu
// Written by Shaoshuai Shi
// All Rights Reserved 2019.

-#include <torch/serialize/tensor.h>
-#include <torch/extension.h>
 #include <assert.h>
 #include <math.h>
 #include <stdio.h>
+#include <torch/extension.h>
+#include <torch/serialize/tensor.h>

-#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+#define CHECK_CONTIGUOUS(x) \
+  TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")

// #define DEBUG

inline void lidar_to_local_coords_cpu(float shift_x, float shift_y, float rz,
                                      float &local_x, float &local_y) {
  // should rotate pi/2 + alpha to translate LiDAR to local
  float rot_angle = rz + M_PI / 2;
  float cosa = cos(rot_angle), sina = sin(rot_angle);
  local_x = shift_x * cosa + shift_y * (-sina);
  local_y = shift_x * sina + shift_y * cosa;
}

inline int check_pt_in_box3d_cpu(const float *pt, const float *box3d,
                                 float &local_x, float &local_y) {
  // param pt: (x, y, z)
  // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
  // bottom center
  float x = pt[0], y = pt[1], z = pt[2];
  float cx = box3d[0], cy = box3d[1], cz = box3d[2];
  float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];
  cz += h / 2.0;  // shift to the center since cz in box3d is the bottom center
  if (fabsf(z - cz) > h / 2.0) return 0;
  lidar_to_local_coords_cpu(x - cx, y - cy, rz, local_x, local_y);
  float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &
                  (local_y > -w / 2.0) & (local_y < w / 2.0);
  return in_flag;
}

int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
                        at::Tensor pts_indices_tensor) {
  // params boxes: (N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is the
  // bottom center, each box DO NOT overlaps
  // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
  // params pts_indices: (N, npoints)
  CHECK_CONTIGUOUS(boxes_tensor);
  CHECK_CONTIGUOUS(pts_tensor);
  CHECK_CONTIGUOUS(pts_indices_tensor);

  int boxes_num = boxes_tensor.size(0);
  int pts_num = pts_tensor.size(0);

-  const float *boxes = boxes_tensor.data<float>();
-  const float *pts = pts_tensor.data<float>();
-  int *pts_indices = pts_indices_tensor.data<int>();
+  const float *boxes = boxes_tensor.data_ptr<float>();
+  const float *pts = pts_tensor.data_ptr<float>();
+  int *pts_indices = pts_indices_tensor.data_ptr<int>();

  float local_x = 0, local_y = 0;
  for (int i = 0; i < boxes_num; i++) {
    for (int j = 0; j < pts_num; j++) {
      int cur_in_flag =
          check_pt_in_box3d_cpu(pts + j * 3, boxes + i * 7, local_x, local_y);
      pts_indices[i * pts_num + j] = cur_in_flag;
    }
  }

  return 1;
}
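
The binding above operates on plain ATen tensors with the shapes documented in its comments. A rough host-side sketch of a caller (an illustration, not part of this commit; N and npoints are placeholder sizes):

// Hypothetical caller of the CPU op declared above.
#include <torch/extension.h>

int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
                        at::Tensor pts_indices_tensor);

void points_in_boxes_cpu_example() {
  const int N = 4, npoints = 1024;
  at::Tensor boxes = at::zeros({N, 7}, at::kFloat);      // (N, 7) [x, y, z, w, l, h, rz]
  at::Tensor pts = at::zeros({npoints, 3}, at::kFloat);  // (npoints, 3) [x, y, z]
  at::Tensor idx = at::zeros({N, npoints}, at::kInt);    // (N, npoints), filled in place
  points_in_boxes_cpu(boxes, pts, idx);  // idx[i][j] == 1 if point j falls inside box i
}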
mmdet3d/ops/roiaware_pool3d/src/points_in_boxes_cuda.cu

// Modified from
// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
// Points in boxes gpu
// Written by Shaoshuai Shi
// All Rights Reserved 2019.

-#include <torch/serialize/tensor.h>
-#include <torch/extension.h>
 #include <assert.h>
 #include <math.h>
 #include <stdio.h>
+#include <torch/extension.h>
+#include <torch/serialize/tensor.h>

#define THREADS_PER_BLOCK 256
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
-#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
-#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
+#define CHECK_CUDA(x) \
+  TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CONTIGUOUS(x) \
+  TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+#define CHECK_INPUT(x) \
+  CHECK_CUDA(x);       \
+  CHECK_CONTIGUOUS(x)

// #define DEBUG

__device__ inline void lidar_to_local_coords(float shift_x, float shift_y,
                                             float rz, float &local_x,
                                             float &local_y) {
  // should rotate pi/2 + alpha to translate LiDAR to local
  float rot_angle = rz + M_PI / 2;
  float cosa = cos(rot_angle), sina = sin(rot_angle);
  local_x = shift_x * cosa + shift_y * (-sina);
  local_y = shift_x * sina + shift_y * cosa;
}

__device__ inline int check_pt_in_box3d(const float *pt, const float *box3d,
                                        float &local_x, float &local_y) {
  // param pt: (x, y, z)
  // param box3d: (cx, cy, cz, w, l, h, rz) in LiDAR coordinate, cz in the
  // bottom center
  float x = pt[0], y = pt[1], z = pt[2];
  float cx = box3d[0], cy = box3d[1], cz = box3d[2];
  float w = box3d[3], l = box3d[4], h = box3d[5], rz = box3d[6];
  cz += h / 2.0;  // shift to the center since cz in box3d is the bottom center
  if (fabsf(z - cz) > h / 2.0) return 0;
  lidar_to_local_coords(x - cx, y - cy, rz, local_x, local_y);
  float in_flag = (local_x > -l / 2.0) & (local_x < l / 2.0) &
                  (local_y > -w / 2.0) & (local_y < w / 2.0);
  return in_flag;
}

__global__ void points_in_boxes_kernel(int batch_size, int boxes_num,
                                       int pts_num, const float *boxes,
                                       const float *pts,
                                       int *box_idx_of_points) {
  // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
  // the bottom center, each box DO NOT overlaps
  // params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
  // params boxes_idx_of_points: (B, npoints), default -1
  int bs_idx = blockIdx.y;
  int pt_idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (bs_idx >= batch_size || pt_idx >= pts_num) return;

  boxes += bs_idx * boxes_num * 7;
  pts += bs_idx * pts_num * 3 + pt_idx * 3;
  box_idx_of_points += bs_idx * pts_num + pt_idx;

  float local_x = 0, local_y = 0;
  int cur_in_flag = 0;
  for (int k = 0; k < boxes_num; k++) {
    cur_in_flag = check_pt_in_box3d(pts, boxes + k * 7, local_x, local_y);
    if (cur_in_flag) {
      box_idx_of_points[0] = k;
      break;
    }
  }
}

void points_in_boxes_launcher(int batch_size, int boxes_num, int pts_num,
                              const float *boxes, const float *pts,
                              int *box_idx_of_points) {
  // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
  // the bottom center, each box DO NOT overlaps
  // params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
  // params boxes_idx_of_points: (B, npoints), default -1
  cudaError_t err;

  dim3 blocks(DIVUP(pts_num, THREADS_PER_BLOCK), batch_size);
  dim3 threads(THREADS_PER_BLOCK);
  points_in_boxes_kernel<<<blocks, threads>>>(batch_size, boxes_num, pts_num,
                                              boxes, pts, box_idx_of_points);

  err = cudaGetLastError();
  if (cudaSuccess != err) {
    fprintf(stderr, "CUDA kernel failed : %s\n", cudaGetErrorString(err));
    exit(-1);
  }

#ifdef DEBUG
  cudaDeviceSynchronize();  // for using printf in kernel function
#endif
}

int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
                        at::Tensor box_idx_of_points_tensor) {
  // params boxes: (B, N, 7) [x, y, z, w, l, h, rz] in LiDAR coordinate, z is
  // the bottom center, each box DO NOT overlaps
  // params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
  // params boxes_idx_of_points: (B, npoints), default -1
  CHECK_INPUT(boxes_tensor);
  CHECK_INPUT(pts_tensor);
  CHECK_INPUT(box_idx_of_points_tensor);

  int batch_size = boxes_tensor.size(0);
  int boxes_num = boxes_tensor.size(1);
  int pts_num = pts_tensor.size(1);

-  const float *boxes = boxes_tensor.data<float>();
-  const float *pts = pts_tensor.data<float>();
-  int *box_idx_of_points = box_idx_of_points_tensor.data<int>();
+  const float *boxes = boxes_tensor.data_ptr<float>();
+  const float *pts = pts_tensor.data_ptr<float>();
+  int *box_idx_of_points = box_idx_of_points_tensor.data_ptr<int>();

  points_in_boxes_launcher(batch_size, boxes_num, pts_num, boxes, pts,
                           box_idx_of_points);

  return 1;
}
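
The launcher sizes its grid with DIVUP, i.e. a ceiling division. A small standalone sketch with assumed numbers (not taken from the diff) makes the arithmetic concrete:

// Standalone illustration of the launch geometry; 1000 points is an assumed value.
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))

// With pts_num = 1000 and THREADS_PER_BLOCK = 256:
//   DIVUP(1000, 256) = 1000 / 256 + (1000 % 256 > 0) = 3 + 1 = 4
// so the kernel runs on a (4, batch_size) grid of 256-thread blocks, and the
// `pt_idx >= pts_num` guard discards the 24 spare threads in the last block.
static_assert(DIVUP(1000, 256) == 4, "ceil-division of 1000 by 256");
static_assert(DIVUP(1024, 256) == 4, "no extra block when evenly divisible");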
mmdet3d/ops/roiaware_pool3d/src/roiaware_pool3d.cpp

// Modified from
// https://github.com/sshaoshuai/PCDet/blob/master/pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
// RoI-aware point cloud feature pooling
// Written by Shaoshuai Shi
// All Rights Reserved 2019.

-#include <torch/serialize/tensor.h>
-#include <torch/extension.h>
 #include <assert.h>
+#include <torch/extension.h>
+#include <torch/serialize/tensor.h>

-#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
-#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
-#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
+#define CHECK_CUDA(x) \
+  TORCH_CHECK(x.device().is_cuda(), #x, " must be a CUDAtensor ")
+#define CHECK_CONTIGUOUS(x) \
+  TORCH_CHECK(x.is_contiguous(), #x, " must be contiguous ")
+#define CHECK_INPUT(x) \
+  CHECK_CUDA(x);       \
+  CHECK_CONTIGUOUS(x)

void roiaware_pool3d_launcher(int boxes_num, int pts_num, int channels,
                              int max_pts_each_voxel, int out_x, int out_y,
                              int out_z, const float *rois, const float *pts,
                              const float *pts_feature, int *argmax,
                              int *pts_idx_of_voxels, float *pooled_features,
                              int pool_method);

void roiaware_pool3d_backward_launcher(int boxes_num, int out_x, int out_y,
                                       int out_z, int channels,
                                       int max_pts_each_voxel,
                                       const int *pts_idx_of_voxels,
                                       const int *argmax,
                                       const float *grad_out, float *grad_in,
                                       int pool_method);

int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts,
                        at::Tensor pts_feature, at::Tensor argmax,
                        at::Tensor pts_idx_of_voxels,
                        at::Tensor pooled_features, int pool_method);

int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels,
                                 at::Tensor argmax, at::Tensor grad_out,
                                 at::Tensor grad_in, int pool_method);

int points_in_boxes_cpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
                        at::Tensor pts_indices_tensor);

int points_in_boxes_gpu(at::Tensor boxes_tensor, at::Tensor pts_tensor,
                        at::Tensor box_idx_of_points_tensor);

int roiaware_pool3d_gpu(at::Tensor rois, at::Tensor pts,
                        at::Tensor pts_feature, at::Tensor argmax,
                        at::Tensor pts_idx_of_voxels,
                        at::Tensor pooled_features, int pool_method) {
  // params rois: (N, 7) [x, y, z, w, l, h, ry] in LiDAR coordinate
  // params pts: (npoints, 3) [x, y, z] in LiDAR coordinate
  // params pts_feature: (npoints, C)
  // params argmax: (N, out_x, out_y, out_z, C)
  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
  // params pooled_features: (N, out_x, out_y, out_z, C)
  // params pool_method: 0: max_pool 1: avg_pool
  CHECK_INPUT(rois);
  CHECK_INPUT(pts);
  CHECK_INPUT(pts_feature);
  CHECK_INPUT(argmax);
  CHECK_INPUT(pts_idx_of_voxels);
  CHECK_INPUT(pooled_features);

  int boxes_num = rois.size(0);
  int pts_num = pts.size(0);
  int channels = pts_feature.size(1);
  int max_pts_each_voxel = pts_idx_of_voxels.size(4);  // index 0 is the counter
  int out_x = pts_idx_of_voxels.size(1);
  int out_y = pts_idx_of_voxels.size(2);
  int out_z = pts_idx_of_voxels.size(3);
  assert((out_x < 256) && (out_y < 256) &&
         (out_z < 256));  // we encode index with 8bit

-  const float *rois_data = rois.data<float>();
-  const float *pts_data = pts.data<float>();
-  const float *pts_feature_data = pts_feature.data<float>();
-  int *argmax_data = argmax.data<int>();
-  int *pts_idx_of_voxels_data = pts_idx_of_voxels.data<int>();
-  float *pooled_features_data = pooled_features.data<float>();
+  const float *rois_data = rois.data_ptr<float>();
+  const float *pts_data = pts.data_ptr<float>();
+  const float *pts_feature_data = pts_feature.data_ptr<float>();
+  int *argmax_data = argmax.data_ptr<int>();
+  int *pts_idx_of_voxels_data = pts_idx_of_voxels.data_ptr<int>();
+  float *pooled_features_data = pooled_features.data_ptr<float>();

  roiaware_pool3d_launcher(boxes_num, pts_num, channels, max_pts_each_voxel,
                           out_x, out_y, out_z, rois_data, pts_data,
                           pts_feature_data, argmax_data,
                           pts_idx_of_voxels_data, pooled_features_data,
                           pool_method);

  return 1;
}

int roiaware_pool3d_gpu_backward(at::Tensor pts_idx_of_voxels,
                                 at::Tensor argmax, at::Tensor grad_out,
                                 at::Tensor grad_in, int pool_method) {
  // params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
  // params argmax: (N, out_x, out_y, out_z, C)
  // params grad_out: (N, out_x, out_y, out_z, C)
  // params grad_in: (npoints, C), return value
  // params pool_method: 0: max_pool 1: avg_pool
  CHECK_INPUT(pts_idx_of_voxels);
  CHECK_INPUT(argmax);
  CHECK_INPUT(grad_out);
  CHECK_INPUT(grad_in);

  int boxes_num = pts_idx_of_voxels.size(0);
  int out_x = pts_idx_of_voxels.size(1);
  int out_y = pts_idx_of_voxels.size(2);
  int out_z = pts_idx_of_voxels.size(3);
  int max_pts_each_voxel = pts_idx_of_voxels.size(4);  // index 0 is the counter
  int channels = grad_out.size(4);

-  const int *pts_idx_of_voxels_data = pts_idx_of_voxels.data<int>();
-  const int *argmax_data = argmax.data<int>();
-  const float *grad_out_data = grad_out.data<float>();
-  float *grad_in_data = grad_in.data<float>();
+  const int *pts_idx_of_voxels_data = pts_idx_of_voxels.data_ptr<int>();
+  const int *argmax_data = argmax.data_ptr<int>();
+  const float *grad_out_data = grad_out.data_ptr<float>();
+  float *grad_in_data = grad_in.data_ptr<float>();

  roiaware_pool3d_backward_launcher(boxes_num, out_x, out_y, out_z, channels,
                                    max_pts_each_voxel, pts_idx_of_voxels_data,
                                    argmax_data, grad_out_data, grad_in_data,
                                    pool_method);

  return 1;
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &roiaware_pool3d_gpu, "roiaware pool3d forward (CUDA)");
  m.def("backward", &roiaware_pool3d_gpu_backward,
        "roiaware pool3d backward (CUDA)");
  m.def("points_in_boxes_gpu", &points_in_boxes_gpu,
        "points_in_boxes_gpu forward (CUDA)");
  m.def("points_in_boxes_cpu", &points_in_boxes_cpu,
        "points_in_boxes_cpu forward (CPU)");
}
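
The assert in roiaware_pool3d_gpu keeps out_x, out_y and out_z below 256, matching the "we encode index with 8bit" comment. A standalone illustration of the kind of 8-bit-per-axis packing this permits (the helper names are hypothetical, not the kernel's actual encoding):

// Illustration only: with fewer than 256 cells per axis, a voxel coordinate
// fits into one byte per axis and can be packed into a single 32-bit integer.
#include <cstdint>

inline uint32_t pack_voxel_coord(uint32_t x, uint32_t y, uint32_t z) {
  return (x << 16) | (y << 8) | z;  // 8 bits per component
}

inline void unpack_voxel_coord(uint32_t packed, uint32_t &x, uint32_t &y,
                               uint32_t &z) {
  x = (packed >> 16) & 0xFF;
  y = (packed >> 8) & 0xFF;
  z = packed & 0xFF;
}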
mmdet3d/ops/spconv/include/spconv/fused_spconv_ops.h

@@ -26,9 +26,10 @@ namespace spconv {
 // torch.jit's doc says only support int64, so we need to convert to int32.
 template <typename T>
 torch::Tensor fusedIndiceConvBatchNorm(torch::Tensor features,
                                        torch::Tensor filters,
                                        torch::Tensor bias,
                                        torch::Tensor indicePairs,
                                        torch::Tensor indiceNum,
                                        int64_t numActOut, int64_t _inverse,
                                        int64_t _subM) {
   bool subM = _subM != 0;
   bool inverse = _inverse != 0;
   auto device = features.device().type();
@@ -37,13 +38,16 @@ torch::Tensor fusedIndiceConvBatchNorm(torch::Tensor features, torch::Tensor fil
   auto numInPlanes = features.size(1);
   auto numOutPlanes = filters.size(ndim + 1);
   auto indicePairNumCpu = indiceNum.to({torch::kCPU});
-  auto indicePairMaxSizeIter = std::max_element(
-      indicePairNumCpu.data<int>(), indicePairNumCpu.data<int>() + kernelVolume);
-  int indicePairMaxOffset = indicePairMaxSizeIter - indicePairNumCpu.data<int>();
+  auto indicePairMaxSizeIter =
+      std::max_element(indicePairNumCpu.data_ptr<int>(),
+                       indicePairNumCpu.data_ptr<int>() + kernelVolume);
+  int indicePairMaxOffset =
+      indicePairMaxSizeIter - indicePairNumCpu.data_ptr<int>();
   int indicePairMaxSize = *indicePairMaxSizeIter;

   /*if (_subM){
-    std::vector<int> indicePairNumVec(indicePairNumCpu.data<int>(), indicePairNumCpu.data<int>() + kernelVolume);
+    std::vector<int> indicePairNumVec(indicePairNumCpu.data_ptr<int>(),
+                                      indicePairNumCpu.data_ptr<int>() + kernelVolume);
     indicePairNumVec.erase(indicePairNumVec.begin() + indicePairMaxOffset);
     auto indicePairVecMaxSizeIter = std::max_element(
@@ -56,46 +60,49 @@ torch::Tensor fusedIndiceConvBatchNorm(torch::Tensor features, torch::Tensor fil
   // auto indicePairOptions =
   //     torch::TensorOptions().dtype(torch::kInt64).device(indicePairs.device());

   torch::Tensor output =
       torch::zeros({numActOut, numOutPlanes}, options).copy_(bias);
   torch::Tensor inputBuffer =
       torch::zeros({indicePairMaxSize, numInPlanes}, options);
   torch::Tensor outputBuffer =
       torch::zeros({indicePairMaxSize, numOutPlanes}, options);
   filters = filters.view({-1, numInPlanes, numOutPlanes});
   if (subM) {  // the center index of subm conv don't need gather and scatter
                // add.
     torch::mm_out(output, features, filters[indicePairMaxOffset]);
   }
   double totalGatherTime = 0;
   double totalGEMMTime = 0;
   double totalSAddTime = 0;
   for (int i = 0; i < kernelVolume; ++i) {
-    auto nHot = indicePairNumCpu.data<int>()[i];
+    auto nHot = indicePairNumCpu.data_ptr<int>()[i];
     if (nHot <= 0 || (subM && i == indicePairMaxOffset)) {
       continue;
     }
     // auto timer = spconv::CudaContextTimer<>();
-    auto outputBufferBlob =
-        torch::from_blob(outputBuffer.data<T>(), {nHot, numOutPlanes}, options);
-    auto inputBufferBlob =
-        torch::from_blob(inputBuffer.data<T>(), {nHot, numInPlanes}, options);
+    auto outputBufferBlob = torch::from_blob(outputBuffer.data_ptr<T>(),
+                                             {nHot, numOutPlanes}, options);
+    auto inputBufferBlob = torch::from_blob(inputBuffer.data_ptr<T>(),
+                                            {nHot, numInPlanes}, options);

     if (device == torch::kCPU) {
       functor::SparseGatherFunctor<tv::CPU, T, int> gatherFtor;
       gatherFtor(tv::CPU(), tv::torch2tv<T>(inputBuffer),
                  tv::torch2tv<const T>(features),
                  tv::torch2tv<const int>(indicePairs).subview(i, inverse),
                  nHot);
     } else {
       functor::SparseGatherFunctor<tv::GPU, T, int> gatherFtor;
       gatherFtor(tv::TorchGPU(), tv::torch2tv<T>(inputBuffer),
                  tv::torch2tv<const T>(features),
                  tv::torch2tv<const int>(indicePairs).subview(i, inverse),
                  nHot);
       TV_CHECK_CUDA_ERR();
       /* slower than SparseGatherFunctor, may due to int->long conversion
-      auto indicePairLong = indicePairs[i][inverse].to(torch::kInt64);
-      auto indicePairBlob = torch::from_blob(indicePairLong.data<long>(), {nHot},
-                                             indicePairOptions);
-      torch::index_select_out(inputBufferBlob, features, 0, indicePairBlob);*/
+      auto indicePairLong = indicePairs[i][inverse].to(torch::kInt64);
+      auto indicePairBlob = torch::from_blob(indicePairLong.data_ptr<long>(),
+                                             {nHot}, indicePairOptions);
+      torch::index_select_out(inputBufferBlob, features, 0,
+                              indicePairBlob);*/
     }
     // totalGatherTime += timer.report() / 1000.0;
     torch::mm_out(outputBufferBlob, inputBufferBlob, filters[i]);
@@ -105,14 +112,14 @@ torch::Tensor fusedIndiceConvBatchNorm(torch::Tensor features, torch::Tensor fil
       functor::SparseScatterAddFunctor<tv::CPU, T, int> scatterFtor;
       scatterFtor(tv::CPU(), tv::torch2tv<T>(output),
                   tv::torch2tv<const T>(outputBuffer),
                   tv::torch2tv<const int>(indicePairs).subview(i, !inverse),
                   nHot, true);
     } else {
       functor::SparseScatterAddFunctor<tv::GPU, T, int> scatterFtor;
       scatterFtor(tv::TorchGPU(), tv::torch2tv<T>(output),
                   tv::torch2tv<const T>(outputBuffer),
                   tv::torch2tv<const int>(indicePairs).subview(i, !inverse),
                   nHot, true);
       TV_CHECK_CUDA_ERR();
     }
     // totalSAddTime += timer.report() / 1000.0;
@@ -122,6 +129,6 @@ torch::Tensor fusedIndiceConvBatchNorm(torch::Tensor features, torch::Tensor fil
   // std::cout << "scatteradd time " << totalSAddTime << std::endl;
   return output;
 }

 }  // namespace spconv

 #endif
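
fusedIndiceConvBatchNorm follows a gather, GEMM, scatter-add pattern over the kernel volume: active input rows are gathered by index pairs, multiplied with that offset's filter, and accumulated into the output rows. A minimal dense CPU sketch of the same pattern, with illustrative names and container types that are assumptions rather than the spconv API:

// Dense stand-in for one kernel-volume offset of the sparse convolution loop.
#include <utility>
#include <vector>

void gather_gemm_scatter(
    const std::vector<std::vector<float>> &features,          // [numActIn][Cin]
    const std::vector<std::vector<float>> &filter,            // [Cin][Cout]
    const std::vector<std::pair<int, int>> &indicePairs,      // (inRow, outRow)
    std::vector<std::vector<float>> &output) {                // [numActOut][Cout]
  for (const auto &pair : indicePairs) {
    const auto &in = features[pair.first];   // "gather" one active input row
    auto &out = output[pair.second];         // scatter-add target row
    for (size_t co = 0; co < out.size(); ++co) {
      float acc = 0.f;
      for (size_t ci = 0; ci < in.size(); ++ci) acc += in[ci] * filter[ci][co];
      out[co] += acc;                         // accumulate, like SparseScatterAddFunctor
    }
  }
}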
mmdet3d/ops/spconv/include/spconv/pool_ops.h

@@ -24,7 +24,7 @@
 namespace spconv {
 template <typename T>
 torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
                             torch::Tensor indiceNum, int64_t numAct) {
   auto device = features.device().type();
   auto kernelVolume = indicePairs.size(0);
   auto numInPlanes = features.size(1);
@@ -34,7 +34,7 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
   torch::Tensor output = torch::zeros({numAct, numInPlanes}, options);
   double totalTime = 0;
   for (int i = 0; i < kernelVolume; ++i) {
-    auto nHot = indicePairNumCpu.data<int>()[i];
+    auto nHot = indicePairNumCpu.data_ptr<int>()[i];
     if (nHot <= 0) {
       continue;
     }
@@ -59,18 +59,19 @@ torch::Tensor indiceMaxPool(torch::Tensor features, torch::Tensor indicePairs,
 template <typename T>
 torch::Tensor indiceMaxPoolBackward(torch::Tensor features,
                                     torch::Tensor outFeatures,
                                     torch::Tensor outGrad,
                                     torch::Tensor indicePairs,
                                     torch::Tensor indiceNum) {
   auto device = features.device().type();
   auto numInPlanes = features.size(1);
   auto indicePairNumCpu = indiceNum.to({torch::kCPU});
   auto options =
       torch::TensorOptions().dtype(features.dtype()).device(features.device());
   torch::Tensor inputGrad = torch::zeros(features.sizes(), options);
   auto kernelVolume = indicePairs.size(0);
   for (int i = 0; i < kernelVolume; ++i) {
-    auto nHot = indicePairNumCpu.data<int>()[i];
+    auto nHot = indicePairNumCpu.data_ptr<int>()[i];
     if (nHot <= 0) {
       continue;
     }
@@ -92,6 +93,6 @@ torch::Tensor indiceMaxPoolBackward(torch::Tensor features,
   return inputGrad;
 }

 }  // namespace spconv

 #endif
mmdet3d/ops/spconv/include/spconv/spconv_ops.h

(Diff collapsed in this capture: +162 -131.)
mmdet3d/ops/spconv/include/torch_utils.h

@@ -13,48 +13,49 @@
 // limitations under the License.

 #pragma once
-#include <tensorview/tensorview.h>
-#include <torch/script.h>
 #include <ATen/ATen.h>
 #include <ATen/cuda/CUDAContext.h>
+#include <tensorview/tensorview.h>
+#include <torch/script.h>

 namespace tv {

 struct TorchGPU : public tv::GPU {
   virtual cudaStream_t getStream() const override {
     return at::cuda::getCurrentCUDAStream();
   }
 };

 template <typename T>
 void check_torch_dtype(const torch::Tensor &tensor) {
   switch (tensor.type().scalarType()) {
     case at::ScalarType::Double: {
       auto val = std::is_same<std::remove_const_t<T>, double>::value;
       TV_ASSERT_RT_ERR(val, "error");
       break;
     }
     case at::ScalarType::Float: {
       auto val = std::is_same<std::remove_const_t<T>, float>::value;
       TV_ASSERT_RT_ERR(val, "error");
       break;
     }
     case at::ScalarType::Int: {
       auto val = std::is_same<std::remove_const_t<T>, int>::value;
       TV_ASSERT_RT_ERR(val, "error");
       break;
     }
     case at::ScalarType::Half: {
       auto val = std::is_same<std::remove_const_t<T>, at::Half>::value;
       TV_ASSERT_RT_ERR(val, "error");
       break;
     }
     case at::ScalarType::Long: {
       auto val = std::is_same<std::remove_const_t<T>, long>::value;
       TV_ASSERT_RT_ERR(val, "error");
       break;
     }
     default:
       TV_ASSERT_RT_ERR(false, "error");
   }
 }
@@ -65,6 +66,6 @@ tv::TensorView<T> torch2tv(const torch::Tensor &tensor) {
   for (auto i : tensor.sizes()) {
     shape.push_back(i);
   }
-  return tv::TensorView<T>(tensor.data<std::remove_const_t<T>>(), shape);
+  return tv::TensorView<T>(tensor.data_ptr<std::remove_const_t<T>>(), shape);
 }

 }  // namespace tv
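
A hypothetical call site (not from the diff) showing how the two helpers above are meant to be combined: verify the element type first, then wrap the tensor's storage in a tensorview without copying:

// Sketch only; assumes torch_utils.h and tensorview are on the include path.
#include <torch/script.h>

void consume_features(torch::Tensor features) {
  tv::check_torch_dtype<float>(features);           // fails via TV_ASSERT_RT_ERR on a dtype mismatch
  auto view = tv::torch2tv<const float>(features);  // wraps data_ptr<float>() plus sizes()
  // ... hand `view` to an spconv functor ...
}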
mmdet3d/ops/voxel/src/scatter_points_cpu.cpp

-#include <torch/extension.h>
 #include <ATen/TensorUtils.h>
+#include <torch/extension.h>

// #include "voxelization.h"

namespace {

template <typename T_int>
void determin_max_points_kernel(
    torch::TensorAccessor<T_int, 2> coor,
    torch::TensorAccessor<T_int, 1> point_to_voxelidx,
    torch::TensorAccessor<T_int, 1> num_points_per_voxel,
    torch::TensorAccessor<T_int, 3> coor_to_voxelidx, int &voxel_num,
    int &max_points, const int num_points) {
  int voxelidx, num;
  for (int i = 0; i < num_points; ++i) {
    if (coor[i][0] == -1) continue;
    voxelidx = coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]];
    // record voxel
    if (voxelidx == -1) {
      voxelidx = voxel_num;
      voxel_num += 1;
      coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]] = voxelidx;
    }
    // put points into voxel
    num = num_points_per_voxel[voxelidx];
    point_to_voxelidx[i] = num;
    num_points_per_voxel[voxelidx] += 1;
    // update max points per voxel
    max_points = std::max(max_points, num + 1);
  }
  return;
}

template <typename T, typename T_int>
void scatter_point_to_voxel_kernel(
    const torch::TensorAccessor<T, 2> points,
    torch::TensorAccessor<T_int, 2> coor,
    torch::TensorAccessor<T_int, 1> point_to_voxelidx,
    torch::TensorAccessor<T_int, 3> coor_to_voxelidx,
    torch::TensorAccessor<T, 3> voxels,
    torch::TensorAccessor<T_int, 2> voxel_coors, const int num_features,
    const int num_points, const int NDim) {
  for (int i = 0; i < num_points; ++i) {
    int num = point_to_voxelidx[i];
    int voxelidx = coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]];
    for (int k = 0; k < num_features; ++k) {
      voxels[voxelidx][num][k] = points[i][k];
    }
    for (int k = 0; k < NDim; ++k) {
      voxel_coors[voxelidx][k] = coor[i][k];
    }
  }
}

}  // namespace

namespace voxelization {

std::vector<at::Tensor> dynamic_point_to_voxel_cpu(
    const at::Tensor &points, const at::Tensor &voxel_mapping,
    const std::vector<float> voxel_size,
    const std::vector<float> coors_range) {
  // current version tooks about 0.02s_0.03s for one frame on cpu
  // check device
  AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor");

  const int NDim = voxel_mapping.size(1);
  const int num_points = points.size(0);
  const int num_features = points.size(1);

  std::vector<int> grid_size(NDim);
  for (int i = 0; i < NDim; ++i) {
    grid_size[i] =
        round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
  }

  at::Tensor num_points_per_voxel =
      at::zeros({num_points}, voxel_mapping.options());
  at::Tensor coor_to_voxelidx = -at::ones(
      {grid_size[2], grid_size[1], grid_size[0]}, voxel_mapping.options());
  at::Tensor point_to_voxelidx =
      -at::ones({num_points}, voxel_mapping.options());

  int voxel_num = 0;
  int max_points = 0;
-  AT_DISPATCH_ALL_TYPES(voxel_mapping.type(), "determin_max_point", [&] {
+  AT_DISPATCH_ALL_TYPES(voxel_mapping.scalar_type(), "determin_max_point", [&] {
    determin_max_points_kernel<scalar_t>(
        voxel_mapping.accessor<scalar_t, 2>(),
        point_to_voxelidx.accessor<scalar_t, 1>(),
        num_points_per_voxel.accessor<scalar_t, 1>(),
        coor_to_voxelidx.accessor<scalar_t, 3>(), voxel_num, max_points,
        num_points);
  });

  at::Tensor voxels =
      at::zeros({voxel_num, max_points, num_features}, points.options());
  at::Tensor voxel_coors =
      at::zeros({voxel_num, NDim}, points.options().dtype(at::kInt));

-  AT_DISPATCH_ALL_TYPES(points.type(), "scatter_point_to_voxel", [&] {
+  AT_DISPATCH_ALL_TYPES(points.scalar_type(), "scatter_point_to_voxel", [&] {
    scatter_point_to_voxel_kernel<scalar_t, int>(
        points.accessor<scalar_t, 2>(), voxel_mapping.accessor<int, 2>(),
        point_to_voxelidx.accessor<int, 1>(),
        coor_to_voxelidx.accessor<int, 3>(), voxels.accessor<scalar_t, 3>(),
        voxel_coors.accessor<int, 2>(), num_features, num_points, NDim);
  });

  at::Tensor num_points_per_voxel_out =
      num_points_per_voxel.slice(/*dim=*/0, /*start=*/0, /*end=*/voxel_num);
  return {voxels, voxel_coors, num_points_per_voxel_out};
}

}  // namespace voxelization
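
A worked example of the grid_size computation above; the voxel size and coordinate range are assumed, roughly KITTI-like numbers, not values taken from this commit:

// Illustration of grid_size[i] = round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]).
//   coors_range = {0, -40, -3, 70.4, 40, 1},  voxel_size = {0.05, 0.05, 0.1}
//   grid_size[0] = round((70.4 - 0)    / 0.05) = 1408
//   grid_size[1] = round((40  - (-40)) / 0.05) = 1600
//   grid_size[2] = round((1   - (-3))  / 0.1)  = 40
// so coor_to_voxelidx is allocated as a (40, 1600, 1408) dense lookup volume
// initialized to -1, and each point's coordinate indexes into it directly.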
mmdet3d/ops/voxel/src/scatter_points_cuda.cu

@@ -6,7 +6,7 @@
 #include <ATen/cuda/CUDAApplyUtils.cuh>

 #define CHECK_CUDA(x) \
-  TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
+  TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor")
 #define CHECK_CONTIGUOUS(x) \
   TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
 #define CHECK_INPUT(x) \
@@ -177,7 +177,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
   dim3 threads(threadsPerBlock);
   cudaStream_t map_stream = at::cuda::getCurrentCUDAStream();
   AT_DISPATCH_ALL_TYPES(
-      voxel_mapping.type(), "determin_duplicate", ([&] {
+      voxel_mapping.scalar_type(), "determin_duplicate", ([&] {
        point_to_voxelidx_kernel<int><<<blocks, threads, 0, map_stream>>>(
            voxel_mapping.data_ptr<int>(), point_to_voxelidx.data_ptr<int>(),
            point_to_pointidx.data_ptr<int>(), num_points, NDim);
@@ -203,7 +203,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
       voxel_mapping.options());  // must be zero from the begining
   cudaStream_t logic_stream = at::cuda::getCurrentCUDAStream();
   AT_DISPATCH_ALL_TYPES(
-      voxel_mapping.type(), "determin_duplicate", ([&] {
+      voxel_mapping.scalar_type(), "determin_duplicate", ([&] {
        determin_voxel_num<int><<<1, 1, 0, logic_stream>>>(
            voxel_mapping.data_ptr<int>(), num_points_per_voxel.data_ptr<int>(),
            point_to_voxelidx.data_ptr<int>(),
@@ -228,7 +228,7 @@ std::vector<at::Tensor> dynamic_point_to_voxel_forward_gpu(
   dim3 cp_threads(threadsPerBlock, 4);
   cudaStream_t cp_stream = at::cuda::getCurrentCUDAStream();
   AT_DISPATCH_ALL_TYPES(
-      points.type(), "scatter_point_to_voxel", ([&] {
+      points.scalar_type(), "scatter_point_to_voxel", ([&] {
        scatter_point_to_voxel_kernel<float, int>
            <<<blocks, cp_threads, 0, cp_stream>>>(
                points.data_ptr<float>(), voxel_mapping.data_ptr<int>(),
@@ -265,8 +265,8 @@ void dynamic_point_to_voxel_backward_gpu(at::Tensor& grad_input_points,
   dim3 blocks(col_blocks);
   dim3 cp_threads(threadsPerBlock, 4);
   cudaStream_t cp_stream = at::cuda::getCurrentCUDAStream();
-  AT_DISPATCH_ALL_TYPES(grad_input_points.type(), "scatter_point_to_voxel",
-                        ([&] {
+  AT_DISPATCH_ALL_TYPES(grad_input_points.scalar_type(),
+                        "scatter_point_to_voxel", ([&] {
        map_voxel_to_point_kernel<float, int>
            <<<blocks, cp_threads, 0, cp_stream>>>(
                grad_input_points.data_ptr<float>(),
mmdet3d/ops/voxel/src/voxelization.h  View file @ aec41c7f

...
@@ -49,7 +49,7 @@ inline int hard_voxelize(const at::Tensor& points, at::Tensor& voxels,
                          const std::vector<float> coors_range,
                          const int max_points, const int max_voxels,
                          const int NDim = 3) {
-  if (points.type().is_cuda()) {
+  if (points.device().is_cuda()) {
 #ifdef WITH_CUDA
     return hard_voxelize_gpu(points, voxels, coors, num_points_per_voxel,
                              voxel_size, coors_range, max_points, max_voxels,
...
@@ -67,7 +67,7 @@ inline void dynamic_voxelize(const at::Tensor& points, at::Tensor& coors,
                              const std::vector<float> voxel_size,
                              const std::vector<float> coors_range,
                              const int NDim = 3) {
-  if (points.type().is_cuda()) {
+  if (points.device().is_cuda()) {
 #ifdef WITH_CUDA
     return dynamic_voxelize_gpu(points, coors, voxel_size, coors_range, NDim);
 #else
...
@@ -80,7 +80,7 @@ inline void dynamic_voxelize(const at::Tensor& points, at::Tensor& coors,
 inline std::vector<torch::Tensor> dynamic_point_to_voxel_forward(
     const at::Tensor& points, const at::Tensor& voxel_mapping,
     const std::vector<float> voxel_size, const std::vector<float> coors_range) {
-  if (points.type().is_cuda()) {
+  if (points.device().is_cuda()) {
 #ifdef WITH_CUDA
     return dynamic_point_to_voxel_forward_gpu(points, voxel_mapping, voxel_size,
                                               coors_range);
...
@@ -95,7 +95,7 @@ inline std::vector<torch::Tensor> dynamic_point_to_voxel_forward(
 inline void dynamic_point_to_voxel_backward(
     at::Tensor& grad_input_points, const at::Tensor& grad_output_voxels,
     const at::Tensor& point_to_voxelidx, const at::Tensor& coor_to_voxelidx) {
-  if (grad_input_points.type().is_cuda()) {
+  if (grad_input_points.device().is_cuda()) {
 #ifdef WITH_CUDA
     return dynamic_point_to_voxel_backward_gpu(
         grad_input_points, grad_output_voxels, point_to_voxelidx,
...
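Every hunk in this header makes the same substitution: the device check now goes through Tensor::device() instead of the deprecated Tensor::type(). The same separation exists on the Python side of PyTorch, where device and dtype are queried independently; a small illustration (not part of the diff):

import torch

t = torch.zeros(4, 3)
print(t.device.type)  # 'cpu'  (would be 'cuda' for a CUDA tensor)
print(t.is_cuda)      # False
print(t.dtype)        # torch.float32 -- the dtype is queried separately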
mmdet3d/ops/voxel/src/voxelization_cpu.cpp  View file @ aec41c7f

-#include <torch/extension.h>
 #include <ATen/TensorUtils.h>
+#include <torch/extension.h>
 // #include "voxelization.h"

 namespace {

 template <typename T, typename T_int>
 void dynamic_voxelize_kernel(const torch::TensorAccessor<T, 2> points,
                              torch::TensorAccessor<T_int, 2> coors,
                              const std::vector<float> voxel_size,
                              const std::vector<float> coors_range,
                              const std::vector<int> grid_size,
                              const int num_points, const int num_features,
                              const int NDim) {
   const int ndim_minus_1 = NDim - 1;
   bool failed = false;
   int coor[NDim];
...
@@ -44,56 +40,42 @@ void dynamic_voxelize_kernel(const torch::TensorAccessor<T, 2> points,
   return;
 }

 template <typename T, typename T_int>
 void hard_voxelize_kernel(const torch::TensorAccessor<T, 2> points,
                           torch::TensorAccessor<T, 3> voxels,
                           torch::TensorAccessor<T_int, 2> coors,
                           torch::TensorAccessor<T_int, 1> num_points_per_voxel,
                           torch::TensorAccessor<T_int, 3> coor_to_voxelidx,
                           int& voxel_num, const std::vector<float> voxel_size,
                           const std::vector<float> coors_range,
                           const std::vector<int> grid_size,
                           const int max_points, const int max_voxels,
                           const int num_points, const int num_features,
                           const int NDim) {
   // declare a temp coors
   at::Tensor temp_coors = at::zeros(
       {num_points, NDim}, at::TensorOptions().dtype(at::kInt).device(at::kCPU));

   // First use dynamic voxelization to get coors,
   // then check max points/voxels constraints
   dynamic_voxelize_kernel<T, int>(points, temp_coors.accessor<int, 2>(),
                                   voxel_size, coors_range, grid_size,
                                   num_points, num_features, NDim);

   int voxelidx, num;
   auto coor = temp_coors.accessor<int, 2>();

   for (int i = 0; i < num_points; ++i) {
     // T_int* coor = temp_coors.data_ptr<int>() + i * NDim;

     if (coor[i][0] == -1) continue;

     voxelidx = coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]];

     // record voxel
     if (voxelidx == -1) {
       voxelidx = voxel_num;
       if (max_voxels != -1 && voxel_num >= max_voxels) break;
       voxel_num += 1;

       coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]] = voxelidx;
...
@@ -116,93 +98,74 @@ void hard_voxelize_kernel(const torch::TensorAccessor<T, 2> points,
   return;
 }

 }  // namespace

 namespace voxelization {

 int hard_voxelize_cpu(const at::Tensor& points, at::Tensor& voxels,
                       at::Tensor& coors, at::Tensor& num_points_per_voxel,
                       const std::vector<float> voxel_size,
                       const std::vector<float> coors_range,
                       const int max_points, const int max_voxels,
                       const int NDim = 3) {
   // current version tooks about 0.02s_0.03s for one frame on cpu
   // check device
   AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor");

   std::vector<int> grid_size(NDim);
   const int num_points = points.size(0);
   const int num_features = points.size(1);

   for (int i = 0; i < NDim; ++i) {
     grid_size[i] =
         round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
   }

   // coors, num_points_per_voxel, coor_to_voxelidx are int Tensor
   // printf("cpu coor_to_voxelidx size: [%d, %d, %d]\n", grid_size[2],
   // grid_size[1], grid_size[0]);
   at::Tensor coor_to_voxelidx =
       -at::ones({grid_size[2], grid_size[1], grid_size[0]}, coors.options());

   int voxel_num = 0;
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      points.type(), "hard_voxelize_forward", [&] {
+      points.scalar_type(), "hard_voxelize_forward", [&] {
         hard_voxelize_kernel<scalar_t, int>(
             points.accessor<scalar_t, 2>(), voxels.accessor<scalar_t, 3>(),
             coors.accessor<int, 2>(), num_points_per_voxel.accessor<int, 1>(),
             coor_to_voxelidx.accessor<int, 3>(), voxel_num, voxel_size,
             coors_range, grid_size, max_points, max_voxels, num_points,
             num_features, NDim);
       });

   return voxel_num;
 }

 void dynamic_voxelize_cpu(const at::Tensor& points, at::Tensor& coors,
                           const std::vector<float> voxel_size,
                           const std::vector<float> coors_range,
                           const int NDim = 3) {
   // check device
   AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor");

   std::vector<int> grid_size(NDim);
   const int num_points = points.size(0);
   const int num_features = points.size(1);

   for (int i = 0; i < NDim; ++i) {
     grid_size[i] =
         round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
   }

   // coors, num_points_per_voxel, coor_to_voxelidx are int Tensor
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
-      points.type(), "hard_voxelize_forward", [&] {
+      points.scalar_type(), "hard_voxelize_forward", [&] {
         dynamic_voxelize_kernel<scalar_t, int>(
             points.accessor<scalar_t, 2>(), coors.accessor<int, 2>(),
             voxel_size, coors_range, grid_size, num_points, num_features,
             NDim);
       });

   return;
 }

 }  // namespace voxelization
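The grid and coordinate arithmetic in these kernels is compact enough to restate in NumPy. The sketch below is an illustrative reimplementation of what dynamic_voxelize_kernel computes per point, not the bound op itself; the only behaviour taken from the code above is the rounded grid size and the -1 marker for out-of-range points.

import numpy as np

def dynamic_voxelize(points, voxel_size, coors_range):
    # points: (N, >=3); voxel_size: (3,); coors_range: (6,) = (x/y/z min, x/y/z max)
    voxel_size = np.asarray(voxel_size, dtype=np.float32)
    low = np.asarray(coors_range[:3], dtype=np.float32)
    high = np.asarray(coors_range[3:], dtype=np.float32)
    grid_size = np.round((high - low) / voxel_size).astype(np.int32)

    coors = np.floor((points[:, :3] - low) / voxel_size).astype(np.int32)
    out_of_range = ((coors < 0) | (coors >= grid_size)).any(axis=1)
    coors[out_of_range] = -1  # same convention as the CPU kernel above
    return coors, grid_size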
mmdet3d/ops/voxel/src/voxelization_cuda.cu  View file @ aec41c7f

...
@@ -6,7 +6,7 @@
 #include <ATen/cuda/CUDAApplyUtils.cuh>

 #define CHECK_CUDA(x) \
-  TORCH_CHECK(x.type().is_cuda(), #x " must be a CUDA tensor")
+  TORCH_CHECK(x.device().is_cuda(), #x " must be a CUDA tensor")
 #define CHECK_CONTIGUOUS(x) \
   TORCH_CHECK(x.is_contiguous(), #x " must be contiguous")
 #define CHECK_INPUT(x) \
...
@@ -219,7 +219,7 @@ int hard_voxelize_gpu(const at::Tensor& points, at::Tensor& voxels,
   // 1. link point to corresponding voxel coors
   AT_DISPATCH_ALL_TYPES(
-      points.type(), "hard_voxelize_kernel", ([&] {
+      points.scalar_type(), "hard_voxelize_kernel", ([&] {
         dynamic_voxelize_kernel<scalar_t, int>
             <<<grid, block, 0, at::cuda::getCurrentCUDAStream()>>>(
                 points.contiguous().data_ptr<scalar_t>(),
...
@@ -247,7 +247,7 @@ int hard_voxelize_gpu(const at::Tensor& points, at::Tensor& voxels,
   dim3 map_grid(std::min(at::cuda::ATenCeilDiv(num_points, 512), 4096));
   dim3 map_block(512);
   AT_DISPATCH_ALL_TYPES(
-      temp_coors.type(), "determin_duplicate", ([&] {
+      temp_coors.scalar_type(), "determin_duplicate", ([&] {
         point_to_voxelidx_kernel<int>
             <<<map_grid, map_block, 0, at::cuda::getCurrentCUDAStream()>>>(
                 temp_coors.contiguous().data_ptr<int>(),
...
@@ -272,7 +272,7 @@ int hard_voxelize_gpu(const at::Tensor& points, at::Tensor& voxels,
       points.options().dtype(at::kInt));  // must be zero from the begining
   AT_DISPATCH_ALL_TYPES(
-      temp_coors.type(), "determin_duplicate", ([&] {
+      temp_coors.scalar_type(), "determin_duplicate", ([&] {
         determin_voxel_num<int><<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>(
             num_points_per_voxel.contiguous().data_ptr<int>(),
             point_to_voxelidx.contiguous().data_ptr<int>(),
...
@@ -290,7 +290,7 @@ int hard_voxelize_gpu(const at::Tensor& points, at::Tensor& voxels,
   dim3 cp_grid(std::min(at::cuda::ATenCeilDiv(pts_output_size, 512), 4096));
   dim3 cp_block(512);
   AT_DISPATCH_ALL_TYPES(
-      points.type(), "assign_point_to_voxel", ([&] {
+      points.scalar_type(), "assign_point_to_voxel", ([&] {
         assign_point_to_voxel<float, int>
             <<<cp_grid, cp_block, 0, at::cuda::getCurrentCUDAStream()>>>(
                 pts_output_size, points.contiguous().data_ptr<float>(),
...
@@ -308,7 +308,7 @@ int hard_voxelize_gpu(const at::Tensor& points, at::Tensor& voxels,
       std::min(at::cuda::ATenCeilDiv(coors_output_size, 512), 4096));
   dim3 coors_cp_block(512);
   AT_DISPATCH_ALL_TYPES(
-      points.type(), "assign_point_to_voxel", ([&] {
+      points.scalar_type(), "assign_point_to_voxel", ([&] {
         assign_voxel_coors<float, int><<<coors_cp_grid, coors_cp_block, 0,
                                          at::cuda::getCurrentCUDAStream()>>>(
             coors_output_size, temp_coors.contiguous().data_ptr<int>(),
...
mmdet3d/utils/__init__.py  View file @ aec41c7f

-from mmdet.utils import (Registry, build_from_cfg, get_model_complexity_info,
-                         get_root_logger, print_log)
+from mmcv.utils import Registry, build_from_cfg
+from mmdet.utils import get_model_complexity_info, get_root_logger, print_log
 from .collect_env import collect_env

 __all__ = [
...
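Registry and build_from_cfg are now imported from mmcv rather than re-exported through mmdet. For readers new to the pair, a minimal sketch of how they work together is below; the registry name, the class, and the decorator form are illustrative assumptions (the decorator syntax varies slightly across mmcv versions) and are not code from this commit.

from mmcv.utils import Registry, build_from_cfg

# A Registry maps a string 'type' name to a class; build_from_cfg then
# instantiates the registered class from a plain config dict.
VOXEL_ENCODERS = Registry('voxel_encoder')  # hypothetical registry

@VOXEL_ENCODERS.register_module()
class DummyEncoder:  # hypothetical class, for illustration only
    def __init__(self, num_features=4):
        self.num_features = num_features

encoder = build_from_cfg(dict(type='DummyEncoder', num_features=5),
                         VOXEL_ENCODERS)
assert encoder.num_features == 5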
tests/test_anchor.py  0 → 100644  View file @ aec41c7f

"""
CommandLine:
    pytest tests/test_anchor.py
    xdoctest tests/test_anchor.py zero
"""
import torch


def test_aligned_anchor_generator():
    from mmdet3d.core.anchor import build_anchor_generator

    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'

    anchor_generator_cfg = dict(
        type='AlignedAnchor3DRangeGenerator',
        ranges=[[-51.2, -51.2, -1.80, 51.2, 51.2, -1.80]],
        strides=[1, 2, 4],
        sizes=[
            [0.8660, 2.5981, 1.],  # 1.5/sqrt(3)
            [0.5774, 1.7321, 1.],  # 1/sqrt(3)
            [1., 1., 1.],
            [0.4, 0.4, 1],
        ],
        custom_values=[0, 0],
        rotations=[0, 1.57],
        size_per_range=False,
        reshape_out=True)

    featmap_sizes = [(256, 256), (128, 128), (64, 64)]
    anchor_generator = build_anchor_generator(anchor_generator_cfg)
    assert anchor_generator.num_base_anchors == 8

    # check base anchors
    expected_grid_anchors = [
        torch.tensor(
            [[-51.0000, -51.0000, -1.8000, 0.8660, 2.5981, 1.0000, 0.0000, 0.0000, 0.0000],
             [-51.0000, -51.0000, -1.8000, 0.4000, 0.4000, 1.0000, 1.5700, 0.0000, 0.0000],
             [-50.6000, -51.0000, -1.8000, 0.4000, 0.4000, 1.0000, 0.0000, 0.0000, 0.0000],
             [-50.2000, -51.0000, -1.8000, 1.0000, 1.0000, 1.0000, 1.5700, 0.0000, 0.0000],
             [-49.8000, -51.0000, -1.8000, 1.0000, 1.0000, 1.0000, 0.0000, 0.0000, 0.0000],
             [-49.4000, -51.0000, -1.8000, 0.5774, 1.7321, 1.0000, 1.5700, 0.0000, 0.0000],
             [-49.0000, -51.0000, -1.8000, 0.5774, 1.7321, 1.0000, 0.0000, 0.0000, 0.0000],
             [-48.6000, -51.0000, -1.8000, 0.8660, 2.5981, 1.0000, 1.5700, 0.0000, 0.0000]],
            device=device),
        torch.tensor(
            [[-50.8000, -50.8000, -1.8000, 1.7320, 5.1962, 2.0000, 0.0000, 0.0000, 0.0000],
             [-50.8000, -50.8000, -1.8000, 0.8000, 0.8000, 2.0000, 1.5700, 0.0000, 0.0000],
             [-50.0000, -50.8000, -1.8000, 0.8000, 0.8000, 2.0000, 0.0000, 0.0000, 0.0000],
             [-49.2000, -50.8000, -1.8000, 2.0000, 2.0000, 2.0000, 1.5700, 0.0000, 0.0000],
             [-48.4000, -50.8000, -1.8000, 2.0000, 2.0000, 2.0000, 0.0000, 0.0000, 0.0000],
             [-47.6000, -50.8000, -1.8000, 1.1548, 3.4642, 2.0000, 1.5700, 0.0000, 0.0000],
             [-46.8000, -50.8000, -1.8000, 1.1548, 3.4642, 2.0000, 0.0000, 0.0000, 0.0000],
             [-46.0000, -50.8000, -1.8000, 1.7320, 5.1962, 2.0000, 1.5700, 0.0000, 0.0000]],
            device=device),
        torch.tensor(
            [[-50.4000, -50.4000, -1.8000, 3.4640, 10.3924, 4.0000, 0.0000, 0.0000, 0.0000],
             [-50.4000, -50.4000, -1.8000, 1.6000, 1.6000, 4.0000, 1.5700, 0.0000, 0.0000],
             [-48.8000, -50.4000, -1.8000, 1.6000, 1.6000, 4.0000, 0.0000, 0.0000, 0.0000],
             [-47.2000, -50.4000, -1.8000, 4.0000, 4.0000, 4.0000, 1.5700, 0.0000, 0.0000],
             [-45.6000, -50.4000, -1.8000, 4.0000, 4.0000, 4.0000, 0.0000, 0.0000, 0.0000],
             [-44.0000, -50.4000, -1.8000, 2.3096, 6.9284, 4.0000, 1.5700, 0.0000, 0.0000],
             [-42.4000, -50.4000, -1.8000, 2.3096, 6.9284, 4.0000, 0.0000, 0.0000, 0.0000],
             [-40.8000, -50.4000, -1.8000, 3.4640, 10.3924, 4.0000, 1.5700, 0.0000, 0.0000]],
            device=device)
    ]

    multi_level_anchors = anchor_generator.grid_anchors(
        featmap_sizes, device=device)
    expected_multi_level_shapes = [
        torch.Size([524288, 9]),
        torch.Size([131072, 9]),
        torch.Size([32768, 9])
    ]
    for i, single_level_anchor in enumerate(multi_level_anchors):
        assert single_level_anchor.shape == expected_multi_level_shapes[i]
        # set [:56:7] thus it could cover 8 (len(size) * len(rotations))
        # anchors on 8 location
        assert single_level_anchor[:56:7].allclose(expected_grid_anchors[i])
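The expected flattened shapes follow directly from the config: each feature-map location carries len(sizes) * len(rotations) = 8 anchors, and each anchor row has 7 box parameters plus the two custom_values, i.e. 9 columns. A quick arithmetic check (illustrative only):

num_sizes, num_rotations = 4, 2
for h, w in [(256, 256), (128, 128), (64, 64)]:
    print(h * w * num_sizes * num_rotations)  # 524288, 131072, 32768
# 7 box parameters + 2 custom values = 9 columns per anchor row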
tests/test_config.py  View file @ aec41c7f

...
@@ -70,6 +70,34 @@ def test_config_build_detector():
     # _check_bbox_head(head_config, detector.bbox_head)


+def test_config_build_pipeline():
+    """
+    Test that all detection models defined in the configs can be initialized.
+    """
+    from mmcv import Config
+    from mmdet3d.datasets.pipelines import Compose
+
+    config_dpath = _get_config_directory()
+    print('Found config_dpath = {!r}'.format(config_dpath))
+
+    # Other configs needs database sampler.
+    config_names = [
+        'nus/hv_pointpillars_secfpn_sbn-all_4x8_20e_nus-3d.py',
+    ]
+
+    print('Using {} config files'.format(len(config_names)))
+
+    for config_fname in config_names:
+        config_fpath = join(config_dpath, config_fname)
+        config_mod = Config.fromfile(config_fpath)
+
+        # build train_pipeline
+        train_pipeline = Compose(config_mod.train_pipeline)
+        test_pipeline = Compose(config_mod.test_pipeline)
+        assert train_pipeline is not None
+        assert test_pipeline is not None
+
+
 def test_config_data_pipeline():
     """
     Test whether the data pipeline is valid and can process corner cases.
...
@@ -77,7 +105,7 @@ def test_config_data_pipeline():
     xdoctest -m tests/test_config.py test_config_build_data_pipeline
     """
     from mmcv import Config
-    from mmdet.datasets.pipelines import Compose
+    from mmdet3d.datasets.pipelines import Compose
     import numpy as np

     config_dpath = _get_config_directory()
...
tools/create_data.py  View file @ aec41c7f

...
@@ -3,6 +3,8 @@ import os.path as osp
 import tools.data_converter.kitti_converter as kitti
 import tools.data_converter.nuscenes_converter as nuscenes_converter
+import tools.data_converter.scannet_converter as scannet
+import tools.data_converter.sunrgbd_converter as sunrgbd
 from tools.data_converter.create_gt_database import create_groundtruth_database
...
@@ -43,6 +45,14 @@ def nuscenes_data_prep(root_path,
         '{}/{}_infos_train.pkl'.format(out_dir, info_prefix))


+def scannet_data_prep(root_path, info_prefix, out_dir):
+    scannet.create_scannet_info_file(root_path, info_prefix, out_dir)
+
+
+def sunrgbd_data_prep(root_path, info_prefix, out_dir):
+    sunrgbd.create_sunrgbd_info_file(root_path, info_prefix, out_dir)
+
+
 parser = argparse.ArgumentParser(description='Data converter arg parser')
 parser.add_argument('dataset', metavar='kitti', help='name of the dataset')
 parser.add_argument(
...
@@ -104,3 +114,13 @@ if __name__ == '__main__':
             dataset_name='NuScenesDataset',
             out_dir=args.out_dir,
             max_sweeps=args.max_sweeps)
+    elif args.dataset == 'scannet':
+        scannet_data_prep(
+            root_path=args.root_path,
+            info_prefix=args.extra_tag,
+            out_dir=args.out_dir)
+    elif args.dataset == 'sunrgbd':
+        sunrgbd_data_prep(
+            root_path=args.root_path,
+            info_prefix=args.extra_tag,
+            out_dir=args.out_dir)
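With these two branches wired in, ScanNet and SUN RGB-D info files can be generated like the existing datasets. As a usage sketch, the underlying converter functions can also be called directly from Python; the paths below are simply the defaults used in the converters' own __main__ blocks, so adjust them to your data layout:

from tools.data_converter.scannet_converter import create_scannet_info_file
from tools.data_converter.sunrgbd_converter import create_sunrgbd_info_file

# Writes <prefix>_infos_train.pkl / <prefix>_infos_val.pkl under save_path.
create_scannet_info_file(data_path='./data/scannet',
                         save_path='./data/scannet')
create_sunrgbd_info_file(data_path='./data/sunrgbd/sunrgbd_trainval',
                         save_path='./data/sunrgbd')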
tools/data_converter/scannet_converter.py  0 → 100644  View file @ aec41c7f

import os
import pickle
from pathlib import Path

from tools.data_converter.scannet_data_utils import ScanNetData


def create_scannet_info_file(data_path, pkl_prefix='scannet', save_path=None):
    assert os.path.exists(data_path)
    if save_path is None:
        save_path = Path(data_path)
    else:
        save_path = Path(save_path)
    assert os.path.exists(save_path)
    train_filename = save_path / f'{pkl_prefix}_infos_train.pkl'
    val_filename = save_path / f'{pkl_prefix}_infos_val.pkl'
    train_dataset = ScanNetData(root_path=data_path, split='train')
    val_dataset = ScanNetData(root_path=data_path, split='val')

    scannet_infos_train = train_dataset.get_scannet_infos(has_label=True)
    with open(train_filename, 'wb') as f:
        pickle.dump(scannet_infos_train, f)
    print('Scannet info train file is saved to %s' % train_filename)
    scannet_infos_val = val_dataset.get_scannet_infos(has_label=True)
    with open(val_filename, 'wb') as f:
        pickle.dump(scannet_infos_val, f)
    print('Scannet info val file is saved to %s' % val_filename)


if __name__ == '__main__':
    create_scannet_info_file(
        data_path='./data/scannet', save_path='./data/scannet')
tools/data_converter/scannet_data_utils.py  0 → 100644  View file @ aec41c7f

import os

import numpy as np


class ScanNetData(object):
    ''' Load and parse object data '''

    def __init__(self, root_path, split='train'):
        self.root_dir = root_path
        self.split = split
        self.split_dir = os.path.join(root_path)
        self.classes = [
            'cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
            'bookshelf', 'picture', 'counter', 'desk', 'curtain',
            'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
            'garbagebin'
        ]
        self.cat2label = {cat: self.classes.index(cat) for cat in self.classes}
        self.label2cat = {self.cat2label[t]: t for t in self.cat2label}
        self.cat_ids = np.array(
            [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39])
        self.cat_ids2class = {
            nyu40id: i
            for i, nyu40id in enumerate(list(self.cat_ids))
        }
        assert split in ['train', 'val', 'test']
        split_dir = os.path.join(self.root_dir, 'meta_data',
                                 'scannetv2_%s.txt' % split)
        self.sample_id_list = [
            x.strip() for x in open(split_dir).readlines()
        ] if os.path.exists(split_dir) else None

    def __len__(self):
        return len(self.sample_id_list)

    def get_box_label(self, idx):
        box_file = os.path.join(self.root_dir, 'scannet_train_instance_data',
                                '%s_bbox.npy' % idx)
        assert os.path.exists(box_file)
        return np.load(box_file)

    def get_scannet_infos(self,
                          num_workers=4,
                          has_label=True,
                          sample_id_list=None):
        import concurrent.futures as futures

        def process_single_scene(sample_idx):
            print('%s sample_idx: %s' % (self.split, sample_idx))
            info = dict()
            pc_info = {'num_features': 6, 'lidar_idx': sample_idx}
            info['point_cloud'] = pc_info
            if has_label:
                annotations = {}
                boxes_with_classes = self.get_box_label(
                    sample_idx)  # k, 6 + class
                annotations['gt_num'] = boxes_with_classes.shape[0]
                if annotations['gt_num'] != 0:
                    minmax_boxes3d = boxes_with_classes[:, :-1]  # k, 6
                    classes = boxes_with_classes[:, -1]  # k, 1
                    annotations['name'] = np.array([
                        self.label2cat[self.cat_ids2class[classes[i]]]
                        for i in range(annotations['gt_num'])
                    ])
                    annotations['location'] = minmax_boxes3d[:, :3]
                    annotations['dimensions'] = minmax_boxes3d[:, 3:6]
                    annotations['gt_boxes_upright_depth'] = minmax_boxes3d
                    annotations['index'] = np.arange(
                        annotations['gt_num'], dtype=np.int32)
                    annotations['class'] = np.array([
                        self.cat_ids2class[classes[i]]
                        for i in range(annotations['gt_num'])
                    ])
                info['annos'] = annotations
            return info

        sample_id_list = sample_id_list if sample_id_list is not None \
            else self.sample_id_list
        with futures.ThreadPoolExecutor(num_workers) as executor:
            infos = executor.map(process_single_scene, sample_id_list)
        return list(infos)
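The two dictionaries chain a raw NYU40 id to a contiguous class index and then to a readable name, which is how the class column of each *_bbox.npy row is decoded. A tiny illustration of that chain, using the lists defined above:

# cat_ids2class maps an NYU40 id to a contiguous index; label2cat maps the
# index back to the class name, e.g. NYU40 id 5 -> index 2 -> 'chair'.
cat_ids = [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34, 36, 39]
classes = ['cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
           'bookshelf', 'picture', 'counter', 'desk', 'curtain',
           'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
           'garbagebin']
cat_ids2class = {nyu40id: i for i, nyu40id in enumerate(cat_ids)}
label2cat = {i: name for i, name in enumerate(classes)}
assert label2cat[cat_ids2class[5]] == 'chair'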
tools/data_converter/sunrgbd_converter.py  0 → 100644  View file @ aec41c7f

import os
import pickle
from pathlib import Path

from tools.data_converter.sunrgbd_data_utils import SUNRGBDData


def create_sunrgbd_info_file(data_path,
                             pkl_prefix='sunrgbd',
                             save_path=None,
                             use_v1=False):
    assert os.path.exists(data_path)
    if save_path is None:
        save_path = Path(data_path)
    else:
        save_path = Path(save_path)
    assert os.path.exists(save_path)
    train_filename = save_path / f'{pkl_prefix}_infos_train.pkl'
    val_filename = save_path / f'{pkl_prefix}_infos_val.pkl'
    train_dataset = SUNRGBDData(
        root_path=data_path, split='train', use_v1=use_v1)
    val_dataset = SUNRGBDData(root_path=data_path, split='val', use_v1=use_v1)

    sunrgbd_infos_train = train_dataset.get_sunrgbd_infos(has_label=True)
    with open(train_filename, 'wb') as f:
        pickle.dump(sunrgbd_infos_train, f)
    print('Sunrgbd info train file is saved to %s' % train_filename)
    sunrgbd_infos_val = val_dataset.get_sunrgbd_infos(has_label=True)
    with open(val_filename, 'wb') as f:
        pickle.dump(sunrgbd_infos_val, f)
    print('Sunrgbd info val file is saved to %s' % val_filename)


if __name__ == '__main__':
    create_sunrgbd_info_file(
        data_path='./data/sunrgbd/sunrgbd_trainval',
        save_path='./data/sunrgbd')
tools/data_converter/sunrgbd_data_utils.py  0 → 100644  View file @ aec41c7f

import os

import cv2
import numpy as np
import scipy.io as sio


def random_sampling(pc, num_sample, replace=None, return_choices=False):
    """ Input is NxC, output is num_samplexC
    """
    if replace is None:
        replace = (pc.shape[0] < num_sample)
    choices = np.random.choice(pc.shape[0], num_sample, replace=replace)
    if return_choices:
        return pc[choices], choices
    else:
        return pc[choices]


class SUNRGBDInstance(object):

    def __init__(self, line):
        data = line.split(' ')
        data[1:] = [float(x) for x in data[1:]]
        self.classname = data[0]
        self.xmin = data[1]
        self.ymin = data[2]
        self.xmax = data[1] + data[3]
        self.ymax = data[2] + data[4]
        self.box2d = np.array([self.xmin, self.ymin, self.xmax, self.ymax])
        self.centroid = np.array([data[5], data[6], data[7]])
        self.w = data[8]
        self.l = data[9]  # noqa: E741
        self.h = data[10]
        self.orientation = np.zeros((3, ))
        self.orientation[0] = data[11]
        self.orientation[1] = data[12]
        self.heading_angle = -1 * np.arctan2(self.orientation[1],
                                             self.orientation[0])
        self.box3d = np.concatenate([
            self.centroid,
            np.array([self.l * 2, self.w * 2, self.h * 2, self.heading_angle])
        ])


class SUNRGBDData(object):
    ''' Load and parse object data '''

    def __init__(self, root_path, split='train', use_v1=False):
        self.root_dir = root_path
        self.split = split
        self.split_dir = os.path.join(root_path)
        self.classes = [
            'bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
            'night_stand', 'bookshelf', 'bathtub'
        ]
        self.cat2label = {cat: self.classes.index(cat) for cat in self.classes}
        self.label2cat = {
            label: self.classes[label]
            for label in range(len(self.classes))
        }
        assert split in ['train', 'val', 'test']
        split_dir = os.path.join(self.root_dir, '%s_data_idx.txt' % split)
        self.sample_id_list = [
            int(x.strip()) for x in open(split_dir).readlines()
        ] if os.path.exists(split_dir) else None
        self.image_dir = os.path.join(self.split_dir, 'image')
        self.calib_dir = os.path.join(self.split_dir, 'calib')
        self.depth_dir = os.path.join(self.split_dir, 'depth')
        if use_v1:
            self.label_dir = os.path.join(self.split_dir, 'label_v1')
        else:
            self.label_dir = os.path.join(self.split_dir, 'label')

    def __len__(self):
        return len(self.sample_id_list)

    def get_image(self, idx):
        img_filename = os.path.join(self.image_dir, '%06d.jpg' % (idx))
        return cv2.imread(img_filename)

    def get_image_shape(self, idx):
        image = self.get_image(idx)
        return np.array(image.shape[:2], dtype=np.int32)

    def get_depth(self, idx):
        depth_filename = os.path.join(self.depth_dir, '%06d.mat' % (idx))
        depth = sio.loadmat(depth_filename)['instance']
        return depth

    def get_calibration(self, idx):
        calib_filepath = os.path.join(self.calib_dir, '%06d.txt' % (idx))
        lines = [line.rstrip() for line in open(calib_filepath)]
        Rt = np.array([float(x) for x in lines[0].split(' ')])
        Rt = np.reshape(Rt, (3, 3), order='F')
        K = np.array([float(x) for x in lines[1].split(' ')])
        return K, Rt

    def get_label_objects(self, idx):
        label_filename = os.path.join(self.label_dir, '%06d.txt' % (idx))
        lines = [line.rstrip() for line in open(label_filename)]
        objects = [SUNRGBDInstance(line) for line in lines]
        return objects

    def get_sunrgbd_infos(self,
                          num_workers=4,
                          has_label=True,
                          sample_id_list=None):
        import concurrent.futures as futures

        def process_single_scene(sample_idx):
            print('%s sample_idx: %s' % (self.split, sample_idx))
            # convert depth to points
            SAMPLE_NUM = 50000
            pc_upright_depth = self.get_depth(sample_idx)
            # TODO: sample points in loading process and test
            pc_upright_depth_subsampled = random_sampling(
                pc_upright_depth, SAMPLE_NUM)
            np.savez_compressed(
                os.path.join(self.root_dir, 'lidar', '%06d.npz' % sample_idx),
                pc=pc_upright_depth_subsampled)

            info = dict()
            pc_info = {'num_features': 6, 'lidar_idx': sample_idx}
            info['point_cloud'] = pc_info

            img_name = os.path.join(self.image_dir, '%06d.jpg' % (sample_idx))
            img_path = os.path.join(self.image_dir, img_name)
            image_info = {
                'image_idx': sample_idx,
                'image_shape': self.get_image_shape(sample_idx),
                'image_path': img_path
            }
            info['image'] = image_info

            K, Rt = self.get_calibration(sample_idx)
            calib_info = {'K': K, 'Rt': Rt}
            info['calib'] = calib_info

            if has_label:
                obj_list = self.get_label_objects(sample_idx)
                annotations = {}
                annotations['gt_num'] = len([
                    obj.classname for obj in obj_list
                    if obj.classname in self.cat2label.keys()
                ])
                if annotations['gt_num'] != 0:
                    annotations['name'] = np.array([
                        obj.classname for obj in obj_list
                        if obj.classname in self.cat2label.keys()
                    ])
                    annotations['bbox'] = np.concatenate([
                        obj.box2d.reshape(1, 4) for obj in obj_list
                        if obj.classname in self.cat2label.keys()
                    ], axis=0)
                    annotations['location'] = np.concatenate([
                        obj.centroid.reshape(1, 3) for obj in obj_list
                        if obj.classname in self.cat2label.keys()
                    ], axis=0)
                    annotations['dimensions'] = 2 * np.array([
                        [obj.l, obj.h, obj.w] for obj in obj_list
                        if obj.classname in self.cat2label.keys()
                    ])  # lhw(depth) format
                    annotations['rotation_y'] = np.array([
                        obj.heading_angle for obj in obj_list
                        if obj.classname in self.cat2label.keys()
                    ])
                    annotations['index'] = np.arange(
                        len(obj_list), dtype=np.int32)
                    annotations['class'] = np.array([
                        self.cat2label[obj.classname] for obj in obj_list
                        if obj.classname in self.cat2label.keys()
                    ])
                    annotations['gt_boxes_upright_depth'] = np.stack(
                        [
                            obj.box3d for obj in obj_list
                            if obj.classname in self.cat2label.keys()
                        ], axis=0)  # (K,8)
                info['annos'] = annotations
            return info

        lidar_save_dir = os.path.join(self.root_dir, 'lidar')
        if not os.path.exists(lidar_save_dir):
            os.mkdir(lidar_save_dir)
        sample_id_list = sample_id_list if \
            sample_id_list is not None else self.sample_id_list
        with futures.ThreadPoolExecutor(num_workers) as executor:
            infos = executor.map(process_single_scene, sample_id_list)
        return list(infos)
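Each SUN RGB-D label line therefore yields a 3D box parameterised as (cx, cy, cz, 2l, 2w, 2h, heading), with the heading recovered from the two in-plane orientation components. A small worked example of that conversion; the label values below are made up for illustration and are not from any real scene:

import numpy as np

# Hypothetical label fields: centroid, half-sizes (w, l, h) and the two
# orientation components that SUNRGBDInstance reads as data[11], data[12].
centroid = np.array([1.0, 2.0, 0.5])
w, l, h = 0.4, 0.9, 0.6
ox, oy = 1.0, 1.0  # orientation[0], orientation[1]

heading_angle = -1 * np.arctan2(oy, ox)  # -pi/4 for this example
box3d = np.concatenate(
    [centroid, np.array([l * 2, w * 2, h * 2, heading_angle])])
print(box3d)  # [1.  2.  0.5  1.8  0.8  1.2  -0.78539816]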