Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
OpenPCDet
Commits
0f73c62c
Commit
0f73c62c
authored
Jun 23, 2020
by
Shaoshuai Shi
Browse files
add iou3d_nms cuda ops and roiaware_pool3d cuda ops in the unified normative coordinate
parent
fea61d34
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
1636 additions
and
0 deletions
+1636
-0
pcdet/ops/iou3d_nms/iou3d_nms_utils.py
pcdet/ops/iou3d_nms/iou3d_nms_utils.py
+115
-0
pcdet/ops/iou3d_nms/src/iou3d_cpu.cpp
pcdet/ops/iou3d_nms/src/iou3d_cpu.cpp
+240
-0
pcdet/ops/iou3d_nms/src/iou3d_cpu.h
pcdet/ops/iou3d_nms/src/iou3d_cpu.h
+11
-0
pcdet/ops/iou3d_nms/src/iou3d_nms.cpp
pcdet/ops/iou3d_nms/src/iou3d_nms.cpp
+179
-0
pcdet/ops/iou3d_nms/src/iou3d_nms.h
pcdet/ops/iou3d_nms/src/iou3d_nms.h
+14
-0
pcdet/ops/iou3d_nms/src/iou3d_nms_api.cpp
pcdet/ops/iou3d_nms/src/iou3d_nms_api.cpp
+17
-0
pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu
pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu
+414
-0
pcdet/ops/roiaware_pool3d/roiaware_pool3d_utils.py
pcdet/ops/roiaware_pool3d/roiaware_pool3d_utils.py
+110
-0
pcdet/ops/roiaware_pool3d/src/roiaware_pool3d.cpp
pcdet/ops/roiaware_pool3d/src/roiaware_pool3d.cpp
+177
-0
pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
+359
-0
No files found.
pcdet/ops/iou3d_nms/iou3d_nms_utils.py
0 → 100644
View file @
0f73c62c
"""
3D IoU Calculation and Rotated NMS
Written by Shaoshuai Shi
All Rights Reserved 2019-2020.
"""
import
torch
from
.
import
iou3d_nms_cuda
from
...utils
import
common_utils
def
boxes_bev_iou_cpu
(
boxes_a
,
boxes_b
):
"""
Args:
boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
boxes_b: (N, 7) [x, y, z, dx, dy, dz, heading]
Returns:
"""
boxes_a
,
is_numpy
=
common_utils
.
check_numpy_to_torch
(
boxes_a
)
boxes_b
,
is_numpy
=
common_utils
.
check_numpy_to_torch
(
boxes_b
)
assert
not
(
boxes_a
.
is_cuda
or
boxes_b
.
is_cuda
),
'Only support CPU tensors'
assert
boxes_a
.
shape
[
1
]
==
7
and
boxes_b
.
shape
[
1
]
==
7
ans_iou
=
boxes_a
.
new_zeros
(
torch
.
Size
((
boxes_a
.
shape
[
0
],
boxes_b
.
shape
[
0
])))
iou3d_nms_cuda
.
boxes_iou_bev_cpu
(
boxes_a
.
contiguous
(),
boxes_b
.
contiguous
(),
ans_iou
)
return
ans_iou
.
numpy
()
if
is_numpy
else
ans_iou
def
boxes_iou_bev
(
boxes_a
,
boxes_b
):
"""
Args:
boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
boxes_b: (N, 7) [x, y, z, dx, dy, dz, heading]
Returns:
ans_iou: (N, M)
"""
assert
boxes_a
.
shape
[
1
]
==
boxes_b
.
shape
[
1
]
==
7
ans_iou
=
torch
.
cuda
.
FloatTensor
(
torch
.
Size
((
boxes_a
.
shape
[
0
],
boxes_b
.
shape
[
0
]))).
zero_
()
iou3d_nms_cuda
.
boxes_iou_bev_gpu
(
boxes_a
.
contiguous
(),
boxes_b
.
contiguous
(),
ans_iou
)
return
ans_iou
def
boxes_iou3d_gpu
(
boxes_a
,
boxes_b
):
"""
Args:
boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
boxes_b: (N, 7) [x, y, z, dx, dy, dz, heading]
Returns:
ans_iou: (N, M)
"""
assert
boxes_a
.
shape
[
1
]
==
boxes_b
.
shape
[
1
]
==
7
# height overlap
boxes_a_height_max
=
(
boxes_a
[:,
2
]
+
boxes_a
[:,
5
]
/
2
).
view
(
-
1
,
1
)
boxes_a_height_min
=
(
boxes_a
[:,
2
]
-
boxes_a
[:,
5
]
/
2
).
view
(
-
1
,
1
)
boxes_b_height_max
=
(
boxes_b
[:,
2
]
+
boxes_b
[:,
5
]
/
2
).
view
(
1
,
-
1
)
boxes_b_height_min
=
(
boxes_b
[:,
2
]
-
boxes_b
[:,
5
]
/
2
).
view
(
1
,
-
1
)
# bev overlap
overlaps_bev
=
torch
.
cuda
.
FloatTensor
(
torch
.
Size
((
boxes_a
.
shape
[
0
],
boxes_b
.
shape
[
0
]))).
zero_
()
# (N, M)
iou3d_nms_cuda
.
boxes_overlap_bev_gpu
(
boxes_a
.
contiguous
(),
boxes_b
.
contiguous
(),
overlaps_bev
)
max_of_min
=
torch
.
max
(
boxes_a_height_min
,
boxes_b_height_min
)
min_of_max
=
torch
.
min
(
boxes_a_height_max
,
boxes_b_height_max
)
overlaps_h
=
torch
.
clamp
(
min_of_max
-
max_of_min
,
min
=
0
)
# 3d iou
overlaps_3d
=
overlaps_bev
*
overlaps_h
vol_a
=
(
boxes_a
[:,
3
]
*
boxes_a
[:,
4
]
*
boxes_a
[:,
5
]).
view
(
-
1
,
1
)
vol_b
=
(
boxes_b
[:,
3
]
*
boxes_b
[:,
4
]
*
boxes_b
[:,
5
]).
view
(
1
,
-
1
)
iou3d
=
overlaps_3d
/
torch
.
clamp
(
vol_a
+
vol_b
-
overlaps_3d
,
min
=
1e-6
)
return
iou3d
def
nms_gpu
(
boxes
,
scores
,
thresh
,
pre_maxsize
=
None
,
**
kwargs
):
"""
:param boxes: (N, 7) [x, y, z, dx, dy, dz, heading]
:param scores: (N)
:param thresh:
:return:
"""
assert
boxes
.
shape
[
1
]
==
7
order
=
scores
.
sort
(
0
,
descending
=
True
)[
1
]
if
pre_maxsize
is
not
None
:
order
=
order
[:
pre_maxsize
]
boxes
=
boxes
[
order
].
contiguous
()
keep
=
torch
.
LongTensor
(
boxes
.
size
(
0
))
num_out
=
iou3d_nms_cuda
.
nms_gpu
(
boxes
,
keep
,
thresh
)
return
order
[
keep
[:
num_out
].
cuda
()].
contiguous
(),
None
def
nms_normal_gpu
(
boxes
,
scores
,
thresh
,
**
kwargs
):
"""
:param boxes: (N, 7) [x, y, z, dx, dy, dz, heading]
:param scores: (N)
:param thresh:
:return:
"""
assert
boxes
.
shape
[
0
]
==
7
order
=
scores
.
sort
(
0
,
descending
=
True
)[
1
]
boxes
=
boxes
[
order
].
contiguous
()
keep
=
torch
.
LongTensor
(
boxes
.
size
(
0
))
num_out
=
iou3d_nms_cuda
.
nms_normal_gpu
(
boxes
,
keep
,
thresh
)
return
order
[
keep
[:
num_out
].
cuda
()].
contiguous
(),
None
pcdet/ops/iou3d_nms/src/iou3d_cpu.cpp
0 → 100644
View file @
0f73c62c
/*
3D Rotated IoU Calculation (CPU)
Written by Shaoshuai Shi
All Rights Reserved 2020.
*/
#include <stdio.h>
#include <math.h>
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <vector>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include "iou3d_cpu.h"
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
inline
float
min
(
float
a
,
float
b
){
return
a
>
b
?
b
:
a
;
}
inline
float
max
(
float
a
,
float
b
){
return
a
>
b
?
a
:
b
;
}
const
float
EPS
=
1e-8
;
struct
Point
{
float
x
,
y
;
__device__
Point
()
{}
__device__
Point
(
double
_x
,
double
_y
){
x
=
_x
,
y
=
_y
;
}
__device__
void
set
(
float
_x
,
float
_y
){
x
=
_x
;
y
=
_y
;
}
__device__
Point
operator
+
(
const
Point
&
b
)
const
{
return
Point
(
x
+
b
.
x
,
y
+
b
.
y
);
}
__device__
Point
operator
-
(
const
Point
&
b
)
const
{
return
Point
(
x
-
b
.
x
,
y
-
b
.
y
);
}
};
inline
float
cross
(
const
Point
&
a
,
const
Point
&
b
){
return
a
.
x
*
b
.
y
-
a
.
y
*
b
.
x
;
}
inline
float
cross
(
const
Point
&
p1
,
const
Point
&
p2
,
const
Point
&
p0
){
return
(
p1
.
x
-
p0
.
x
)
*
(
p2
.
y
-
p0
.
y
)
-
(
p2
.
x
-
p0
.
x
)
*
(
p1
.
y
-
p0
.
y
);
}
inline
int
check_rect_cross
(
const
Point
&
p1
,
const
Point
&
p2
,
const
Point
&
q1
,
const
Point
&
q2
){
int
ret
=
min
(
p1
.
x
,
p2
.
x
)
<=
max
(
q1
.
x
,
q2
.
x
)
&&
min
(
q1
.
x
,
q2
.
x
)
<=
max
(
p1
.
x
,
p2
.
x
)
&&
min
(
p1
.
y
,
p2
.
y
)
<=
max
(
q1
.
y
,
q2
.
y
)
&&
min
(
q1
.
y
,
q2
.
y
)
<=
max
(
p1
.
y
,
p2
.
y
);
return
ret
;
}
inline
int
check_in_box2d
(
const
float
*
box
,
const
Point
&
p
){
//params: (7) [x, y, z, dx, dy, dz, heading]
const
float
MARGIN
=
1e-2
;
float
center_x
=
box
[
0
],
center_y
=
box
[
1
];
float
angle_cos
=
cos
(
-
box
[
6
]),
angle_sin
=
sin
(
-
box
[
6
]);
// rotate the point in the opposite direction of box
float
rot_x
=
(
p
.
x
-
center_x
)
*
angle_cos
+
(
p
.
y
-
center_y
)
*
(
-
angle_sin
);
float
rot_y
=
(
p
.
x
-
center_x
)
*
angle_sin
+
(
p
.
y
-
center_y
)
*
angle_cos
;
return
(
fabs
(
rot_x
)
<
box
[
3
]
/
2
+
MARGIN
&&
fabs
(
rot_y
)
<
box
[
4
]
/
2
+
MARGIN
);
}
inline
int
intersection
(
const
Point
&
p1
,
const
Point
&
p0
,
const
Point
&
q1
,
const
Point
&
q0
,
Point
&
ans
){
// fast exclusion
if
(
check_rect_cross
(
p0
,
p1
,
q0
,
q1
)
==
0
)
return
0
;
// check cross standing
float
s1
=
cross
(
q0
,
p1
,
p0
);
float
s2
=
cross
(
p1
,
q1
,
p0
);
float
s3
=
cross
(
p0
,
q1
,
q0
);
float
s4
=
cross
(
q1
,
p1
,
q0
);
if
(
!
(
s1
*
s2
>
0
&&
s3
*
s4
>
0
))
return
0
;
// calculate intersection of two lines
float
s5
=
cross
(
q1
,
p1
,
p0
);
if
(
fabs
(
s5
-
s1
)
>
EPS
){
ans
.
x
=
(
s5
*
q0
.
x
-
s1
*
q1
.
x
)
/
(
s5
-
s1
);
ans
.
y
=
(
s5
*
q0
.
y
-
s1
*
q1
.
y
)
/
(
s5
-
s1
);
}
else
{
float
a0
=
p0
.
y
-
p1
.
y
,
b0
=
p1
.
x
-
p0
.
x
,
c0
=
p0
.
x
*
p1
.
y
-
p1
.
x
*
p0
.
y
;
float
a1
=
q0
.
y
-
q1
.
y
,
b1
=
q1
.
x
-
q0
.
x
,
c1
=
q0
.
x
*
q1
.
y
-
q1
.
x
*
q0
.
y
;
float
D
=
a0
*
b1
-
a1
*
b0
;
ans
.
x
=
(
b0
*
c1
-
b1
*
c0
)
/
D
;
ans
.
y
=
(
a1
*
c0
-
a0
*
c1
)
/
D
;
}
return
1
;
}
inline
void
rotate_around_center
(
const
Point
&
center
,
const
float
angle_cos
,
const
float
angle_sin
,
Point
&
p
){
float
new_x
=
(
p
.
x
-
center
.
x
)
*
angle_cos
+
(
p
.
y
-
center
.
y
)
*
(
-
angle_sin
)
+
center
.
x
;
float
new_y
=
(
p
.
x
-
center
.
x
)
*
angle_sin
+
(
p
.
y
-
center
.
y
)
*
angle_cos
+
center
.
y
;
p
.
set
(
new_x
,
new_y
);
}
inline
int
point_cmp
(
const
Point
&
a
,
const
Point
&
b
,
const
Point
&
center
){
return
atan2
(
a
.
y
-
center
.
y
,
a
.
x
-
center
.
x
)
>
atan2
(
b
.
y
-
center
.
y
,
b
.
x
-
center
.
x
);
}
inline
float
box_overlap
(
const
float
*
box_a
,
const
float
*
box_b
){
// params: box_a (7) [x, y, z, dx, dy, dz, heading]
// params: box_b (7) [x, y, z, dx, dy, dz, heading]
// float a_x1 = box_a[0], a_y1 = box_a[1], a_x2 = box_a[2], a_y2 = box_a[3], a_angle = box_a[4];
// float b_x1 = box_b[0], b_y1 = box_b[1], b_x2 = box_b[2], b_y2 = box_b[3], b_angle = box_b[4];
float
a_angle
=
box_a
[
6
],
b_angle
=
box_b
[
6
];
float
a_dx_half
=
box_a
[
3
]
/
2
,
b_dx_half
=
box_b
[
3
]
/
2
,
a_dy_half
=
box_a
[
4
]
/
2
,
b_dy_half
=
box_b
[
4
]
/
2
;
float
a_x1
=
box_a
[
0
]
-
a_dx_half
,
a_y1
=
box_a
[
1
]
-
a_dy_half
;
float
a_x2
=
box_a
[
0
]
+
a_dx_half
,
a_y2
=
box_a
[
1
]
+
a_dy_half
;
float
b_x1
=
box_b
[
0
]
-
b_dx_half
,
b_y1
=
box_b
[
1
]
-
b_dy_half
;
float
b_x2
=
box_b
[
0
]
+
b_dx_half
,
b_y2
=
box_b
[
1
]
+
b_dy_half
;
Point
center_a
(
box_a
[
0
],
box_a
[
1
]);
Point
center_b
(
box_b
[
0
],
box_b
[
1
]);
Point
box_a_corners
[
5
];
box_a_corners
[
0
].
set
(
a_x1
,
a_y1
);
box_a_corners
[
1
].
set
(
a_x2
,
a_y1
);
box_a_corners
[
2
].
set
(
a_x2
,
a_y2
);
box_a_corners
[
3
].
set
(
a_x1
,
a_y2
);
Point
box_b_corners
[
5
];
box_b_corners
[
0
].
set
(
b_x1
,
b_y1
);
box_b_corners
[
1
].
set
(
b_x2
,
b_y1
);
box_b_corners
[
2
].
set
(
b_x2
,
b_y2
);
box_b_corners
[
3
].
set
(
b_x1
,
b_y2
);
// get oriented corners
float
a_angle_cos
=
cos
(
a_angle
),
a_angle_sin
=
sin
(
a_angle
);
float
b_angle_cos
=
cos
(
b_angle
),
b_angle_sin
=
sin
(
b_angle
);
for
(
int
k
=
0
;
k
<
4
;
k
++
){
rotate_around_center
(
center_a
,
a_angle_cos
,
a_angle_sin
,
box_a_corners
[
k
]);
rotate_around_center
(
center_b
,
b_angle_cos
,
b_angle_sin
,
box_b_corners
[
k
]);
}
box_a_corners
[
4
]
=
box_a_corners
[
0
];
box_b_corners
[
4
]
=
box_b_corners
[
0
];
// get intersection of lines
Point
cross_points
[
16
];
Point
poly_center
;
int
cnt
=
0
,
flag
=
0
;
poly_center
.
set
(
0
,
0
);
for
(
int
i
=
0
;
i
<
4
;
i
++
){
for
(
int
j
=
0
;
j
<
4
;
j
++
){
flag
=
intersection
(
box_a_corners
[
i
+
1
],
box_a_corners
[
i
],
box_b_corners
[
j
+
1
],
box_b_corners
[
j
],
cross_points
[
cnt
]);
if
(
flag
){
poly_center
=
poly_center
+
cross_points
[
cnt
];
cnt
++
;
}
}
}
// check corners
for
(
int
k
=
0
;
k
<
4
;
k
++
){
if
(
check_in_box2d
(
box_a
,
box_b_corners
[
k
])){
poly_center
=
poly_center
+
box_b_corners
[
k
];
cross_points
[
cnt
]
=
box_b_corners
[
k
];
cnt
++
;
}
if
(
check_in_box2d
(
box_b
,
box_a_corners
[
k
])){
poly_center
=
poly_center
+
box_a_corners
[
k
];
cross_points
[
cnt
]
=
box_a_corners
[
k
];
cnt
++
;
}
}
poly_center
.
x
/=
cnt
;
poly_center
.
y
/=
cnt
;
// sort the points of polygon
Point
temp
;
for
(
int
j
=
0
;
j
<
cnt
-
1
;
j
++
){
for
(
int
i
=
0
;
i
<
cnt
-
j
-
1
;
i
++
){
if
(
point_cmp
(
cross_points
[
i
],
cross_points
[
i
+
1
],
poly_center
)){
temp
=
cross_points
[
i
];
cross_points
[
i
]
=
cross_points
[
i
+
1
];
cross_points
[
i
+
1
]
=
temp
;
}
}
}
// get the overlap areas
float
area
=
0
;
for
(
int
k
=
0
;
k
<
cnt
-
1
;
k
++
){
area
+=
cross
(
cross_points
[
k
]
-
cross_points
[
0
],
cross_points
[
k
+
1
]
-
cross_points
[
0
]);
}
return
fabs
(
area
)
/
2.0
;
}
inline
float
iou_bev
(
const
float
*
box_a
,
const
float
*
box_b
){
// params: box_a (7) [x, y, z, dx, dy, dz, heading]
// params: box_b (7) [x, y, z, dx, dy, dz, heading]
float
sa
=
box_a
[
3
]
*
box_a
[
4
];
float
sb
=
box_b
[
3
]
*
box_b
[
4
];
float
s_overlap
=
box_overlap
(
box_a
,
box_b
);
return
s_overlap
/
fmaxf
(
sa
+
sb
-
s_overlap
,
EPS
);
}
int
boxes_iou_bev_cpu
(
at
::
Tensor
boxes_a_tensor
,
at
::
Tensor
boxes_b_tensor
,
at
::
Tensor
ans_iou_tensor
){
// params boxes_a_tensor: (N, 7) [x, y, z, dx, dy, dz, heading]
// params boxes_b_tensor: (M, 7) [x, y, z, dx, dy, dz, heading]
// params ans_iou_tensor: (N, M)
CHECK_CONTIGUOUS
(
boxes_a_tensor
);
CHECK_CONTIGUOUS
(
boxes_b_tensor
);
int
num_boxes_a
=
boxes_a_tensor
.
size
(
0
);
int
num_boxes_b
=
boxes_b_tensor
.
size
(
0
);
const
float
*
boxes_a
=
boxes_a_tensor
.
data
<
float
>
();
const
float
*
boxes_b
=
boxes_b_tensor
.
data
<
float
>
();
float
*
ans_iou
=
ans_iou_tensor
.
data
<
float
>
();
for
(
int
i
=
0
;
i
<
num_boxes_a
;
i
++
){
for
(
int
j
=
0
;
j
<
num_boxes_b
;
j
++
){
ans_iou
[
i
*
num_boxes_b
+
j
]
=
iou_bev
(
boxes_a
+
i
*
7
,
boxes_b
+
j
*
7
);
}
}
return
1
;
}
pcdet/ops/iou3d_nms/src/iou3d_cpu.h
0 → 100644
View file @
0f73c62c
#ifndef IOU3D_CPU_H
#define IOU3D_CPU_H
#include <torch/serialize/tensor.h>
#include <vector>
#include <cuda.h>
#include <cuda_runtime_api.h>
int
boxes_iou_bev_cpu
(
at
::
Tensor
boxes_a_tensor
,
at
::
Tensor
boxes_b_tensor
,
at
::
Tensor
ans_iou_tensor
);
#endif
pcdet/ops/iou3d_nms/src/iou3d_nms.cpp
0 → 100644
View file @
0f73c62c
/*
3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
Written by Shaoshuai Shi
All Rights Reserved 2019-2020.
*/
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <vector>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include "iou3d_nms.h"
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
#define CHECK_ERROR(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline
void
gpuAssert
(
cudaError_t
code
,
const
char
*
file
,
int
line
,
bool
abort
=
true
)
{
if
(
code
!=
cudaSuccess
)
{
fprintf
(
stderr
,
"GPUassert: %s %s %d
\n
"
,
cudaGetErrorString
(
code
),
file
,
line
);
if
(
abort
)
exit
(
code
);
}
}
const
int
THREADS_PER_BLOCK_NMS
=
sizeof
(
unsigned
long
long
)
*
8
;
void
boxesoverlapLauncher
(
const
int
num_a
,
const
float
*
boxes_a
,
const
int
num_b
,
const
float
*
boxes_b
,
float
*
ans_overlap
);
void
boxesioubevLauncher
(
const
int
num_a
,
const
float
*
boxes_a
,
const
int
num_b
,
const
float
*
boxes_b
,
float
*
ans_iou
);
void
nmsLauncher
(
const
float
*
boxes
,
unsigned
long
long
*
mask
,
int
boxes_num
,
float
nms_overlap_thresh
);
void
nmsNormalLauncher
(
const
float
*
boxes
,
unsigned
long
long
*
mask
,
int
boxes_num
,
float
nms_overlap_thresh
);
int
boxes_overlap_bev_gpu
(
at
::
Tensor
boxes_a
,
at
::
Tensor
boxes_b
,
at
::
Tensor
ans_overlap
){
// params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
// params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
// params ans_overlap: (N, M)
CHECK_INPUT
(
boxes_a
);
CHECK_INPUT
(
boxes_b
);
CHECK_INPUT
(
ans_overlap
);
int
num_a
=
boxes_a
.
size
(
0
);
int
num_b
=
boxes_b
.
size
(
0
);
const
float
*
boxes_a_data
=
boxes_a
.
data
<
float
>
();
const
float
*
boxes_b_data
=
boxes_b
.
data
<
float
>
();
float
*
ans_overlap_data
=
ans_overlap
.
data
<
float
>
();
boxesoverlapLauncher
(
num_a
,
boxes_a_data
,
num_b
,
boxes_b_data
,
ans_overlap_data
);
return
1
;
}
int
boxes_iou_bev_gpu
(
at
::
Tensor
boxes_a
,
at
::
Tensor
boxes_b
,
at
::
Tensor
ans_iou
){
// params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
// params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
// params ans_overlap: (N, M)
CHECK_INPUT
(
boxes_a
);
CHECK_INPUT
(
boxes_b
);
CHECK_INPUT
(
ans_iou
);
int
num_a
=
boxes_a
.
size
(
0
);
int
num_b
=
boxes_b
.
size
(
0
);
const
float
*
boxes_a_data
=
boxes_a
.
data
<
float
>
();
const
float
*
boxes_b_data
=
boxes_b
.
data
<
float
>
();
float
*
ans_iou_data
=
ans_iou
.
data
<
float
>
();
boxesioubevLauncher
(
num_a
,
boxes_a_data
,
num_b
,
boxes_b_data
,
ans_iou_data
);
return
1
;
}
int
nms_gpu
(
at
::
Tensor
boxes
,
at
::
Tensor
keep
,
float
nms_overlap_thresh
){
// params boxes: (N, 7) [x, y, z, dx, dy, dz, heading]
// params keep: (N)
CHECK_INPUT
(
boxes
);
CHECK_CONTIGUOUS
(
keep
);
int
boxes_num
=
boxes
.
size
(
0
);
const
float
*
boxes_data
=
boxes
.
data
<
float
>
();
long
*
keep_data
=
keep
.
data
<
long
>
();
const
int
col_blocks
=
DIVUP
(
boxes_num
,
THREADS_PER_BLOCK_NMS
);
unsigned
long
long
*
mask_data
=
NULL
;
CHECK_ERROR
(
cudaMalloc
((
void
**
)
&
mask_data
,
boxes_num
*
col_blocks
*
sizeof
(
unsigned
long
long
)));
nmsLauncher
(
boxes_data
,
mask_data
,
boxes_num
,
nms_overlap_thresh
);
// unsigned long long mask_cpu[boxes_num * col_blocks];
// unsigned long long *mask_cpu = new unsigned long long [boxes_num * col_blocks];
std
::
vector
<
unsigned
long
long
>
mask_cpu
(
boxes_num
*
col_blocks
);
// printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks);
CHECK_ERROR
(
cudaMemcpy
(
&
mask_cpu
[
0
],
mask_data
,
boxes_num
*
col_blocks
*
sizeof
(
unsigned
long
long
),
cudaMemcpyDeviceToHost
));
cudaFree
(
mask_data
);
unsigned
long
long
remv_cpu
[
col_blocks
];
memset
(
remv_cpu
,
0
,
col_blocks
*
sizeof
(
unsigned
long
long
));
int
num_to_keep
=
0
;
for
(
int
i
=
0
;
i
<
boxes_num
;
i
++
){
int
nblock
=
i
/
THREADS_PER_BLOCK_NMS
;
int
inblock
=
i
%
THREADS_PER_BLOCK_NMS
;
if
(
!
(
remv_cpu
[
nblock
]
&
(
1ULL
<<
inblock
))){
keep_data
[
num_to_keep
++
]
=
i
;
unsigned
long
long
*
p
=
&
mask_cpu
[
0
]
+
i
*
col_blocks
;
for
(
int
j
=
nblock
;
j
<
col_blocks
;
j
++
){
remv_cpu
[
j
]
|=
p
[
j
];
}
}
}
if
(
cudaSuccess
!=
cudaGetLastError
()
)
printf
(
"Error!
\n
"
);
return
num_to_keep
;
}
int
nms_normal_gpu
(
at
::
Tensor
boxes
,
at
::
Tensor
keep
,
float
nms_overlap_thresh
){
// params boxes: (N, 7) [x, y, z, dx, dy, dz, heading]
// params keep: (N)
CHECK_INPUT
(
boxes
);
CHECK_CONTIGUOUS
(
keep
);
int
boxes_num
=
boxes
.
size
(
0
);
const
float
*
boxes_data
=
boxes
.
data
<
float
>
();
long
*
keep_data
=
keep
.
data
<
long
>
();
const
int
col_blocks
=
DIVUP
(
boxes_num
,
THREADS_PER_BLOCK_NMS
);
unsigned
long
long
*
mask_data
=
NULL
;
CHECK_ERROR
(
cudaMalloc
((
void
**
)
&
mask_data
,
boxes_num
*
col_blocks
*
sizeof
(
unsigned
long
long
)));
nmsNormalLauncher
(
boxes_data
,
mask_data
,
boxes_num
,
nms_overlap_thresh
);
// unsigned long long mask_cpu[boxes_num * col_blocks];
// unsigned long long *mask_cpu = new unsigned long long [boxes_num * col_blocks];
std
::
vector
<
unsigned
long
long
>
mask_cpu
(
boxes_num
*
col_blocks
);
// printf("boxes_num=%d, col_blocks=%d\n", boxes_num, col_blocks);
CHECK_ERROR
(
cudaMemcpy
(
&
mask_cpu
[
0
],
mask_data
,
boxes_num
*
col_blocks
*
sizeof
(
unsigned
long
long
),
cudaMemcpyDeviceToHost
));
cudaFree
(
mask_data
);
unsigned
long
long
remv_cpu
[
col_blocks
];
memset
(
remv_cpu
,
0
,
col_blocks
*
sizeof
(
unsigned
long
long
));
int
num_to_keep
=
0
;
for
(
int
i
=
0
;
i
<
boxes_num
;
i
++
){
int
nblock
=
i
/
THREADS_PER_BLOCK_NMS
;
int
inblock
=
i
%
THREADS_PER_BLOCK_NMS
;
if
(
!
(
remv_cpu
[
nblock
]
&
(
1ULL
<<
inblock
))){
keep_data
[
num_to_keep
++
]
=
i
;
unsigned
long
long
*
p
=
&
mask_cpu
[
0
]
+
i
*
col_blocks
;
for
(
int
j
=
nblock
;
j
<
col_blocks
;
j
++
){
remv_cpu
[
j
]
|=
p
[
j
];
}
}
}
if
(
cudaSuccess
!=
cudaGetLastError
()
)
printf
(
"Error!
\n
"
);
return
num_to_keep
;
}
pcdet/ops/iou3d_nms/src/iou3d_nms.h
0 → 100644
View file @
0f73c62c
#ifndef IOU3D_NMS_H
#define IOU3D_NMS_H
#include <torch/serialize/tensor.h>
#include <vector>
#include <cuda.h>
#include <cuda_runtime_api.h>
int
boxes_overlap_bev_gpu
(
at
::
Tensor
boxes_a
,
at
::
Tensor
boxes_b
,
at
::
Tensor
ans_overlap
);
int
boxes_iou_bev_gpu
(
at
::
Tensor
boxes_a
,
at
::
Tensor
boxes_b
,
at
::
Tensor
ans_iou
);
int
nms_gpu
(
at
::
Tensor
boxes
,
at
::
Tensor
keep
,
float
nms_overlap_thresh
);
int
nms_normal_gpu
(
at
::
Tensor
boxes
,
at
::
Tensor
keep
,
float
nms_overlap_thresh
);
#endif
pcdet/ops/iou3d_nms/src/iou3d_nms_api.cpp
0 → 100644
View file @
0f73c62c
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <vector>
#include <cuda.h>
#include <cuda_runtime_api.h>
#include "iou3d_cpu.h"
#include "iou3d_nms.h"
PYBIND11_MODULE
(
TORCH_EXTENSION_NAME
,
m
)
{
m
.
def
(
"boxes_overlap_bev_gpu"
,
&
boxes_overlap_bev_gpu
,
"oriented boxes overlap"
);
m
.
def
(
"boxes_iou_bev_gpu"
,
&
boxes_iou_bev_gpu
,
"oriented boxes iou"
);
m
.
def
(
"nms_gpu"
,
&
nms_gpu
,
"oriented nms gpu"
);
m
.
def
(
"nms_normal_gpu"
,
&
nms_normal_gpu
,
"nms gpu"
);
m
.
def
(
"boxes_iou_bev_cpu"
,
&
boxes_iou_bev_cpu
,
"oriented boxes iou"
);
}
pcdet/ops/iou3d_nms/src/iou3d_nms_kernel.cu
0 → 100644
View file @
0f73c62c
/*
3D IoU Calculation and Rotated NMS(modified from 2D NMS written by others)
Written by Shaoshuai Shi
All Rights Reserved 2019-2020.
*/
#include <stdio.h>
#define THREADS_PER_BLOCK 16
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
// #define DEBUG
const
int
THREADS_PER_BLOCK_NMS
=
sizeof
(
unsigned
long
long
)
*
8
;
const
float
EPS
=
1e-8
;
struct
Point
{
float
x
,
y
;
__device__
Point
()
{}
__device__
Point
(
double
_x
,
double
_y
){
x
=
_x
,
y
=
_y
;
}
__device__
void
set
(
float
_x
,
float
_y
){
x
=
_x
;
y
=
_y
;
}
__device__
Point
operator
+
(
const
Point
&
b
)
const
{
return
Point
(
x
+
b
.
x
,
y
+
b
.
y
);
}
__device__
Point
operator
-
(
const
Point
&
b
)
const
{
return
Point
(
x
-
b
.
x
,
y
-
b
.
y
);
}
};
__device__
inline
float
cross
(
const
Point
&
a
,
const
Point
&
b
){
return
a
.
x
*
b
.
y
-
a
.
y
*
b
.
x
;
}
__device__
inline
float
cross
(
const
Point
&
p1
,
const
Point
&
p2
,
const
Point
&
p0
){
return
(
p1
.
x
-
p0
.
x
)
*
(
p2
.
y
-
p0
.
y
)
-
(
p2
.
x
-
p0
.
x
)
*
(
p1
.
y
-
p0
.
y
);
}
__device__
int
check_rect_cross
(
const
Point
&
p1
,
const
Point
&
p2
,
const
Point
&
q1
,
const
Point
&
q2
){
int
ret
=
min
(
p1
.
x
,
p2
.
x
)
<=
max
(
q1
.
x
,
q2
.
x
)
&&
min
(
q1
.
x
,
q2
.
x
)
<=
max
(
p1
.
x
,
p2
.
x
)
&&
min
(
p1
.
y
,
p2
.
y
)
<=
max
(
q1
.
y
,
q2
.
y
)
&&
min
(
q1
.
y
,
q2
.
y
)
<=
max
(
p1
.
y
,
p2
.
y
);
return
ret
;
}
__device__
inline
int
check_in_box2d
(
const
float
*
box
,
const
Point
&
p
){
//params: (7) [x, y, z, dx, dy, dz, heading]
const
float
MARGIN
=
1e-2
;
float
center_x
=
box
[
0
],
center_y
=
box
[
1
];
float
angle_cos
=
cos
(
-
box
[
6
]),
angle_sin
=
sin
(
-
box
[
6
]);
// rotate the point in the opposite direction of box
float
rot_x
=
(
p
.
x
-
center_x
)
*
angle_cos
+
(
p
.
y
-
center_y
)
*
(
-
angle_sin
);
float
rot_y
=
(
p
.
x
-
center_x
)
*
angle_sin
+
(
p
.
y
-
center_y
)
*
angle_cos
;
return
(
fabs
(
rot_x
)
<
box
[
3
]
/
2
+
MARGIN
&&
fabs
(
rot_y
)
<
box
[
4
]
/
2
+
MARGIN
);
}
__device__
inline
int
intersection
(
const
Point
&
p1
,
const
Point
&
p0
,
const
Point
&
q1
,
const
Point
&
q0
,
Point
&
ans
){
// fast exclusion
if
(
check_rect_cross
(
p0
,
p1
,
q0
,
q1
)
==
0
)
return
0
;
// check cross standing
float
s1
=
cross
(
q0
,
p1
,
p0
);
float
s2
=
cross
(
p1
,
q1
,
p0
);
float
s3
=
cross
(
p0
,
q1
,
q0
);
float
s4
=
cross
(
q1
,
p1
,
q0
);
if
(
!
(
s1
*
s2
>
0
&&
s3
*
s4
>
0
))
return
0
;
// calculate intersection of two lines
float
s5
=
cross
(
q1
,
p1
,
p0
);
if
(
fabs
(
s5
-
s1
)
>
EPS
){
ans
.
x
=
(
s5
*
q0
.
x
-
s1
*
q1
.
x
)
/
(
s5
-
s1
);
ans
.
y
=
(
s5
*
q0
.
y
-
s1
*
q1
.
y
)
/
(
s5
-
s1
);
}
else
{
float
a0
=
p0
.
y
-
p1
.
y
,
b0
=
p1
.
x
-
p0
.
x
,
c0
=
p0
.
x
*
p1
.
y
-
p1
.
x
*
p0
.
y
;
float
a1
=
q0
.
y
-
q1
.
y
,
b1
=
q1
.
x
-
q0
.
x
,
c1
=
q0
.
x
*
q1
.
y
-
q1
.
x
*
q0
.
y
;
float
D
=
a0
*
b1
-
a1
*
b0
;
ans
.
x
=
(
b0
*
c1
-
b1
*
c0
)
/
D
;
ans
.
y
=
(
a1
*
c0
-
a0
*
c1
)
/
D
;
}
return
1
;
}
__device__
inline
void
rotate_around_center
(
const
Point
&
center
,
const
float
angle_cos
,
const
float
angle_sin
,
Point
&
p
){
float
new_x
=
(
p
.
x
-
center
.
x
)
*
angle_cos
+
(
p
.
y
-
center
.
y
)
*
(
-
angle_sin
)
+
center
.
x
;
float
new_y
=
(
p
.
x
-
center
.
x
)
*
angle_sin
+
(
p
.
y
-
center
.
y
)
*
angle_cos
+
center
.
y
;
p
.
set
(
new_x
,
new_y
);
}
__device__
inline
int
point_cmp
(
const
Point
&
a
,
const
Point
&
b
,
const
Point
&
center
){
return
atan2
(
a
.
y
-
center
.
y
,
a
.
x
-
center
.
x
)
>
atan2
(
b
.
y
-
center
.
y
,
b
.
x
-
center
.
x
);
}
__device__
inline
float
box_overlap
(
const
float
*
box_a
,
const
float
*
box_b
){
// params box_a: [x, y, z, dx, dy, dz, heading]
// params box_b: [x, y, z, dx, dy, dz, heading]
float
a_angle
=
box_a
[
6
],
b_angle
=
box_b
[
6
];
float
a_dx_half
=
box_a
[
3
]
/
2
,
b_dx_half
=
box_b
[
3
]
/
2
,
a_dy_half
=
box_a
[
4
]
/
2
,
b_dy_half
=
box_b
[
4
]
/
2
;
float
a_x1
=
box_a
[
0
]
-
a_dx_half
,
a_y1
=
box_a
[
1
]
-
a_dy_half
;
float
a_x2
=
box_a
[
0
]
+
a_dx_half
,
a_y2
=
box_a
[
1
]
+
a_dy_half
;
float
b_x1
=
box_b
[
0
]
-
b_dx_half
,
b_y1
=
box_b
[
1
]
-
b_dy_half
;
float
b_x2
=
box_b
[
0
]
+
b_dx_half
,
b_y2
=
box_b
[
1
]
+
b_dy_half
;
Point
center_a
(
box_a
[
0
],
box_a
[
1
]);
Point
center_b
(
box_b
[
0
],
box_b
[
1
]);
#ifdef DEBUG
printf
(
"a: (%.3f, %.3f, %.3f, %.3f, %.3f), b: (%.3f, %.3f, %.3f, %.3f, %.3f)
\n
"
,
a_x1
,
a_y1
,
a_x2
,
a_y2
,
a_angle
,
b_x1
,
b_y1
,
b_x2
,
b_y2
,
b_angle
);
printf
(
"center a: (%.3f, %.3f), b: (%.3f, %.3f)
\n
"
,
center_a
.
x
,
center_a
.
y
,
center_b
.
x
,
center_b
.
y
);
#endif
Point
box_a_corners
[
5
];
box_a_corners
[
0
].
set
(
a_x1
,
a_y1
);
box_a_corners
[
1
].
set
(
a_x2
,
a_y1
);
box_a_corners
[
2
].
set
(
a_x2
,
a_y2
);
box_a_corners
[
3
].
set
(
a_x1
,
a_y2
);
Point
box_b_corners
[
5
];
box_b_corners
[
0
].
set
(
b_x1
,
b_y1
);
box_b_corners
[
1
].
set
(
b_x2
,
b_y1
);
box_b_corners
[
2
].
set
(
b_x2
,
b_y2
);
box_b_corners
[
3
].
set
(
b_x1
,
b_y2
);
// get oriented corners
float
a_angle_cos
=
cos
(
a_angle
),
a_angle_sin
=
sin
(
a_angle
);
float
b_angle_cos
=
cos
(
b_angle
),
b_angle_sin
=
sin
(
b_angle
);
for
(
int
k
=
0
;
k
<
4
;
k
++
){
#ifdef DEBUG
printf
(
"before corner %d: a(%.3f, %.3f), b(%.3f, %.3f)
\n
"
,
k
,
box_a_corners
[
k
].
x
,
box_a_corners
[
k
].
y
,
box_b_corners
[
k
].
x
,
box_b_corners
[
k
].
y
);
#endif
rotate_around_center
(
center_a
,
a_angle_cos
,
a_angle_sin
,
box_a_corners
[
k
]);
rotate_around_center
(
center_b
,
b_angle_cos
,
b_angle_sin
,
box_b_corners
[
k
]);
#ifdef DEBUG
printf
(
"corner %d: a(%.3f, %.3f), b(%.3f, %.3f)
\n
"
,
k
,
box_a_corners
[
k
].
x
,
box_a_corners
[
k
].
y
,
box_b_corners
[
k
].
x
,
box_b_corners
[
k
].
y
);
#endif
}
box_a_corners
[
4
]
=
box_a_corners
[
0
];
box_b_corners
[
4
]
=
box_b_corners
[
0
];
// get intersection of lines
Point
cross_points
[
16
];
Point
poly_center
;
int
cnt
=
0
,
flag
=
0
;
poly_center
.
set
(
0
,
0
);
for
(
int
i
=
0
;
i
<
4
;
i
++
){
for
(
int
j
=
0
;
j
<
4
;
j
++
){
flag
=
intersection
(
box_a_corners
[
i
+
1
],
box_a_corners
[
i
],
box_b_corners
[
j
+
1
],
box_b_corners
[
j
],
cross_points
[
cnt
]);
if
(
flag
){
poly_center
=
poly_center
+
cross_points
[
cnt
];
cnt
++
;
#ifdef DEBUG
printf
(
"Cross points (%.3f, %.3f): a(%.3f, %.3f)->(%.3f, %.3f), b(%.3f, %.3f)->(%.3f, %.3f)
\n
"
,
cross_points
[
cnt
-
1
].
x
,
cross_points
[
cnt
-
1
].
y
,
box_a_corners
[
i
].
x
,
box_a_corners
[
i
].
y
,
box_a_corners
[
i
+
1
].
x
,
box_a_corners
[
i
+
1
].
y
,
box_b_corners
[
i
].
x
,
box_b_corners
[
i
].
y
,
box_b_corners
[
i
+
1
].
x
,
box_b_corners
[
i
+
1
].
y
);
#endif
}
}
}
// check corners
for
(
int
k
=
0
;
k
<
4
;
k
++
){
if
(
check_in_box2d
(
box_a
,
box_b_corners
[
k
])){
poly_center
=
poly_center
+
box_b_corners
[
k
];
cross_points
[
cnt
]
=
box_b_corners
[
k
];
cnt
++
;
#ifdef DEBUG
printf
(
"b corners in a: corner_b(%.3f, %.3f)"
,
cross_points
[
cnt
-
1
].
x
,
cross_points
[
cnt
-
1
].
y
);
#endif
}
if
(
check_in_box2d
(
box_b
,
box_a_corners
[
k
])){
poly_center
=
poly_center
+
box_a_corners
[
k
];
cross_points
[
cnt
]
=
box_a_corners
[
k
];
cnt
++
;
#ifdef DEBUG
printf
(
"a corners in b: corner_a(%.3f, %.3f)"
,
cross_points
[
cnt
-
1
].
x
,
cross_points
[
cnt
-
1
].
y
);
#endif
}
}
poly_center
.
x
/=
cnt
;
poly_center
.
y
/=
cnt
;
// sort the points of polygon
Point
temp
;
for
(
int
j
=
0
;
j
<
cnt
-
1
;
j
++
){
for
(
int
i
=
0
;
i
<
cnt
-
j
-
1
;
i
++
){
if
(
point_cmp
(
cross_points
[
i
],
cross_points
[
i
+
1
],
poly_center
)){
temp
=
cross_points
[
i
];
cross_points
[
i
]
=
cross_points
[
i
+
1
];
cross_points
[
i
+
1
]
=
temp
;
}
}
}
#ifdef DEBUG
printf
(
"cnt=%d
\n
"
,
cnt
);
for
(
int
i
=
0
;
i
<
cnt
;
i
++
){
printf
(
"All cross point %d: (%.3f, %.3f)
\n
"
,
i
,
cross_points
[
i
].
x
,
cross_points
[
i
].
y
);
}
#endif
// get the overlap areas
float
area
=
0
;
for
(
int
k
=
0
;
k
<
cnt
-
1
;
k
++
){
area
+=
cross
(
cross_points
[
k
]
-
cross_points
[
0
],
cross_points
[
k
+
1
]
-
cross_points
[
0
]);
}
return
fabs
(
area
)
/
2.0
;
}
__device__
inline
float
iou_bev
(
const
float
*
box_a
,
const
float
*
box_b
){
// params box_a: [x, y, z, dx, dy, dz, heading]
// params box_b: [x, y, z, dx, dy, dz, heading]
float
sa
=
box_a
[
3
]
*
box_a
[
4
];
float
sb
=
box_b
[
3
]
*
box_b
[
4
];
float
s_overlap
=
box_overlap
(
box_a
,
box_b
);
return
s_overlap
/
fmaxf
(
sa
+
sb
-
s_overlap
,
EPS
);
}
__global__
void
boxes_overlap_kernel
(
const
int
num_a
,
const
float
*
boxes_a
,
const
int
num_b
,
const
float
*
boxes_b
,
float
*
ans_overlap
){
// params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
// params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
const
int
a_idx
=
blockIdx
.
y
*
THREADS_PER_BLOCK
+
threadIdx
.
y
;
const
int
b_idx
=
blockIdx
.
x
*
THREADS_PER_BLOCK
+
threadIdx
.
x
;
if
(
a_idx
>=
num_a
||
b_idx
>=
num_b
){
return
;
}
const
float
*
cur_box_a
=
boxes_a
+
a_idx
*
7
;
const
float
*
cur_box_b
=
boxes_b
+
b_idx
*
7
;
float
s_overlap
=
box_overlap
(
cur_box_a
,
cur_box_b
);
ans_overlap
[
a_idx
*
num_b
+
b_idx
]
=
s_overlap
;
}
__global__
void
boxes_iou_bev_kernel
(
const
int
num_a
,
const
float
*
boxes_a
,
const
int
num_b
,
const
float
*
boxes_b
,
float
*
ans_iou
){
// params boxes_a: (N, 7) [x, y, z, dx, dy, dz, heading]
// params boxes_b: (M, 7) [x, y, z, dx, dy, dz, heading]
const
int
a_idx
=
blockIdx
.
y
*
THREADS_PER_BLOCK
+
threadIdx
.
y
;
const
int
b_idx
=
blockIdx
.
x
*
THREADS_PER_BLOCK
+
threadIdx
.
x
;
if
(
a_idx
>=
num_a
||
b_idx
>=
num_b
){
return
;
}
const
float
*
cur_box_a
=
boxes_a
+
a_idx
*
7
;
const
float
*
cur_box_b
=
boxes_b
+
b_idx
*
7
;
float
cur_iou_bev
=
iou_bev
(
cur_box_a
,
cur_box_b
);
ans_iou
[
a_idx
*
num_b
+
b_idx
]
=
cur_iou_bev
;
}
__global__
void
nms_kernel
(
const
int
boxes_num
,
const
float
nms_overlap_thresh
,
const
float
*
boxes
,
unsigned
long
long
*
mask
){
//params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
//params: mask (N, N/THREADS_PER_BLOCK_NMS)
const
int
row_start
=
blockIdx
.
y
;
const
int
col_start
=
blockIdx
.
x
;
// if (row_start > col_start) return;
const
int
row_size
=
fminf
(
boxes_num
-
row_start
*
THREADS_PER_BLOCK_NMS
,
THREADS_PER_BLOCK_NMS
);
const
int
col_size
=
fminf
(
boxes_num
-
col_start
*
THREADS_PER_BLOCK_NMS
,
THREADS_PER_BLOCK_NMS
);
__shared__
float
block_boxes
[
THREADS_PER_BLOCK_NMS
*
7
];
if
(
threadIdx
.
x
<
col_size
)
{
block_boxes
[
threadIdx
.
x
*
7
+
0
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
0
];
block_boxes
[
threadIdx
.
x
*
7
+
1
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
1
];
block_boxes
[
threadIdx
.
x
*
7
+
2
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
2
];
block_boxes
[
threadIdx
.
x
*
7
+
3
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
3
];
block_boxes
[
threadIdx
.
x
*
7
+
4
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
4
];
block_boxes
[
threadIdx
.
x
*
7
+
5
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
5
];
block_boxes
[
threadIdx
.
x
*
7
+
6
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
6
];
}
__syncthreads
();
if
(
threadIdx
.
x
<
row_size
)
{
const
int
cur_box_idx
=
THREADS_PER_BLOCK_NMS
*
row_start
+
threadIdx
.
x
;
const
float
*
cur_box
=
boxes
+
cur_box_idx
*
7
;
int
i
=
0
;
unsigned
long
long
t
=
0
;
int
start
=
0
;
if
(
row_start
==
col_start
)
{
start
=
threadIdx
.
x
+
1
;
}
for
(
i
=
start
;
i
<
col_size
;
i
++
)
{
if
(
iou_bev
(
cur_box
,
block_boxes
+
i
*
7
)
>
nms_overlap_thresh
){
t
|=
1ULL
<<
i
;
}
}
const
int
col_blocks
=
DIVUP
(
boxes_num
,
THREADS_PER_BLOCK_NMS
);
mask
[
cur_box_idx
*
col_blocks
+
col_start
]
=
t
;
}
}
__device__
inline
float
iou_normal
(
float
const
*
const
a
,
float
const
*
const
b
)
{
//params: a: [x, y, z, dx, dy, dz, heading]
//params: b: [x, y, z, dx, dy, dz, heading]
float
left
=
fmaxf
(
a
[
0
]
-
a
[
3
]
/
2
,
b
[
0
]
-
b
[
3
]
/
2
),
right
=
fminf
(
a
[
0
]
+
a
[
3
]
/
2
,
b
[
0
]
+
b
[
3
]
/
2
);
float
top
=
fmaxf
(
a
[
1
]
-
a
[
4
]
/
2
,
b
[
1
]
-
b
[
4
]
/
2
),
bottom
=
fminf
(
a
[
1
]
+
a
[
4
]
/
2
,
b
[
1
]
+
b
[
4
]
/
2
);
float
width
=
fmaxf
(
right
-
left
,
0.
f
),
height
=
fmaxf
(
bottom
-
top
,
0.
f
);
float
interS
=
width
*
height
;
float
Sa
=
a
[
3
]
*
a
[
4
];
float
Sb
=
b
[
3
]
*
b
[
4
];
return
interS
/
fmaxf
(
Sa
+
Sb
-
interS
,
EPS
);
}
__global__
void
nms_normal_kernel
(
const
int
boxes_num
,
const
float
nms_overlap_thresh
,
const
float
*
boxes
,
unsigned
long
long
*
mask
){
//params: boxes (N, 7) [x, y, z, dx, dy, dz, heading]
//params: mask (N, N/THREADS_PER_BLOCK_NMS)
const
int
row_start
=
blockIdx
.
y
;
const
int
col_start
=
blockIdx
.
x
;
// if (row_start > col_start) return;
const
int
row_size
=
fminf
(
boxes_num
-
row_start
*
THREADS_PER_BLOCK_NMS
,
THREADS_PER_BLOCK_NMS
);
const
int
col_size
=
fminf
(
boxes_num
-
col_start
*
THREADS_PER_BLOCK_NMS
,
THREADS_PER_BLOCK_NMS
);
__shared__
float
block_boxes
[
THREADS_PER_BLOCK_NMS
*
7
];
if
(
threadIdx
.
x
<
col_size
)
{
block_boxes
[
threadIdx
.
x
*
7
+
0
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
0
];
block_boxes
[
threadIdx
.
x
*
7
+
1
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
1
];
block_boxes
[
threadIdx
.
x
*
7
+
2
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
2
];
block_boxes
[
threadIdx
.
x
*
7
+
3
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
3
];
block_boxes
[
threadIdx
.
x
*
7
+
4
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
4
];
block_boxes
[
threadIdx
.
x
*
7
+
5
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
5
];
block_boxes
[
threadIdx
.
x
*
7
+
6
]
=
boxes
[(
THREADS_PER_BLOCK_NMS
*
col_start
+
threadIdx
.
x
)
*
7
+
6
];
}
__syncthreads
();
if
(
threadIdx
.
x
<
row_size
)
{
const
int
cur_box_idx
=
THREADS_PER_BLOCK_NMS
*
row_start
+
threadIdx
.
x
;
const
float
*
cur_box
=
boxes
+
cur_box_idx
*
7
;
int
i
=
0
;
unsigned
long
long
t
=
0
;
int
start
=
0
;
if
(
row_start
==
col_start
)
{
start
=
threadIdx
.
x
+
1
;
}
for
(
i
=
start
;
i
<
col_size
;
i
++
)
{
if
(
iou_normal
(
cur_box
,
block_boxes
+
i
*
7
)
>
nms_overlap_thresh
){
t
|=
1ULL
<<
i
;
}
}
const
int
col_blocks
=
DIVUP
(
boxes_num
,
THREADS_PER_BLOCK_NMS
);
mask
[
cur_box_idx
*
col_blocks
+
col_start
]
=
t
;
}
}
void
boxesoverlapLauncher
(
const
int
num_a
,
const
float
*
boxes_a
,
const
int
num_b
,
const
float
*
boxes_b
,
float
*
ans_overlap
){
dim3
blocks
(
DIVUP
(
num_b
,
THREADS_PER_BLOCK
),
DIVUP
(
num_a
,
THREADS_PER_BLOCK
));
// blockIdx.x(col), blockIdx.y(row)
dim3
threads
(
THREADS_PER_BLOCK
,
THREADS_PER_BLOCK
);
boxes_overlap_kernel
<<<
blocks
,
threads
>>>
(
num_a
,
boxes_a
,
num_b
,
boxes_b
,
ans_overlap
);
#ifdef DEBUG
cudaDeviceSynchronize
();
// for using printf in kernel function
#endif
}
void
boxesioubevLauncher
(
const
int
num_a
,
const
float
*
boxes_a
,
const
int
num_b
,
const
float
*
boxes_b
,
float
*
ans_iou
){
dim3
blocks
(
DIVUP
(
num_b
,
THREADS_PER_BLOCK
),
DIVUP
(
num_a
,
THREADS_PER_BLOCK
));
// blockIdx.x(col), blockIdx.y(row)
dim3
threads
(
THREADS_PER_BLOCK
,
THREADS_PER_BLOCK
);
boxes_iou_bev_kernel
<<<
blocks
,
threads
>>>
(
num_a
,
boxes_a
,
num_b
,
boxes_b
,
ans_iou
);
#ifdef DEBUG
cudaDeviceSynchronize
();
// for using printf in kernel function
#endif
}
void
nmsLauncher
(
const
float
*
boxes
,
unsigned
long
long
*
mask
,
int
boxes_num
,
float
nms_overlap_thresh
){
dim3
blocks
(
DIVUP
(
boxes_num
,
THREADS_PER_BLOCK_NMS
),
DIVUP
(
boxes_num
,
THREADS_PER_BLOCK_NMS
));
dim3
threads
(
THREADS_PER_BLOCK_NMS
);
nms_kernel
<<<
blocks
,
threads
>>>
(
boxes_num
,
nms_overlap_thresh
,
boxes
,
mask
);
}
void
nmsNormalLauncher
(
const
float
*
boxes
,
unsigned
long
long
*
mask
,
int
boxes_num
,
float
nms_overlap_thresh
){
dim3
blocks
(
DIVUP
(
boxes_num
,
THREADS_PER_BLOCK_NMS
),
DIVUP
(
boxes_num
,
THREADS_PER_BLOCK_NMS
));
dim3
threads
(
THREADS_PER_BLOCK_NMS
);
nms_normal_kernel
<<<
blocks
,
threads
>>>
(
boxes_num
,
nms_overlap_thresh
,
boxes
,
mask
);
}
pcdet/ops/roiaware_pool3d/roiaware_pool3d_utils.py
0 → 100644
View file @
0f73c62c
import
torch
import
torch.nn
as
nn
from
torch.autograd
import
Function
from
...utils
import
common_utils
from
.
import
roiaware_pool3d_cuda
def
points_in_boxes_cpu
(
points
,
boxes
):
"""
Args:
points: (num_points, 3)
boxes: [x, y, z, dx, dy, dz, heading], (x, y, z) is the box center, each box DO NOT overlaps
Returns:
point_indices: (N, num_points)
"""
assert
boxes
.
shape
[
1
]
==
7
assert
points
.
shape
[
1
]
==
3
points
,
is_numpy
=
common_utils
.
check_numpy_to_torch
(
points
)
boxes
,
is_numpy
=
common_utils
.
check_numpy_to_torch
(
boxes
)
point_indices
=
points
.
new_zeros
((
boxes
.
shape
[
0
],
points
.
shape
[
0
]),
dtype
=
torch
.
int
)
roiaware_pool3d_cuda
.
points_in_boxes_cpu
(
boxes
.
float
().
contiguous
(),
points
.
float
().
contiguous
(),
point_indices
)
return
point_indices
.
numpy
()
if
is_numpy
else
point_indices
def
points_in_boxes_gpu
(
points
,
boxes
):
"""
:param points: (B, M, 3)
:param boxes: (B, T, 7), num_valid_boxes <= T
:return box_idxs_of_pts: (B, M), default background = -1
"""
assert
boxes
.
shape
[
0
]
==
points
.
shape
[
0
]
assert
boxes
.
shape
[
2
]
==
7
and
points
.
shape
[
2
]
==
3
batch_size
,
num_points
,
_
=
points
.
shape
box_idxs_of_pts
=
points
.
new_zeros
((
batch_size
,
num_points
),
dtype
=
torch
.
int
).
fill_
(
-
1
)
roiaware_pool3d_cuda
.
points_in_boxes_gpu
(
boxes
.
contiguous
(),
points
.
contiguous
(),
box_idxs_of_pts
)
return
box_idxs_of_pts
class
RoIAwarePool3d
(
nn
.
Module
):
def
__init__
(
self
,
out_size
,
max_pts_each_voxel
=
128
):
super
().
__init__
()
self
.
out_size
=
out_size
self
.
max_pts_each_voxel
=
max_pts_each_voxel
def
forward
(
self
,
rois
,
pts
,
pts_feature
,
pool_method
=
'max'
):
assert
pool_method
in
[
'max'
,
'avg'
]
return
RoIAwarePool3dFunction
.
apply
(
rois
,
pts
,
pts_feature
,
self
.
out_size
,
self
.
max_pts_each_voxel
,
pool_method
)
class
RoIAwarePool3dFunction
(
Function
):
@
staticmethod
def
forward
(
ctx
,
rois
,
pts
,
pts_feature
,
out_size
,
max_pts_each_voxel
,
pool_method
):
"""
Args:
ctx:
rois: (N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
pts: (npoints, 3)
pts_feature: (npoints, C)
out_size: int or tuple, like 7 or (7, 7, 7)
max_pts_each_voxel:
pool_method: 'max' or 'avg'
Returns:
pooled_features: (N, out_x, out_y, out_z, C)
"""
assert
rois
.
shape
[
1
]
==
7
and
pts
.
shape
[
1
]
==
3
if
isinstance
(
out_size
,
int
):
out_x
=
out_y
=
out_z
=
out_size
else
:
assert
len
(
out_size
)
==
3
for
k
in
range
(
3
):
assert
isinstance
(
out_size
[
k
],
int
)
out_x
,
out_y
,
out_z
=
out_size
num_rois
=
rois
.
shape
[
0
]
num_channels
=
pts_feature
.
shape
[
-
1
]
num_pts
=
pts
.
shape
[
0
]
pooled_features
=
pts_feature
.
new_zeros
((
num_rois
,
out_x
,
out_y
,
out_z
,
num_channels
))
argmax
=
pts_feature
.
new_zeros
((
num_rois
,
out_x
,
out_y
,
out_z
,
num_channels
),
dtype
=
torch
.
int
)
pts_idx_of_voxels
=
pts_feature
.
new_zeros
((
num_rois
,
out_x
,
out_y
,
out_z
,
max_pts_each_voxel
),
dtype
=
torch
.
int
)
pool_method_map
=
{
'max'
:
0
,
'avg'
:
1
}
pool_method
=
pool_method_map
[
pool_method
]
roiaware_pool3d_cuda
.
forward
(
rois
,
pts
,
pts_feature
,
argmax
,
pts_idx_of_voxels
,
pooled_features
,
pool_method
)
ctx
.
roiaware_pool3d_for_backward
=
(
pts_idx_of_voxels
,
argmax
,
pool_method
,
num_pts
,
num_channels
)
return
pooled_features
@
staticmethod
def
backward
(
ctx
,
grad_out
):
"""
:param grad_out: (N, out_x, out_y, out_z, C)
:return:
grad_in: (npoints, C)
"""
pts_idx_of_voxels
,
argmax
,
pool_method
,
num_pts
,
num_channels
=
ctx
.
roiaware_pool3d_for_backward
grad_in
=
grad_out
.
new_zeros
((
num_pts
,
num_channels
))
roiaware_pool3d_cuda
.
backward
(
pts_idx_of_voxels
,
argmax
,
grad_out
.
contiguous
(),
grad_in
,
pool_method
)
return
None
,
None
,
grad_in
,
None
,
None
,
None
if
__name__
==
'__main__'
:
pass
pcdet/ops/roiaware_pool3d/src/roiaware_pool3d.cpp
0 → 100644
View file @
0f73c62c
/*
RoI-aware point cloud feature pooling
Reference paper: https://arxiv.org/abs/1907.03670
Written by Shaoshuai Shi
All Rights Reserved 2019-2020.
*/
#include <torch/serialize/tensor.h>
#include <torch/extension.h>
#include <assert.h>
#define CHECK_CUDA(x) AT_CHECK(x.type().is_cuda(), #x, " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) AT_CHECK(x.is_contiguous(), #x, " must be contiguous ")
#define CHECK_INPUT(x) CHECK_CUDA(x);CHECK_CONTIGUOUS(x)
void
roiaware_pool3d_launcher
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
int
out_z
,
const
float
*
rois
,
const
float
*
pts
,
const
float
*
pts_feature
,
int
*
argmax
,
int
*
pts_idx_of_voxels
,
float
*
pooled_features
,
int
pool_method
);
void
roiaware_pool3d_backward_launcher
(
int
boxes_num
,
int
out_x
,
int
out_y
,
int
out_z
,
int
channels
,
int
max_pts_each_voxel
,
const
int
*
pts_idx_of_voxels
,
const
int
*
argmax
,
const
float
*
grad_out
,
float
*
grad_in
,
int
pool_method
);
void
points_in_boxes_launcher
(
int
batch_size
,
int
boxes_num
,
int
pts_num
,
const
float
*
boxes
,
const
float
*
pts
,
int
*
box_idx_of_points
);
int
roiaware_pool3d_gpu
(
at
::
Tensor
rois
,
at
::
Tensor
pts
,
at
::
Tensor
pts_feature
,
at
::
Tensor
argmax
,
at
::
Tensor
pts_idx_of_voxels
,
at
::
Tensor
pooled_features
,
int
pool_method
){
// params rois: (N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
// params pts: (npoints, 3) [x, y, z]
// params pts_feature: (npoints, C)
// params argmax: (N, out_x, out_y, out_z, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params pooled_features: (N, out_x, out_y, out_z, C)
// params pool_method: 0: max_pool 1: avg_pool
CHECK_INPUT
(
rois
);
CHECK_INPUT
(
pts
);
CHECK_INPUT
(
pts_feature
);
CHECK_INPUT
(
argmax
);
CHECK_INPUT
(
pts_idx_of_voxels
);
CHECK_INPUT
(
pooled_features
);
int
boxes_num
=
rois
.
size
(
0
);
int
pts_num
=
pts
.
size
(
0
);
int
channels
=
pts_feature
.
size
(
1
);
int
max_pts_each_voxel
=
pts_idx_of_voxels
.
size
(
4
);
// index 0 is the counter
int
out_x
=
pts_idx_of_voxels
.
size
(
1
);
int
out_y
=
pts_idx_of_voxels
.
size
(
2
);
int
out_z
=
pts_idx_of_voxels
.
size
(
3
);
assert
((
out_x
<
256
)
&&
(
out_y
<
256
)
&&
(
out_z
<
256
));
// we encode index with 8bit
const
float
*
rois_data
=
rois
.
data
<
float
>
();
const
float
*
pts_data
=
pts
.
data
<
float
>
();
const
float
*
pts_feature_data
=
pts_feature
.
data
<
float
>
();
int
*
argmax_data
=
argmax
.
data
<
int
>
();
int
*
pts_idx_of_voxels_data
=
pts_idx_of_voxels
.
data
<
int
>
();
float
*
pooled_features_data
=
pooled_features
.
data
<
float
>
();
roiaware_pool3d_launcher
(
boxes_num
,
pts_num
,
channels
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
rois_data
,
pts_data
,
pts_feature_data
,
argmax_data
,
pts_idx_of_voxels_data
,
pooled_features_data
,
pool_method
);
return
1
;
}
int
roiaware_pool3d_gpu_backward
(
at
::
Tensor
pts_idx_of_voxels
,
at
::
Tensor
argmax
,
at
::
Tensor
grad_out
,
at
::
Tensor
grad_in
,
int
pool_method
){
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
// params pool_method: 0: max_pool 1: avg_pool
CHECK_INPUT
(
pts_idx_of_voxels
);
CHECK_INPUT
(
argmax
);
CHECK_INPUT
(
grad_out
);
CHECK_INPUT
(
grad_in
);
int
boxes_num
=
pts_idx_of_voxels
.
size
(
0
);
int
out_x
=
pts_idx_of_voxels
.
size
(
1
);
int
out_y
=
pts_idx_of_voxels
.
size
(
2
);
int
out_z
=
pts_idx_of_voxels
.
size
(
3
);
int
max_pts_each_voxel
=
pts_idx_of_voxels
.
size
(
4
);
// index 0 is the counter
int
channels
=
grad_out
.
size
(
4
);
const
int
*
pts_idx_of_voxels_data
=
pts_idx_of_voxels
.
data
<
int
>
();
const
int
*
argmax_data
=
argmax
.
data
<
int
>
();
const
float
*
grad_out_data
=
grad_out
.
data
<
float
>
();
float
*
grad_in_data
=
grad_in
.
data
<
float
>
();
roiaware_pool3d_backward_launcher
(
boxes_num
,
out_x
,
out_y
,
out_z
,
channels
,
max_pts_each_voxel
,
pts_idx_of_voxels_data
,
argmax_data
,
grad_out_data
,
grad_in_data
,
pool_method
);
return
1
;
}
int
points_in_boxes_gpu
(
at
::
Tensor
boxes_tensor
,
at
::
Tensor
pts_tensor
,
at
::
Tensor
box_idx_of_points_tensor
){
// params boxes: (B, N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
// params pts: (B, npoints, 3) [x, y, z]
// params boxes_idx_of_points: (B, npoints), default -1
CHECK_INPUT
(
boxes_tensor
);
CHECK_INPUT
(
pts_tensor
);
CHECK_INPUT
(
box_idx_of_points_tensor
);
int
batch_size
=
boxes_tensor
.
size
(
0
);
int
boxes_num
=
boxes_tensor
.
size
(
1
);
int
pts_num
=
pts_tensor
.
size
(
1
);
const
float
*
boxes
=
boxes_tensor
.
data
<
float
>
();
const
float
*
pts
=
pts_tensor
.
data
<
float
>
();
int
*
box_idx_of_points
=
box_idx_of_points_tensor
.
data
<
int
>
();
points_in_boxes_launcher
(
batch_size
,
boxes_num
,
pts_num
,
boxes
,
pts
,
box_idx_of_points
);
return
1
;
}
inline
void
lidar_to_local_coords_cpu
(
float
shift_x
,
float
shift_y
,
float
rot_angle
,
float
&
local_x
,
float
&
local_y
){
float
cosa
=
cos
(
-
rot_angle
),
sina
=
sin
(
-
rot_angle
);
local_x
=
shift_x
*
cosa
+
shift_y
*
(
-
sina
);
local_y
=
shift_x
*
sina
+
shift_y
*
cosa
;
}
inline
int
check_pt_in_box3d_cpu
(
const
float
*
pt
,
const
float
*
box3d
,
float
&
local_x
,
float
&
local_y
){
// param pt: (x, y, z)
// param box3d: [x, y, z, dx, dy, dz, heading], (x, y, z) is the box center
const
float
MARGIN
=
1e-2
;
float
x
=
pt
[
0
],
y
=
pt
[
1
],
z
=
pt
[
2
];
float
cx
=
box3d
[
0
],
cy
=
box3d
[
1
],
cz
=
box3d
[
2
];
float
dx
=
box3d
[
3
],
dy
=
box3d
[
4
],
dz
=
box3d
[
5
],
rz
=
box3d
[
6
];
if
(
fabsf
(
z
-
cz
)
>
dz
/
2.0
)
return
0
;
lidar_to_local_coords_cpu
(
x
-
cx
,
y
-
cy
,
rz
,
local_x
,
local_y
);
float
in_flag
=
(
fabs
(
local_x
)
<
dx
/
2.0
+
MARGIN
)
&
(
fabs
(
local_y
)
<
dy
/
2.0
+
MARGIN
);
return
in_flag
;
}
int
points_in_boxes_cpu
(
at
::
Tensor
boxes_tensor
,
at
::
Tensor
pts_tensor
,
at
::
Tensor
pts_indices_tensor
){
// params boxes: (N, 7) [x, y, z, dx, dy, dz, heading], (x, y, z) is the box center, each box DO NOT overlaps
// params pts: (num_points, 3) [x, y, z]
// params pts_indices: (N, num_points)
CHECK_CONTIGUOUS
(
boxes_tensor
);
CHECK_CONTIGUOUS
(
pts_tensor
);
CHECK_CONTIGUOUS
(
pts_indices_tensor
);
int
boxes_num
=
boxes_tensor
.
size
(
0
);
int
pts_num
=
pts_tensor
.
size
(
0
);
const
float
*
boxes
=
boxes_tensor
.
data
<
float
>
();
const
float
*
pts
=
pts_tensor
.
data
<
float
>
();
int
*
pts_indices
=
pts_indices_tensor
.
data
<
int
>
();
float
local_x
=
0
,
local_y
=
0
;
for
(
int
i
=
0
;
i
<
boxes_num
;
i
++
){
for
(
int
j
=
0
;
j
<
pts_num
;
j
++
){
int
cur_in_flag
=
check_pt_in_box3d_cpu
(
pts
+
j
*
3
,
boxes
+
i
*
7
,
local_x
,
local_y
);
pts_indices
[
i
*
pts_num
+
j
]
=
cur_in_flag
;
}
}
return
1
;
}
PYBIND11_MODULE
(
TORCH_EXTENSION_NAME
,
m
)
{
m
.
def
(
"forward"
,
&
roiaware_pool3d_gpu
,
"roiaware pool3d forward (CUDA)"
);
m
.
def
(
"backward"
,
&
roiaware_pool3d_gpu_backward
,
"roiaware pool3d backward (CUDA)"
);
m
.
def
(
"points_in_boxes_gpu"
,
&
points_in_boxes_gpu
,
"points_in_boxes_gpu forward (CUDA)"
);
m
.
def
(
"points_in_boxes_cpu"
,
&
points_in_boxes_cpu
,
"points_in_boxes_cpu forward (CUDA)"
);
}
pcdet/ops/roiaware_pool3d/src/roiaware_pool3d_kernel.cu
0 → 100644
View file @
0f73c62c
/*
RoI-aware point cloud feature pooling
Written by Shaoshuai Shi
All Rights Reserved 2019-2020.
*/
#include <math.h>
#include <stdio.h>
#define THREADS_PER_BLOCK 256
#define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
// #define DEBUG
__device__
inline
void
lidar_to_local_coords
(
float
shift_x
,
float
shift_y
,
float
rot_angle
,
float
&
local_x
,
float
&
local_y
){
float
cosa
=
cos
(
-
rot_angle
),
sina
=
sin
(
-
rot_angle
);
local_x
=
shift_x
*
cosa
+
shift_y
*
(
-
sina
);
local_y
=
shift_x
*
sina
+
shift_y
*
cosa
;
}
__device__
inline
int
check_pt_in_box3d
(
const
float
*
pt
,
const
float
*
box3d
,
float
&
local_x
,
float
&
local_y
){
// param pt: (x, y, z)
// param box3d: [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
const
float
MARGIN
=
1e-5
;
float
x
=
pt
[
0
],
y
=
pt
[
1
],
z
=
pt
[
2
];
float
cx
=
box3d
[
0
],
cy
=
box3d
[
1
],
cz
=
box3d
[
2
];
float
dx
=
box3d
[
3
],
dy
=
box3d
[
4
],
dz
=
box3d
[
5
],
rz
=
box3d
[
6
];
if
(
fabsf
(
z
-
cz
)
>
dz
/
2.0
)
return
0
;
lidar_to_local_coords
(
x
-
cx
,
y
-
cy
,
rz
,
local_x
,
local_y
);
float
in_flag
=
(
fabs
(
local_x
)
<
dx
/
2.0
+
MARGIN
)
&
(
fabs
(
local_y
)
<
dy
/
2.0
+
MARGIN
);
return
in_flag
;
}
__global__
void
generate_pts_mask_for_box3d
(
int
boxes_num
,
int
pts_num
,
int
out_x
,
int
out_y
,
int
out_z
,
const
float
*
rois
,
const
float
*
pts
,
int
*
pts_mask
){
// params rois: [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
// params pts: (npoints, 3) [x, y, z]
// params pts_mask: (N, npoints): -1 means point doesnot in this box, otherwise: encode (x_idxs, y_idxs, z_idxs) by binary bit
int
pt_idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
box_idx
=
blockIdx
.
y
;
if
(
pt_idx
>=
pts_num
||
box_idx
>=
boxes_num
)
return
;
pts
+=
pt_idx
*
3
;
rois
+=
box_idx
*
7
;
pts_mask
+=
box_idx
*
pts_num
+
pt_idx
;
float
local_x
=
0
,
local_y
=
0
;
int
cur_in_flag
=
check_pt_in_box3d
(
pts
,
rois
,
local_x
,
local_y
);
pts_mask
[
0
]
=
-
1
;
if
(
cur_in_flag
>
0
){
float
local_z
=
pts
[
2
]
-
rois
[
2
];
float
dx
=
rois
[
3
],
dy
=
rois
[
4
],
dz
=
rois
[
5
];
float
x_res
=
dx
/
out_x
;
float
y_res
=
dy
/
out_y
;
float
z_res
=
dz
/
out_z
;
unsigned
int
x_idx
=
int
((
local_x
+
dx
/
2
)
/
x_res
);
unsigned
int
y_idx
=
int
((
local_y
+
dy
/
2
)
/
y_res
);
unsigned
int
z_idx
=
int
((
local_z
+
dz
/
2
)
/
z_res
);
x_idx
=
min
(
max
(
x_idx
,
0
),
out_x
-
1
);
y_idx
=
min
(
max
(
y_idx
,
0
),
out_y
-
1
);
z_idx
=
min
(
max
(
z_idx
,
0
),
out_z
-
1
);
unsigned
int
idx_encoding
=
(
x_idx
<<
16
)
+
(
y_idx
<<
8
)
+
z_idx
;
pts_mask
[
0
]
=
idx_encoding
;
}
}
__global__
void
collect_inside_pts_for_box3d
(
int
boxes_num
,
int
pts_num
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
int
out_z
,
const
int
*
pts_mask
,
int
*
pts_idx_of_voxels
){
// params pts_mask: (N, npoints) 0 or 1
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
int
box_idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
box_idx
>=
boxes_num
)
return
;
int
max_num_pts
=
max_pts_each_voxel
-
1
;
// index 0 is the counter
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
;
for
(
int
k
=
0
;
k
<
pts_num
;
k
++
){
if
(
pts_mask
[
box_idx
*
pts_num
+
k
]
!=
-
1
){
unsigned
int
idx_encoding
=
pts_mask
[
box_idx
*
pts_num
+
k
];
unsigned
int
x_idx
=
(
idx_encoding
>>
16
)
&
0xFF
;
unsigned
int
y_idx
=
(
idx_encoding
>>
8
)
&
0xFF
;
unsigned
int
z_idx
=
idx_encoding
&
0xFF
;
unsigned
int
base_offset
=
x_idx
*
out_y
*
out_z
*
max_pts_each_voxel
+
y_idx
*
out_z
*
max_pts_each_voxel
+
z_idx
*
max_pts_each_voxel
;
unsigned
int
cnt
=
pts_idx_of_voxels
[
base_offset
];
if
(
cnt
<
max_num_pts
){
pts_idx_of_voxels
[
base_offset
+
cnt
+
1
]
=
k
;
pts_idx_of_voxels
[
base_offset
]
++
;
}
#ifdef DEBUG
printf
(
"collect: pts_%d, idx(%d, %d, %d), idx_encoding=%x
\n
"
,
k
,
x_idx
,
y_idx
,
z_idx
,
idx_encoding
);
#endif
}
}
}
__global__
void
roiaware_maxpool3d
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
int
out_z
,
const
float
*
pts_feature
,
const
int
*
pts_idx_of_voxels
,
float
*
pooled_features
,
int
*
argmax
){
// params pts_feature: (npoints, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter
// params pooled_features: (N, out_x, out_y, out_z, C)
// params argmax: (N, out_x, out_y, out_z, C)
int
box_idx
=
blockIdx
.
z
;
int
channel_idx
=
blockIdx
.
y
;
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
#ifdef DEBUG
printf
(
"src pts_idx_of_voxels: (%p, ), argmax: %p
\n
"
,
pts_idx_of_voxels
,
argmax
);
#endif
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
offset_base
*
max_pts_each_voxel
;
pooled_features
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
argmax
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
int
argmax_idx
=
-
1
;
float
max_val
=
-
1e50
;
int
total_pts
=
pts_idx_of_voxels
[
0
];
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
){
if
(
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
]
>
max_val
){
max_val
=
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
];
argmax_idx
=
pts_idx_of_voxels
[
k
];
}
}
if
(
argmax_idx
!=
-
1
){
pooled_features
[
0
]
=
max_val
;
}
argmax
[
0
]
=
argmax_idx
;
#ifdef DEBUG
printf
(
"channel_%d idx(%d, %d, %d), argmax_idx=(%d, %.3f), total=%d, after pts_idx: %p, argmax: (%p, %d)
\n
"
,
channel_idx
,
x_idx
,
y_idx
,
z_idx
,
argmax_idx
,
max_val
,
total_pts
,
pts_idx_of_voxels
,
argmax
,
argmax_idx
);
#endif
}
__global__
void
roiaware_avgpool3d
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
int
out_z
,
const
float
*
pts_feature
,
const
int
*
pts_idx_of_voxels
,
float
*
pooled_features
){
// params pts_feature: (npoints, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel), index 0 is the counter
// params pooled_features: (N, out_x, out_y, out_z, C)
// params argmax: (N, out_x, out_y, out_z, C)
int
box_idx
=
blockIdx
.
z
;
int
channel_idx
=
blockIdx
.
y
;
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
offset_base
*
max_pts_each_voxel
;
pooled_features
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
float
sum_val
=
0
;
int
total_pts
=
pts_idx_of_voxels
[
0
];
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
){
sum_val
+=
pts_feature
[
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
];
}
if
(
total_pts
>
0
){
pooled_features
[
0
]
=
sum_val
/
total_pts
;
}
}
void
roiaware_pool3d_launcher
(
int
boxes_num
,
int
pts_num
,
int
channels
,
int
max_pts_each_voxel
,
int
out_x
,
int
out_y
,
int
out_z
,
const
float
*
rois
,
const
float
*
pts
,
const
float
*
pts_feature
,
int
*
argmax
,
int
*
pts_idx_of_voxels
,
float
*
pooled_features
,
int
pool_method
){
// params rois: (N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
// params pts: (npoints, 3) [x, y, z]
// params pts_feature: (npoints, C)
// params argmax: (N, out_x, out_y, out_z, C)
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params pooled_features: (N, out_x, out_y, out_z, C)
// params pool_method: 0: max_pool 1: avg_pool
int
*
pts_mask
=
NULL
;
cudaMalloc
(
&
pts_mask
,
boxes_num
*
pts_num
*
sizeof
(
int
));
// (N, M)
cudaMemset
(
pts_mask
,
-
1
,
boxes_num
*
pts_num
*
sizeof
(
int
));
dim3
blocks_mask
(
DIVUP
(
pts_num
,
THREADS_PER_BLOCK
),
boxes_num
);
dim3
threads
(
THREADS_PER_BLOCK
);
generate_pts_mask_for_box3d
<<<
blocks_mask
,
threads
>>>
(
boxes_num
,
pts_num
,
out_x
,
out_y
,
out_z
,
rois
,
pts
,
pts_mask
);
// TODO: Merge the collect and pool functions, SS
dim3
blocks_collect
(
DIVUP
(
boxes_num
,
THREADS_PER_BLOCK
));
collect_inside_pts_for_box3d
<<<
blocks_collect
,
threads
>>>
(
boxes_num
,
pts_num
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
pts_mask
,
pts_idx_of_voxels
);
dim3
blocks_pool
(
DIVUP
(
out_x
*
out_y
*
out_z
,
THREADS_PER_BLOCK
),
channels
,
boxes_num
);
if
(
pool_method
==
0
){
roiaware_maxpool3d
<<<
blocks_pool
,
threads
>>>
(
boxes_num
,
pts_num
,
channels
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
pts_feature
,
pts_idx_of_voxels
,
pooled_features
,
argmax
);
}
else
if
(
pool_method
==
1
){
roiaware_avgpool3d
<<<
blocks_pool
,
threads
>>>
(
boxes_num
,
pts_num
,
channels
,
max_pts_each_voxel
,
out_x
,
out_y
,
out_z
,
pts_feature
,
pts_idx_of_voxels
,
pooled_features
);
}
cudaFree
(
pts_mask
);
#ifdef DEBUG
cudaDeviceSynchronize
();
// for using printf in kernel function
#endif
}
__global__
void
roiaware_maxpool3d_backward
(
int
boxes_num
,
int
channels
,
int
out_x
,
int
out_y
,
int
out_z
,
const
int
*
argmax
,
const
float
*
grad_out
,
float
*
grad_in
){
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
int
box_idx
=
blockIdx
.
z
;
int
channel_idx
=
blockIdx
.
y
;
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
argmax
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
grad_out
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
if
(
argmax
[
0
]
==
-
1
)
return
;
atomicAdd
(
grad_in
+
argmax
[
0
]
*
channels
+
channel_idx
,
grad_out
[
0
]
*
1
);
}
__global__
void
roiaware_avgpool3d_backward
(
int
boxes_num
,
int
channels
,
int
out_x
,
int
out_y
,
int
out_z
,
int
max_pts_each_voxel
,
const
int
*
pts_idx_of_voxels
,
const
float
*
grad_out
,
float
*
grad_in
){
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
int
box_idx
=
blockIdx
.
z
;
int
channel_idx
=
blockIdx
.
y
;
int
voxel_idx_flat
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
x_idx
=
voxel_idx_flat
/
(
out_y
*
out_z
);
int
y_idx
=
(
voxel_idx_flat
-
x_idx
*
(
out_y
*
out_z
))
/
out_z
;
int
z_idx
=
voxel_idx_flat
%
out_z
;
if
(
box_idx
>=
boxes_num
||
channel_idx
>=
channels
||
x_idx
>=
out_x
||
y_idx
>=
out_y
||
z_idx
>=
out_z
)
return
;
int
offset_base
=
x_idx
*
out_y
*
out_z
+
y_idx
*
out_z
+
z_idx
;
pts_idx_of_voxels
+=
box_idx
*
out_x
*
out_y
*
out_z
*
max_pts_each_voxel
+
offset_base
*
max_pts_each_voxel
;
grad_out
+=
box_idx
*
out_x
*
out_y
*
out_z
*
channels
+
offset_base
*
channels
+
channel_idx
;
int
total_pts
=
pts_idx_of_voxels
[
0
];
float
cur_grad
=
1
/
fmaxf
(
float
(
total_pts
),
1.0
);
for
(
int
k
=
1
;
k
<=
total_pts
;
k
++
){
atomicAdd
(
grad_in
+
pts_idx_of_voxels
[
k
]
*
channels
+
channel_idx
,
grad_out
[
0
]
*
cur_grad
);
}
}
void
roiaware_pool3d_backward_launcher
(
int
boxes_num
,
int
out_x
,
int
out_y
,
int
out_z
,
int
channels
,
int
max_pts_each_voxel
,
const
int
*
pts_idx_of_voxels
,
const
int
*
argmax
,
const
float
*
grad_out
,
float
*
grad_in
,
int
pool_method
){
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params argmax: (N, out_x, out_y, out_z, C)
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
// params pool_method: 0: max_pool, 1: avg_pool
dim3
blocks
(
DIVUP
(
out_x
*
out_y
*
out_z
,
THREADS_PER_BLOCK
),
channels
,
boxes_num
);
dim3
threads
(
THREADS_PER_BLOCK
);
if
(
pool_method
==
0
){
roiaware_maxpool3d_backward
<<<
blocks
,
threads
>>>
(
boxes_num
,
channels
,
out_x
,
out_y
,
out_z
,
argmax
,
grad_out
,
grad_in
);
}
else
if
(
pool_method
==
1
){
roiaware_avgpool3d_backward
<<<
blocks
,
threads
>>>
(
boxes_num
,
channels
,
out_x
,
out_y
,
out_z
,
max_pts_each_voxel
,
pts_idx_of_voxels
,
grad_out
,
grad_in
);
}
}
__global__
void
points_in_boxes_kernel
(
int
batch_size
,
int
boxes_num
,
int
pts_num
,
const
float
*
boxes
,
const
float
*
pts
,
int
*
box_idx_of_points
){
// params boxes: (B, N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
// params pts: (B, npoints, 3) [x, y, z] in LiDAR coordinate
// params boxes_idx_of_points: (B, npoints), default -1
int
bs_idx
=
blockIdx
.
y
;
int
pt_idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
bs_idx
>=
batch_size
||
pt_idx
>=
pts_num
)
return
;
boxes
+=
bs_idx
*
boxes_num
*
7
;
pts
+=
bs_idx
*
pts_num
*
3
+
pt_idx
*
3
;
box_idx_of_points
+=
bs_idx
*
pts_num
+
pt_idx
;
float
local_x
=
0
,
local_y
=
0
;
int
cur_in_flag
=
0
;
for
(
int
k
=
0
;
k
<
boxes_num
;
k
++
){
cur_in_flag
=
check_pt_in_box3d
(
pts
,
boxes
+
k
*
7
,
local_x
,
local_y
);
if
(
cur_in_flag
){
box_idx_of_points
[
0
]
=
k
;
break
;
}
}
}
void
points_in_boxes_launcher
(
int
batch_size
,
int
boxes_num
,
int
pts_num
,
const
float
*
boxes
,
const
float
*
pts
,
int
*
box_idx_of_points
){
// params boxes: (B, N, 7) [x, y, z, dx, dy, dz, heading] (x, y, z) is the box center
// params pts: (B, npoints, 3) [x, y, z]
// params boxes_idx_of_points: (B, npoints), default -1
cudaError_t
err
;
dim3
blocks
(
DIVUP
(
pts_num
,
THREADS_PER_BLOCK
),
batch_size
);
dim3
threads
(
THREADS_PER_BLOCK
);
points_in_boxes_kernel
<<<
blocks
,
threads
>>>
(
batch_size
,
boxes_num
,
pts_num
,
boxes
,
pts
,
box_idx_of_points
);
err
=
cudaGetLastError
();
if
(
cudaSuccess
!=
err
)
{
fprintf
(
stderr
,
"CUDA kernel failed : %s
\n
"
,
cudaGetErrorString
(
err
));
exit
(
-
1
);
}
#ifdef DEBUG
cudaDeviceSynchronize
();
// for using printf in kernel function
#endif
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment