Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
TS-MODELS-OPT
training
Autonomous-Driving-models
Commits
d2b71343
Commit
d2b71343
authored
Apr 08, 2026
by
雍大凯
Browse files
add code
parent
69e57885
Changes
259
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1212 additions
and
0 deletions
+1212
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_max_pool_hip.h
...ojects/mmdet3d_plugin/ops/bev_pool/src/bev_max_pool_hip.h
+34
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_pooling.cpp
.../projects/mmdet3d_plugin/ops/bev_pool/src/bev_pooling.cpp
+17
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_pooling_hip.cpp
...jects/mmdet3d_plugin/ops/bev_pool/src/bev_pooling_hip.cpp
+19
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.cpp
...projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.cpp
+80
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.h
...c/projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.h
+32
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_cuda.cu
...ects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_cuda.cu
+101
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_cuda.hip
...cts/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_cuda.hip
+104
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_hip.cpp
...ects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_hip.cpp
+82
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_hip.h
...ojects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_hip.h
+34
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/__init__.py
...shocc/projects/mmdet3d_plugin/ops/bev_pool_v2/__init__.py
+2
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/__pycache__/__init__.cpython-310.pyc
...ugin/ops/bev_pool_v2/__pycache__/__init__.cpython-310.pyc
+0
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/__pycache__/bev_pool.cpython-310.pyc
...ugin/ops/bev_pool_v2/__pycache__/bev_pool.cpython-310.pyc
+0
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/bev_pool.py
...shocc/projects/mmdet3d_plugin/ops/bev_pool_v2/bev_pool.py
+194
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/bev_pool_v2_ext.cpython-310-x86_64-linux-gnu.so
...v_pool_v2/bev_pool_v2_ext.cpython-310-x86_64-linux-gnu.so
+0
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool.cpp
.../projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool.cpp
+111
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_cuda.cu
...jects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_cuda.cu
+142
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_cuda.hip
...ects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_cuda.hip
+145
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_hip.cpp
...jects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_hip.cpp
+113
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/__init__.py
...cc/projects/mmdet3d_plugin/ops/nearest_assign/__init__.py
+2
-0
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/__pycache__/__init__.cpython-310.pyc
...n/ops/nearest_assign/__pycache__/__init__.cpython-310.pyc
+0
-0
No files found.
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_max_pool_hip.h
0 → 100644
View file @
d2b71343
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
#ifndef _BEV_MAX_POOL_H
#define _BEV_MAX_POOL_H
#include <torch/torch.h>
#include <ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.h>
at
::
Tensor
bev_max_pool_forward
(
const
at
::
Tensor
_geom_feats
,
const
at
::
Tensor
_geom_coords
,
const
at
::
Tensor
_interval_lengths
,
const
at
::
Tensor
_interval_starts
,
int
b
,
int
d
,
int
h
,
int
w
);
at
::
Tensor
bev_max_pool_backward
(
const
at
::
Tensor
_out_grad
,
const
at
::
Tensor
_geom_coords
,
const
at
::
Tensor
_interval_lengths
,
const
at
::
Tensor
_interval_starts
,
int
b
,
int
d
,
int
h
,
int
w
);
// CUDA function declarations
void
bev_max_pool
(
int
b
,
int
d
,
int
h
,
int
w
,
int
n
,
int
c
,
int
n_intervals
,
const
float
*
x
,
const
int
*
geom_feats
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
out
);
void
bev_max_pool_grad
(
int
b
,
int
d
,
int
h
,
int
w
,
int
n
,
int
c
,
int
n_intervals
,
const
float
*
out_grad
,
const
int
*
geom_feats
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
x_grad
);
#endif
\ No newline at end of file
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_pooling.cpp
0 → 100644
View file @
d2b71343
#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>
#include "bev_sum_pool.h"
#include "bev_max_pool.h"
PYBIND11_MODULE
(
TORCH_EXTENSION_NAME
,
m
)
{
m
.
def
(
"bev_sum_pool_forward"
,
&
bev_sum_pool_forward
,
"bev_sum_pool_forward"
);
m
.
def
(
"bev_sum_pool_backward"
,
&
bev_sum_pool_backward
,
"bev_sum_pool_backward"
);
m
.
def
(
"bev_max_pool_forward"
,
&
bev_max_pool_forward
,
"bev_max_pool_forward"
);
m
.
def
(
"bev_max_pool_backward"
,
&
bev_max_pool_backward
,
"bev_max_pool_backward"
);
}
\ No newline at end of file
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_pooling_hip.cpp
0 → 100644
View file @
d2b71343
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
#include <torch/torch.h>
#include <ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.h>
#include "bev_sum_pool_hip.h"
#include "bev_max_pool_hip.h"
PYBIND11_MODULE
(
TORCH_EXTENSION_NAME
,
m
)
{
m
.
def
(
"bev_sum_pool_forward"
,
&
bev_sum_pool_forward
,
"bev_sum_pool_forward"
);
m
.
def
(
"bev_sum_pool_backward"
,
&
bev_sum_pool_backward
,
"bev_sum_pool_backward"
);
m
.
def
(
"bev_max_pool_forward"
,
&
bev_max_pool_forward
,
"bev_max_pool_forward"
);
m
.
def
(
"bev_max_pool_backward"
,
&
bev_max_pool_backward
,
"bev_max_pool_backward"
);
}
\ No newline at end of file
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.cpp
0 → 100644
View file @
d2b71343
#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>
#include "bev_sum_pool.h"
/*
Function: pillar pooling (forward, cuda)
Args:
geom_feats : input features, FloatTensor[N, C]
_geom_coords : input coordinates, IntTensor[N, 4] 4: (x_id, y_id, z_id, batch_id)
interval_lengths : how many points in each pooled point, IntTensor[N_pillar, ]
interval_starts : starting position for pooled point, IntTensor [N_pillar, ]
Return:
out : output features, FloatTensor[b, d, h, w, c]
*/
at
::
Tensor
bev_sum_pool_forward
(
const
at
::
Tensor
_geom_feats
,
const
at
::
Tensor
_geom_coords
,
const
at
::
Tensor
_interval_lengths
,
const
at
::
Tensor
_interval_starts
,
int
b
,
int
d
,
int
h
,
int
w
)
{
int
n
=
_geom_feats
.
size
(
0
);
int
c
=
_geom_feats
.
size
(
1
);
int
n_intervals
=
_interval_lengths
.
size
(
0
);
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
_geom_feats
));
const
float
*
geom_feats
=
_geom_feats
.
data_ptr
<
float
>
();
const
int
*
geom_coords
=
_geom_coords
.
data_ptr
<
int
>
();
const
int
*
interval_lengths
=
_interval_lengths
.
data_ptr
<
int
>
();
const
int
*
interval_starts
=
_interval_starts
.
data_ptr
<
int
>
();
auto
options
=
torch
::
TensorOptions
().
dtype
(
_geom_feats
.
dtype
()).
device
(
_geom_feats
.
device
());
at
::
Tensor
_out
=
torch
::
zeros
({
b
,
d
,
h
,
w
,
c
},
options
);
// (B, D=Dz, H=Dy, W=Dx, C)
float
*
out
=
_out
.
data_ptr
<
float
>
();
bev_sum_pool
(
b
,
d
,
h
,
w
,
n
,
c
,
n_intervals
,
geom_feats
,
geom_coords
,
interval_starts
,
interval_lengths
,
out
);
return
_out
;
}
/*
Function: pillar pooling (backward, cuda)
Args:
out_grad : input features, FloatTensor[B, D, H, W, C]
geom_coords : input coordinates, IntTensor[N, 4]
interval_lengths : how many points in each pooled point, IntTensor[N_pillar, ]
interval_starts : starting position for pooled point, IntTensor [N_pillar, ]
Return:
x_grad : output features, FloatTensor[N, C]
*/
at
::
Tensor
bev_sum_pool_backward
(
const
at
::
Tensor
_out_grad
,
const
at
::
Tensor
_geom_coords
,
const
at
::
Tensor
_interval_lengths
,
const
at
::
Tensor
_interval_starts
,
int
b
,
int
d
,
int
h
,
int
w
)
{
int
n
=
_geom_coords
.
size
(
0
);
int
c
=
_out_grad
.
size
(
4
);
int
n_intervals
=
_interval_lengths
.
size
(
0
);
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
_out_grad
));
const
float
*
out_grad
=
_out_grad
.
data_ptr
<
float
>
();
const
int
*
geom_coords
=
_geom_coords
.
data_ptr
<
int
>
();
const
int
*
interval_lengths
=
_interval_lengths
.
data_ptr
<
int
>
();
const
int
*
interval_starts
=
_interval_starts
.
data_ptr
<
int
>
();
auto
options
=
torch
::
TensorOptions
().
dtype
(
_out_grad
.
dtype
()).
device
(
_out_grad
.
device
());
at
::
Tensor
_x_grad
=
torch
::
zeros
({
n
,
c
},
options
);
// (N, C)
float
*
x_grad
=
_x_grad
.
data_ptr
<
float
>
();
bev_sum_pool_grad
(
b
,
d
,
h
,
w
,
n
,
c
,
n_intervals
,
out_grad
,
geom_coords
,
interval_starts
,
interval_lengths
,
x_grad
);
return
_x_grad
;
}
\ No newline at end of file
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool.h
0 → 100644
View file @
d2b71343
#ifndef _BEV_SUM_POOL_H
#define _BEV_SUM_POOL_H
#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>
at
::
Tensor
bev_sum_pool_forward
(
const
at
::
Tensor
_geom_feats
,
const
at
::
Tensor
_geom_coords
,
const
at
::
Tensor
_interval_lengths
,
const
at
::
Tensor
_interval_starts
,
int
b
,
int
d
,
int
h
,
int
w
);
at
::
Tensor
bev_sum_pool_backward
(
const
at
::
Tensor
_out_grad
,
const
at
::
Tensor
_geom_coords
,
const
at
::
Tensor
_interval_lengths
,
const
at
::
Tensor
_interval_starts
,
int
b
,
int
d
,
int
h
,
int
w
);
// CUDA function declarations
void
bev_sum_pool
(
int
b
,
int
d
,
int
h
,
int
w
,
int
n
,
int
c
,
int
n_intervals
,
const
float
*
x
,
const
int
*
geom_feats
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
out
);
void
bev_sum_pool_grad
(
int
b
,
int
d
,
int
h
,
int
w
,
int
n
,
int
c
,
int
n_intervals
,
const
float
*
out_grad
,
const
int
*
geom_feats
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
x_grad
);
#endif
\ No newline at end of file
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_cuda.cu
0 → 100644
View file @
d2b71343
#include <stdio.h>
#include <stdlib.h>
#include "bev_sum_pool.h"
/*
Function: pillar pooling
Args:
b : batch size
d : depth of the feature map
h : height of pooled feature map
w : width of pooled feature map
n : number of input points
c : number of channels
n_intervals : number of unique points
geom_feats : input features, FloatTensor[n, c]
geom_coords : input coordinates, IntTensor[n, 4] 4: (x_id, y_id, z_id, batch_id)
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
out : output features, FloatTensor[b, d, h, w, c]
*/
__global__
void
bev_sum_pool_kernel
(
int
b
,
int
d
,
int
h
,
int
w
,
int
n
,
int
c
,
int
n_intervals
,
const
float
*
__restrict__
geom_feats
,
const
int
*
__restrict__
geom_coords
,
const
int
*
__restrict__
interval_starts
,
const
int
*
__restrict__
interval_lengths
,
float
*
__restrict__
out
)
{
int
idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
idx
/
c
;
int
cur_c
=
idx
%
c
;
if
(
index
>=
n_intervals
)
return
;
int
interval_start
=
interval_starts
[
index
];
int
interval_length
=
interval_lengths
[
index
];
const
int
*
cur_geom_coords
=
geom_coords
+
interval_start
*
4
;
// 当前负责计算的pillar的坐标 4: (x_id, y_id, z_id, batch_id)
const
float
*
cur_geom_feats
=
geom_feats
+
interval_start
*
c
+
cur_c
;
float
*
cur_out
=
out
+
cur_geom_coords
[
3
]
*
d
*
h
*
w
*
c
+
cur_geom_coords
[
2
]
*
h
*
w
*
c
+
cur_geom_coords
[
1
]
*
w
*
c
+
cur_geom_coords
[
0
]
*
c
+
cur_c
;
float
psum
=
0
;
for
(
int
i
=
0
;
i
<
interval_length
;
i
++
){
psum
+=
cur_geom_feats
[
i
*
c
];
}
*
cur_out
=
psum
;
}
/*
Function: pillar pooling backward
Args:
b : batch size
d : depth of the feature map
h : height of pooled feature map
w : width of pooled feature map
n : number of input points
c : number of channels
n_intervals : number of unique points
out_grad : gradient of the BEV fmap from top, FloatTensor[b, d, h, w, c]
geom_coords : input coordinates, IntTensor[N, 4] 4: (x_id, y_id, z_id, batch_id)
interval_lengths : how many points in each pooled point, IntTensor[n_intervals]
interval_starts : starting position for pooled point, IntTensor[n_intervals]
x_grad : gradient of the image fmap, FloatTensor
*/
__global__
void
bev_sum_pool_grad_kernel
(
int
b
,
int
d
,
int
h
,
int
w
,
int
n
,
int
c
,
int
n_intervals
,
const
float
*
__restrict__
out_grad
,
const
int
*
__restrict__
geom_coords
,
const
int
*
__restrict__
interval_starts
,
const
int
*
__restrict__
interval_lengths
,
float
*
__restrict__
x_grad
)
{
int
idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
index
=
idx
/
c
;
int
cur_c
=
idx
%
c
;
if
(
index
>=
n_intervals
)
return
;
int
interval_start
=
interval_starts
[
index
];
int
interval_length
=
interval_lengths
[
index
];
// 当前负责计算的pillar的坐标 4: (x_id, y_id, z_id, batch_id)
// 该pillar中所有点的梯度 与 该pillar特征的梯度相同.
const
int
*
cur_geom_coords
=
geom_coords
+
interval_start
*
4
;
float
*
cur_x_grad
=
x_grad
+
interval_start
*
c
+
cur_c
;
const
float
*
cur_out_grad
=
out_grad
+
cur_geom_coords
[
3
]
*
d
*
h
*
w
*
c
+
cur_geom_coords
[
2
]
*
h
*
w
*
c
+
cur_geom_coords
[
1
]
*
w
*
c
+
cur_geom_coords
[
0
]
*
c
+
cur_c
;
for
(
int
i
=
0
;
i
<
interval_length
;
i
++
){
cur_x_grad
[
i
*
c
]
=
*
cur_out_grad
;
}
}
void
bev_sum_pool
(
int
b
,
int
d
,
int
h
,
int
w
,
int
n
,
int
c
,
int
n_intervals
,
const
float
*
geom_feats
,
const
int
*
geom_coords
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
out
)
{
bev_sum_pool_kernel
<<<
(
int
)
ceil
(((
double
)
n_intervals
*
c
/
256
)),
256
>>>
(
b
,
d
,
h
,
w
,
n
,
c
,
n_intervals
,
geom_feats
,
geom_coords
,
interval_starts
,
interval_lengths
,
out
);
}
void
bev_sum_pool_grad
(
int
b
,
int
d
,
int
h
,
int
w
,
int
n
,
int
c
,
int
n_intervals
,
const
float
*
out_grad
,
const
int
*
geom_coords
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
x_grad
)
{
bev_sum_pool_grad_kernel
<<<
(
int
)
ceil
(((
double
)
n_intervals
*
c
/
256
)),
256
>>>
(
b
,
d
,
h
,
w
,
n
,
c
,
n_intervals
,
out_grad
,
geom_coords
,
interval_starts
,
interval_lengths
,
x_grad
);
}
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_cuda.hip
0 → 100644
View file @
d2b71343
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
#include "hip/hip_runtime.h"
#include <stdio.h>
#include <stdlib.h>
#include "bev_sum_pool_hip.h"
/*
Function: pillar pooling
Args:
b : batch size
d : depth of the feature map
h : height of pooled feature map
w : width of pooled feature map
n : number of input points
c : number of channels
n_intervals : number of unique points
geom_feats : input features, FloatTensor[n, c]
geom_coords : input coordinates, IntTensor[n, 4] 4: (x_id, y_id, z_id, batch_id)
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
out : output features, FloatTensor[b, d, h, w, c]
*/
__global__ void bev_sum_pool_kernel(int b, int d, int h, int w, int n, int c, int n_intervals,
const float *__restrict__ geom_feats,
const int *__restrict__ geom_coords,
const int *__restrict__ interval_starts,
const int *__restrict__ interval_lengths,
float* __restrict__ out) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
int index = idx / c;
int cur_c = idx % c;
if (index >= n_intervals) return;
int interval_start = interval_starts[index];
int interval_length = interval_lengths[index];
const int* cur_geom_coords = geom_coords + interval_start * 4; // 当前负责计算的pillar的坐标 4: (x_id, y_id, z_id, batch_id)
const float* cur_geom_feats = geom_feats + interval_start * c + cur_c;
float* cur_out = out + cur_geom_coords[3] * d * h * w * c +
cur_geom_coords[2] * h * w * c + cur_geom_coords[1] * w * c +
cur_geom_coords[0] * c + cur_c;
float psum = 0;
for(int i = 0; i < interval_length; i++){
psum += cur_geom_feats[i * c];
}
*cur_out = psum;
}
/*
Function: pillar pooling backward
Args:
b : batch size
d : depth of the feature map
h : height of pooled feature map
w : width of pooled feature map
n : number of input points
c : number of channels
n_intervals : number of unique points
out_grad : gradient of the BEV fmap from top, FloatTensor[b, d, h, w, c]
geom_coords : input coordinates, IntTensor[N, 4] 4: (x_id, y_id, z_id, batch_id)
interval_lengths : how many points in each pooled point, IntTensor[n_intervals]
interval_starts : starting position for pooled point, IntTensor[n_intervals]
x_grad : gradient of the image fmap, FloatTensor
*/
__global__ void bev_sum_pool_grad_kernel(int b, int d, int h, int w, int n, int c, int n_intervals,
const float *__restrict__ out_grad,
const int *__restrict__ geom_coords,
const int *__restrict__ interval_starts,
const int *__restrict__ interval_lengths,
float* __restrict__ x_grad) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
int index = idx / c;
int cur_c = idx % c;
if (index >= n_intervals) return;
int interval_start = interval_starts[index];
int interval_length = interval_lengths[index];
// 当前负责计算的pillar的坐标 4: (x_id, y_id, z_id, batch_id)
// 该pillar中所有点的梯度 与 该pillar特征的梯度相同.
const int* cur_geom_coords = geom_coords + interval_start * 4;
float* cur_x_grad = x_grad + interval_start * c + cur_c;
const float* cur_out_grad = out_grad + cur_geom_coords[3] * d * h * w * c +
cur_geom_coords[2] * h * w * c + cur_geom_coords[1] * w * c +
cur_geom_coords[0] * c + cur_c;
for(int i = 0; i < interval_length; i++){
cur_x_grad[i * c] = *cur_out_grad;
}
}
void bev_sum_pool(int b, int d, int h, int w, int n, int c, int n_intervals, const float* geom_feats,
const int* geom_coords, const int* interval_starts, const int* interval_lengths, float* out) {
hipLaunchKernelGGL(( bev_sum_pool_kernel), dim3((int)ceil(((double)n_intervals * c / 256))), dim3(256), 0, 0,
b, d, h, w, n, c, n_intervals, geom_feats, geom_coords, interval_starts, interval_lengths, out
);
}
void bev_sum_pool_grad(int b, int d, int h, int w, int n, int c, int n_intervals, const float* out_grad,
const int* geom_coords, const int* interval_starts, const int* interval_lengths, float* x_grad) {
hipLaunchKernelGGL(( bev_sum_pool_grad_kernel), dim3((int)ceil(((double)n_intervals * c / 256))), dim3(256), 0, 0,
b, d, h, w, n, c, n_intervals, out_grad, geom_coords, interval_starts, interval_lengths, x_grad
);
}
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_hip.cpp
0 → 100644
View file @
d2b71343
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
#include <torch/torch.h>
#include <ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.h>
#include "bev_sum_pool_hip.h"
/*
Function: pillar pooling (forward, cuda)
Args:
geom_feats : input features, FloatTensor[N, C]
_geom_coords : input coordinates, IntTensor[N, 4] 4: (x_id, y_id, z_id, batch_id)
interval_lengths : how many points in each pooled point, IntTensor[N_pillar, ]
interval_starts : starting position for pooled point, IntTensor [N_pillar, ]
Return:
out : output features, FloatTensor[b, d, h, w, c]
*/
at
::
Tensor
bev_sum_pool_forward
(
const
at
::
Tensor
_geom_feats
,
const
at
::
Tensor
_geom_coords
,
const
at
::
Tensor
_interval_lengths
,
const
at
::
Tensor
_interval_starts
,
int
b
,
int
d
,
int
h
,
int
w
)
{
int
n
=
_geom_feats
.
size
(
0
);
int
c
=
_geom_feats
.
size
(
1
);
int
n_intervals
=
_interval_lengths
.
size
(
0
);
const
at
::
hip
::
OptionalHIPGuardMasqueradingAsCUDA
device_guard
(
device_of
(
_geom_feats
));
const
float
*
geom_feats
=
_geom_feats
.
data_ptr
<
float
>
();
const
int
*
geom_coords
=
_geom_coords
.
data_ptr
<
int
>
();
const
int
*
interval_lengths
=
_interval_lengths
.
data_ptr
<
int
>
();
const
int
*
interval_starts
=
_interval_starts
.
data_ptr
<
int
>
();
auto
options
=
torch
::
TensorOptions
().
dtype
(
_geom_feats
.
dtype
()).
device
(
_geom_feats
.
device
());
at
::
Tensor
_out
=
torch
::
zeros
({
b
,
d
,
h
,
w
,
c
},
options
);
// (B, D=Dz, H=Dy, W=Dx, C)
float
*
out
=
_out
.
data_ptr
<
float
>
();
bev_sum_pool
(
b
,
d
,
h
,
w
,
n
,
c
,
n_intervals
,
geom_feats
,
geom_coords
,
interval_starts
,
interval_lengths
,
out
);
return
_out
;
}
/*
Function: pillar pooling (backward, cuda)
Args:
out_grad : input features, FloatTensor[B, D, H, W, C]
geom_coords : input coordinates, IntTensor[N, 4]
interval_lengths : how many points in each pooled point, IntTensor[N_pillar, ]
interval_starts : starting position for pooled point, IntTensor [N_pillar, ]
Return:
x_grad : output features, FloatTensor[N, C]
*/
at
::
Tensor
bev_sum_pool_backward
(
const
at
::
Tensor
_out_grad
,
const
at
::
Tensor
_geom_coords
,
const
at
::
Tensor
_interval_lengths
,
const
at
::
Tensor
_interval_starts
,
int
b
,
int
d
,
int
h
,
int
w
)
{
int
n
=
_geom_coords
.
size
(
0
);
int
c
=
_out_grad
.
size
(
4
);
int
n_intervals
=
_interval_lengths
.
size
(
0
);
const
at
::
hip
::
OptionalHIPGuardMasqueradingAsCUDA
device_guard
(
device_of
(
_out_grad
));
const
float
*
out_grad
=
_out_grad
.
data_ptr
<
float
>
();
const
int
*
geom_coords
=
_geom_coords
.
data_ptr
<
int
>
();
const
int
*
interval_lengths
=
_interval_lengths
.
data_ptr
<
int
>
();
const
int
*
interval_starts
=
_interval_starts
.
data_ptr
<
int
>
();
auto
options
=
torch
::
TensorOptions
().
dtype
(
_out_grad
.
dtype
()).
device
(
_out_grad
.
device
());
at
::
Tensor
_x_grad
=
torch
::
zeros
({
n
,
c
},
options
);
// (N, C)
float
*
x_grad
=
_x_grad
.
data_ptr
<
float
>
();
bev_sum_pool_grad
(
b
,
d
,
h
,
w
,
n
,
c
,
n_intervals
,
out_grad
,
geom_coords
,
interval_starts
,
interval_lengths
,
x_grad
);
return
_x_grad
;
}
\ No newline at end of file
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool/src/bev_sum_pool_hip.h
0 → 100644
View file @
d2b71343
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
#ifndef _BEV_SUM_POOL_H
#define _BEV_SUM_POOL_H
#include <torch/torch.h>
#include <ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.h>
at
::
Tensor
bev_sum_pool_forward
(
const
at
::
Tensor
_geom_feats
,
const
at
::
Tensor
_geom_coords
,
const
at
::
Tensor
_interval_lengths
,
const
at
::
Tensor
_interval_starts
,
int
b
,
int
d
,
int
h
,
int
w
);
at
::
Tensor
bev_sum_pool_backward
(
const
at
::
Tensor
_out_grad
,
const
at
::
Tensor
_geom_coords
,
const
at
::
Tensor
_interval_lengths
,
const
at
::
Tensor
_interval_starts
,
int
b
,
int
d
,
int
h
,
int
w
);
// CUDA function declarations
void
bev_sum_pool
(
int
b
,
int
d
,
int
h
,
int
w
,
int
n
,
int
c
,
int
n_intervals
,
const
float
*
x
,
const
int
*
geom_feats
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
out
);
void
bev_sum_pool_grad
(
int
b
,
int
d
,
int
h
,
int
w
,
int
n
,
int
c
,
int
n_intervals
,
const
float
*
out_grad
,
const
int
*
geom_feats
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
x_grad
);
#endif
\ No newline at end of file
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/__init__.py
0 → 100644
View file @
d2b71343
# Copyright (c) Phigent Robotics. All rights reserved.
from
.bev_pool
import
bev_pool_v2
,
TRTBEVPoolv2
\ No newline at end of file
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/__pycache__/__init__.cpython-310.pyc
0 → 100644
View file @
d2b71343
File added
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/__pycache__/bev_pool.cpython-310.pyc
0 → 100644
View file @
d2b71343
File added
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/bev_pool.py
0 → 100644
View file @
d2b71343
# Copyright (c) Phigent Robotics. All rights reserved.
import
numpy
as
np
import
torch
from
.
import
bev_pool_v2_ext
__all__
=
[
'bev_pool_v2'
,
'TRTBEVPoolv2'
]
class
QuickCumsumCuda
(
torch
.
autograd
.
Function
):
r
"""BEVPoolv2 implementation for Lift-Splat-Shoot view transformation.
Please refer to the `paper <https://arxiv.org/abs/2211.17111>`_
"""
@
staticmethod
def
forward
(
ctx
,
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
bev_feat_shape
,
interval_starts
,
interval_lengths
):
ranks_bev
=
ranks_bev
.
int
()
# (N_points, ),
depth
=
depth
.
contiguous
().
float
()
# (B, N, D, fH, fW)
feat
=
feat
.
contiguous
().
float
()
# (B, N, fH, fW, C)
ranks_depth
=
ranks_depth
.
contiguous
().
int
()
# (N_points, ),
ranks_feat
=
ranks_feat
.
contiguous
().
int
()
# (N_points, ),
interval_lengths
=
interval_lengths
.
contiguous
().
int
()
# (N_pillar, )
interval_starts
=
interval_starts
.
contiguous
().
int
()
# (N_pillar, )
out
=
feat
.
new_zeros
(
bev_feat_shape
)
# (B, D_Z, D_Y, D_X, C)
bev_pool_v2_ext
.
bev_pool_v2_forward
(
depth
,
feat
,
out
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
interval_lengths
,
interval_starts
,
)
ctx
.
save_for_backward
(
ranks_bev
,
depth
,
feat
,
ranks_feat
,
ranks_depth
)
return
out
@
staticmethod
def
backward
(
ctx
,
out_grad
):
ranks_bev
,
depth
,
feat
,
ranks_feat
,
ranks_depth
=
ctx
.
saved_tensors
order
=
ranks_feat
.
argsort
()
ranks_feat
,
ranks_depth
,
ranks_bev
=
\
ranks_feat
[
order
],
ranks_depth
[
order
],
ranks_bev
[
order
]
kept
=
torch
.
ones
(
ranks_bev
.
shape
[
0
],
device
=
ranks_bev
.
device
,
dtype
=
torch
.
bool
)
kept
[
1
:]
=
ranks_feat
[
1
:]
!=
ranks_feat
[:
-
1
]
interval_starts_bp
=
torch
.
where
(
kept
)[
0
].
int
()
interval_lengths_bp
=
torch
.
zeros_like
(
interval_starts_bp
)
interval_lengths_bp
[:
-
1
]
=
interval_starts_bp
[
1
:]
-
interval_starts_bp
[:
-
1
]
interval_lengths_bp
[
-
1
]
=
ranks_bev
.
shape
[
0
]
-
interval_starts_bp
[
-
1
]
depth
=
depth
.
contiguous
()
feat
=
feat
.
contiguous
()
ranks_depth
=
ranks_depth
.
contiguous
()
ranks_feat
=
ranks_feat
.
contiguous
()
ranks_bev
=
ranks_bev
.
contiguous
()
interval_lengths_bp
=
interval_lengths_bp
.
contiguous
()
interval_starts_bp
=
interval_starts_bp
.
contiguous
()
depth_grad
=
depth
.
new_zeros
(
depth
.
shape
)
feat_grad
=
feat
.
new_zeros
(
feat
.
shape
)
out_grad
=
out_grad
.
contiguous
()
bev_pool_v2_ext
.
bev_pool_v2_backward
(
out_grad
,
depth_grad
,
feat_grad
,
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
interval_lengths_bp
,
interval_starts_bp
,
)
return
depth_grad
,
feat_grad
,
None
,
None
,
None
,
None
,
None
,
\
None
,
None
,
None
def
bev_pool_v2
(
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
bev_feat_shape
,
interval_starts
,
interval_lengths
):
"""
Args:
depth: (B, N, D, fH, fW)
feat: (B, N, fH, fW, C)
ranks_depth: (N_points, ),
ranks_feat: (N_points, ),
ranks_bev: (N_points, ),
bev_feat_shape: (B, D_Z, D_Y, D_X, C)
interval_starts: (N_pillar, )
interval_lengths: (N_pillar, )
Returns:
x: bev feature in shape (B, C, Dz, Dy, Dx)
"""
x
=
QuickCumsumCuda
.
apply
(
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
bev_feat_shape
,
interval_starts
,
interval_lengths
)
# (B, Dz, Dy, Dx, C)
x
=
x
.
permute
(
0
,
4
,
1
,
2
,
3
).
contiguous
()
# (B, C, Dz, Dy, Dx)
return
x
class
TRTBEVPoolv2
(
torch
.
autograd
.
Function
):
@
staticmethod
def
symbolic
(
g
,
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
interval_starts
,
interval_lengths
,
output_height
=
128
,
output_width
=
128
,
output_z
=
1
):
"""symbolic function for creating onnx op."""
return
g
.
op
(
'mmdeploy::bev_pool_v2'
,
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
interval_starts
,
interval_lengths
,
output_height_i
=
output_height
,
output_width_i
=
output_width
,
output_z_i
=
output_z
)
@
staticmethod
def
forward
(
g
,
depth
,
# N,D,H,W
feat
,
# N,H,W,C
ranks_depth
,
ranks_feat
,
ranks_bev
,
interval_starts
,
interval_lengths
,
output_height
=
128
,
output_width
=
128
,
output_z
=
1
):
"""run forward."""
feat
=
feat
.
unsqueeze
(
0
)
depth
=
depth
.
unsqueeze
(
0
)
bev_feat_shape
=
(
depth
.
shape
[
0
],
output_z
,
output_height
,
output_width
,
feat
.
shape
[
-
1
])
# (B, Z, Y, X, C)
bev_feat
=
bev_pool_v2
(
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
bev_feat_shape
,
interval_starts
,
interval_lengths
)
if
output_z
==
1
:
bev_feat
=
bev_feat
.
squeeze
(
2
)
bev_feat
=
bev_feat
.
permute
(
0
,
2
,
3
,
1
)
return
bev_feat
def
test_bev_pool_v2
():
depth
=
np
.
array
([
0.3
,
0.4
,
0.2
,
0.1
,
0.7
,
0.6
,
0.8
,
0.9
])
depth
=
torch
.
from_numpy
(
depth
).
float
().
cuda
()
depth
=
depth
.
view
(
1
,
1
,
2
,
2
,
2
).
requires_grad_
()
feat
=
torch
.
ones
(
size
=
[
1
,
1
,
2
,
2
,
2
],
dtype
=
torch
.
float
,
device
=
'cuda'
).
requires_grad_
()
ranks_depth
=
torch
.
from_numpy
(
np
.
array
([
0
,
4
,
1
,
6
])).
int
().
cuda
()
ranks_feat
=
torch
.
from_numpy
(
np
.
array
([
0
,
0
,
1
,
2
])).
int
().
cuda
()
ranks_bev
=
torch
.
from_numpy
(
np
.
array
([
0
,
0
,
1
,
1
])).
int
().
cuda
()
kept
=
torch
.
ones
(
ranks_bev
.
shape
[
0
],
device
=
ranks_bev
.
device
,
dtype
=
torch
.
bool
)
kept
[
1
:]
=
ranks_bev
[
1
:]
!=
ranks_bev
[:
-
1
]
interval_starts
=
torch
.
where
(
kept
)[
0
].
int
()
if
len
(
interval_starts
)
==
0
:
return
None
,
None
,
None
,
None
,
None
interval_lengths
=
torch
.
zeros_like
(
interval_starts
)
interval_lengths
[:
-
1
]
=
interval_starts
[
1
:]
-
interval_starts
[:
-
1
]
interval_lengths
[
-
1
]
=
ranks_bev
.
shape
[
0
]
-
interval_starts
[
-
1
]
bev_feat
=
bev_pool_v2
(
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
(
1
,
1
,
2
,
2
,
2
),
interval_starts
,
interval_lengths
)
loss
=
torch
.
sum
(
bev_feat
)
loss
.
backward
()
assert
loss
==
4.4
grad_depth
=
np
.
array
([
2.
,
2.
,
0.
,
0.
,
2.
,
0.
,
2.
,
0.
])
grad_depth
=
torch
.
from_numpy
(
grad_depth
).
float
()
grad_depth
=
grad_depth
.
cuda
().
view
(
1
,
1
,
2
,
2
,
2
)
assert
depth
.
grad
.
allclose
(
grad_depth
)
grad_feat
=
np
.
array
([
1.0
,
1.0
,
0.4
,
0.4
,
0.8
,
0.8
,
0.
,
0.
])
grad_feat
=
torch
.
from_numpy
(
grad_feat
).
float
().
cuda
().
view
(
1
,
1
,
2
,
2
,
2
)
assert
feat
.
grad
.
allclose
(
grad_feat
)
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/bev_pool_v2_ext.cpython-310-x86_64-linux-gnu.so
0 → 100755
View file @
d2b71343
File added
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool.cpp
0 → 100644
View file @
d2b71343
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference https://arxiv.org/abs/2211.17111
#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>
// CUDA function declarations
void
bev_pool_v2
(
int
c
,
int
n_intervals
,
const
float
*
depth
,
const
float
*
feat
,
const
int
*
ranks_depth
,
const
int
*
ranks_feat
,
const
int
*
ranks_bev
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
out
);
void
bev_pool_v2_grad
(
int
c
,
int
n_intervals
,
const
float
*
out_grad
,
const
float
*
depth
,
const
float
*
feat
,
const
int
*
ranks_depth
,
const
int
*
ranks_feat
,
const
int
*
ranks_bev
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
depth_grad
,
float
*
feat_grad
);
/*
Function: pillar pooling (forward, cuda)
Args:
depth : input depth, FloatTensor[n, d, h, w]
feat : input features, FloatTensor[n, h, w, c]
out : output features, FloatTensor[b, c, h_out, w_out]
ranks_depth : depth index of points, IntTensor[n_points]
ranks_feat : feat index of points, IntTensor[n_points]
ranks_bev : output index of points, IntTensor[n_points]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
Return:
*/
void
bev_pool_v2_forward
(
const
at
::
Tensor
_depth
,
// (B, N, D, fH, fW)
const
at
::
Tensor
_feat
,
// (B, N, fH, fW, C)
at
::
Tensor
_out
,
// (B, D_Z, D_Y, D_X, C)
const
at
::
Tensor
_ranks_depth
,
// (N_points, ),
const
at
::
Tensor
_ranks_feat
,
// (N_points, ),
const
at
::
Tensor
_ranks_bev
,
// (N_points, ),
const
at
::
Tensor
_interval_lengths
,
// (N_pillar, )
const
at
::
Tensor
_interval_starts
// (N_pillar, )
)
{
int
c
=
_feat
.
size
(
4
);
int
n_intervals
=
_interval_lengths
.
size
(
0
);
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
_depth
));
const
float
*
depth
=
_depth
.
data_ptr
<
float
>
();
const
float
*
feat
=
_feat
.
data_ptr
<
float
>
();
const
int
*
ranks_depth
=
_ranks_depth
.
data_ptr
<
int
>
();
const
int
*
ranks_feat
=
_ranks_feat
.
data_ptr
<
int
>
();
const
int
*
ranks_bev
=
_ranks_bev
.
data_ptr
<
int
>
();
const
int
*
interval_lengths
=
_interval_lengths
.
data_ptr
<
int
>
();
const
int
*
interval_starts
=
_interval_starts
.
data_ptr
<
int
>
();
float
*
out
=
_out
.
data_ptr
<
float
>
();
bev_pool_v2
(
c
,
n_intervals
,
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
interval_starts
,
interval_lengths
,
out
);
}
/*
Function: pillar pooling (backward, cuda)
Args:
out_grad : grad of output bev feature, FloatTensor[b, c, h_out, w_out]
depth_grad : grad of input depth, FloatTensor[n, d, h, w]
feat_grad : grad of input feature, FloatTensor[n, h, w, c]
depth : input depth, FloatTensor[n, d, h, w]
feat : input features, FloatTensor[n, h, w, c]
ranks_depth : depth index of points, IntTensor[n_points]
ranks_feat : feat index of points, IntTensor[n_points]
ranks_bev : output index of points, IntTensor[n_points]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
*/
void
bev_pool_v2_backward
(
const
at
::
Tensor
_out_grad
,
at
::
Tensor
_depth_grad
,
at
::
Tensor
_feat_grad
,
const
at
::
Tensor
_depth
,
const
at
::
Tensor
_feat
,
const
at
::
Tensor
_ranks_depth
,
const
at
::
Tensor
_ranks_feat
,
const
at
::
Tensor
_ranks_bev
,
const
at
::
Tensor
_interval_lengths
,
const
at
::
Tensor
_interval_starts
)
{
int
c
=
_out_grad
.
size
(
4
);
int
n_intervals
=
_interval_lengths
.
size
(
0
);
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
_out_grad
));
const
float
*
out_grad
=
_out_grad
.
data_ptr
<
float
>
();
float
*
depth_grad
=
_depth_grad
.
data_ptr
<
float
>
();
float
*
feat_grad
=
_feat_grad
.
data_ptr
<
float
>
();
const
float
*
depth
=
_depth
.
data_ptr
<
float
>
();
const
float
*
feat
=
_feat
.
data_ptr
<
float
>
();
const
int
*
ranks_depth
=
_ranks_depth
.
data_ptr
<
int
>
();
const
int
*
ranks_feat
=
_ranks_feat
.
data_ptr
<
int
>
();
const
int
*
ranks_bev
=
_ranks_bev
.
data_ptr
<
int
>
();
const
int
*
interval_lengths
=
_interval_lengths
.
data_ptr
<
int
>
();
const
int
*
interval_starts
=
_interval_starts
.
data_ptr
<
int
>
();
bev_pool_v2_grad
(
c
,
n_intervals
,
out_grad
,
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
interval_starts
,
interval_lengths
,
depth_grad
,
feat_grad
);
}
PYBIND11_MODULE
(
TORCH_EXTENSION_NAME
,
m
)
{
m
.
def
(
"bev_pool_v2_forward"
,
&
bev_pool_v2_forward
,
"bev_pool_v2_forward"
);
m
.
def
(
"bev_pool_v2_backward"
,
&
bev_pool_v2_backward
,
"bev_pool_v2_backward"
);
}
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_cuda.cu
0 → 100644
View file @
d2b71343
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference https://arxiv.org/abs/2211.17111
#include <stdio.h>
#include <stdlib.h>
/*
Function: pillar pooling
Args:
c : number of channels
n_intervals : number of unique points
depth : input depth, FloatTensor[b,n,d,h,w]
feat : input feat, FloatTensor[b,n,h,w,c]
ranks_depth : input index of depth, IntTensor[n]
ranks_feat : input index of feat, IntTensor[n]
ranks_bev : output index, IntTensor[n]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
out : output features, FloatTensor[b, d, h, w, c]
*/
__global__
void
bev_pool_v2_kernel
(
int
c
,
int
n_intervals
,
const
float
*
__restrict__
depth
,
const
float
*
__restrict__
feat
,
const
int
*
__restrict__
ranks_depth
,
const
int
*
__restrict__
ranks_feat
,
const
int
*
__restrict__
ranks_bev
,
const
int
*
__restrict__
interval_starts
,
const
int
*
__restrict__
interval_lengths
,
float
*
__restrict__
out
)
{
int
idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
// 该pillar的cur_c特征对应的索引.
int
index
=
idx
/
c
;
// pillar id
int
cur_c
=
idx
%
c
;
// channel id
if
(
index
>=
n_intervals
)
return
;
int
interval_start
=
interval_starts
[
index
];
// 该pillar的起始索引.
int
interval_length
=
interval_lengths
[
index
];
// 该pillar的包含的点数量.
float
psum
=
0
;
const
float
*
cur_depth
;
const
float
*
cur_feat
;
for
(
int
i
=
0
;
i
<
interval_length
;
i
++
){
// ranks_depth[interval_start+i]: depth索引, 介于(0, B*N*D*fH*fW-1)之间.
cur_depth
=
depth
+
ranks_depth
[
interval_start
+
i
];
// ranks_feat[interval_start+i]: feature索引, 介于(0, B*N*fH*fW-1)之间.
cur_feat
=
feat
+
ranks_feat
[
interval_start
+
i
]
*
c
+
cur_c
;
//
psum
+=
*
cur_feat
*
*
cur_depth
;
// 聚合该pillar对应的cur_c特征.
}
const
int
*
cur_rank
=
ranks_bev
+
interval_start
;
// 该pillar在BEV grids中对应的索引.
float
*
cur_out
=
out
+
*
cur_rank
*
c
+
cur_c
;
// 该cur_c特征对应的索引位置.
*
cur_out
=
psum
;
}
/*
Function: pillar pooling backward
Args:
c : number of channels
n_intervals : number of unique points
out_grad : gradient of the BEV fmap from top, FloatTensor[b, d, h, w, c]
depth : input depth, FloatTensor[b,n,d,h,w]
feat : input feat, FloatTensor[b,n,h,w,c]
ranks_depth : input index of depth, IntTensor[n]
ranks_feat : input index of feat, IntTensor[n]
ranks_bev : output index, IntTensor[n]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
depth_grad : gradient of the depth fmap, FloatTensor
feat_grad : gradient of the feature fmap, FloatTensor
*/
__global__
void
bev_pool_grad_kernel
(
int
c
,
int
n_intervals
,
const
float
*
__restrict__
out_grad
,
const
float
*
__restrict__
depth
,
const
float
*
__restrict__
feat
,
const
int
*
__restrict__
ranks_depth
,
const
int
*
__restrict__
ranks_feat
,
const
int
*
__restrict__
ranks_bev
,
const
int
*
__restrict__
interval_starts
,
const
int
*
__restrict__
interval_lengths
,
float
*
__restrict__
depth_grad
,
float
*
__restrict__
feat_grad
)
{
int
idx
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
// 该pillar对应的thread
if
(
idx
>=
n_intervals
)
return
;
int
interval_start
=
interval_starts
[
idx
];
// 该pillar的起始索引.
int
interval_length
=
interval_lengths
[
idx
];
// 该pillar的包含的点数量.
const
int
*
cur_rank
;
const
float
*
cur_out_grad
;
const
float
*
cur_out_grad_start
;
const
float
*
cur_feat
;
const
float
*
cur_feat_start
;
float
*
cur_depth_grad
;
float
grad_sum
;
for
(
int
i
=
0
;
i
<
interval_length
;
i
++
){
cur_rank
=
ranks_bev
+
interval_start
+
i
;
// 该pillar在BEV grids中对应的索引.
cur_out_grad_start
=
out_grad
+
*
cur_rank
*
c
;
// pillar feature 的 grad.
cur_feat_start
=
feat
+
ranks_feat
[
interval_start
+
i
]
*
c
;
grad_sum
=
0
;
for
(
int
cur_c
=
0
;
cur_c
<
c
;
cur_c
++
){
cur_out_grad
=
cur_out_grad_start
+
cur_c
;
cur_feat
=
cur_feat_start
+
cur_c
;
grad_sum
+=
*
cur_out_grad
*
*
cur_feat
;
}
cur_depth_grad
=
depth_grad
+
ranks_depth
[
interval_start
+
i
];
*
cur_depth_grad
=
grad_sum
;
}
float
*
cur_feat_grad
;
const
float
*
cur_depth
;
for
(
int
cur_c
=
0
;
cur_c
<
c
;
cur_c
++
){
grad_sum
=
0
;
for
(
int
i
=
0
;
i
<
interval_length
;
i
++
){
cur_rank
=
ranks_bev
+
interval_start
+
i
;
cur_out_grad
=
out_grad
+
*
cur_rank
*
c
+
cur_c
;
cur_depth
=
depth
+
ranks_depth
[
interval_start
+
i
];
grad_sum
+=
*
cur_out_grad
*
*
cur_depth
;
}
cur_feat_grad
=
feat_grad
+
ranks_feat
[
interval_start
]
*
c
+
cur_c
;
*
cur_feat_grad
=
grad_sum
;
}
}
void
bev_pool_v2
(
int
c
,
int
n_intervals
,
const
float
*
depth
,
const
float
*
feat
,
const
int
*
ranks_depth
,
const
int
*
ranks_feat
,
const
int
*
ranks_bev
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
out
)
{
bev_pool_v2_kernel
<<<
(
int
)
ceil
(((
double
)
n_intervals
*
c
/
256
)),
256
>>>
(
c
,
n_intervals
,
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
interval_starts
,
interval_lengths
,
out
);
}
void
bev_pool_v2_grad
(
int
c
,
int
n_intervals
,
const
float
*
out_grad
,
const
float
*
depth
,
const
float
*
feat
,
const
int
*
ranks_depth
,
const
int
*
ranks_feat
,
const
int
*
ranks_bev
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
depth_grad
,
float
*
feat_grad
)
{
bev_pool_grad_kernel
<<<
(
int
)
ceil
(((
double
)
n_intervals
/
256
)),
256
>>>
(
c
,
n_intervals
,
out_grad
,
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
interval_starts
,
interval_lengths
,
depth_grad
,
feat_grad
);
}
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_cuda.hip
0 → 100644
View file @
d2b71343
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
#include "hip/hip_runtime.h"
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference https://arxiv.org/abs/2211.17111
#include <stdio.h>
#include <stdlib.h>
/*
Function: pillar pooling
Args:
c : number of channels
n_intervals : number of unique points
depth : input depth, FloatTensor[b,n,d,h,w]
feat : input feat, FloatTensor[b,n,h,w,c]
ranks_depth : input index of depth, IntTensor[n]
ranks_feat : input index of feat, IntTensor[n]
ranks_bev : output index, IntTensor[n]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
out : output features, FloatTensor[b, d, h, w, c]
*/
__global__ void bev_pool_v2_kernel(int c, int n_intervals,
const float *__restrict__ depth,
const float *__restrict__ feat,
const int *__restrict__ ranks_depth,
const int *__restrict__ ranks_feat,
const int *__restrict__ ranks_bev,
const int *__restrict__ interval_starts,
const int *__restrict__ interval_lengths,
float* __restrict__ out) {
int idx = blockIdx.x * blockDim.x + threadIdx.x; // 该pillar的cur_c特征对应的索引.
int index = idx / c; // pillar id
int cur_c = idx % c; // channel id
if (index >= n_intervals) return;
int interval_start = interval_starts[index]; // 该pillar的起始索引.
int interval_length = interval_lengths[index]; // 该pillar的包含的点数量.
float psum = 0;
const float* cur_depth;
const float* cur_feat;
for(int i = 0; i < interval_length; i++){
// ranks_depth[interval_start+i]: depth索引, 介于(0, B*N*D*fH*fW-1)之间.
cur_depth = depth + ranks_depth[interval_start+i];
// ranks_feat[interval_start+i]: feature索引, 介于(0, B*N*fH*fW-1)之间.
cur_feat = feat + ranks_feat[interval_start+i] * c + cur_c; //
psum += *cur_feat * *cur_depth; // 聚合该pillar对应的cur_c特征.
}
const int* cur_rank = ranks_bev + interval_start; // 该pillar在BEV grids中对应的索引.
float* cur_out = out + *cur_rank * c + cur_c; // 该cur_c特征对应的索引位置.
*cur_out = psum;
}
/*
Function: pillar pooling backward
Args:
c : number of channels
n_intervals : number of unique points
out_grad : gradient of the BEV fmap from top, FloatTensor[b, d, h, w, c]
depth : input depth, FloatTensor[b,n,d,h,w]
feat : input feat, FloatTensor[b,n,h,w,c]
ranks_depth : input index of depth, IntTensor[n]
ranks_feat : input index of feat, IntTensor[n]
ranks_bev : output index, IntTensor[n]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
depth_grad : gradient of the depth fmap, FloatTensor
feat_grad : gradient of the feature fmap, FloatTensor
*/
__global__ void bev_pool_grad_kernel(int c, int n_intervals,
const float *__restrict__ out_grad,
const float *__restrict__ depth,
const float *__restrict__ feat,
const int *__restrict__ ranks_depth,
const int *__restrict__ ranks_feat,
const int *__restrict__ ranks_bev,
const int *__restrict__ interval_starts,
const int *__restrict__ interval_lengths,
float* __restrict__ depth_grad,
float* __restrict__ feat_grad) {
int idx = blockIdx.x * blockDim.x + threadIdx.x; // 该pillar对应的thread
if (idx >= n_intervals) return;
int interval_start = interval_starts[idx]; // 该pillar的起始索引.
int interval_length = interval_lengths[idx]; // 该pillar的包含的点数量.
const int* cur_rank;
const float* cur_out_grad;
const float* cur_out_grad_start;
const float* cur_feat;
const float* cur_feat_start;
float* cur_depth_grad;
float grad_sum;
for(int i = 0; i < interval_length; i++){
cur_rank = ranks_bev + interval_start + i; // 该pillar在BEV grids中对应的索引.
cur_out_grad_start = out_grad + * cur_rank * c; // pillar feature 的 grad.
cur_feat_start = feat + ranks_feat[interval_start+i] * c;
grad_sum = 0;
for(int cur_c = 0; cur_c < c; cur_c++){
cur_out_grad = cur_out_grad_start + cur_c;
cur_feat = cur_feat_start + cur_c;
grad_sum += *cur_out_grad * *cur_feat;
}
cur_depth_grad = depth_grad + ranks_depth[interval_start+i];
*cur_depth_grad = grad_sum;
}
float* cur_feat_grad;
const float* cur_depth;
for(int cur_c = 0; cur_c < c; cur_c++){
grad_sum = 0;
for(int i = 0; i < interval_length; i++){
cur_rank = ranks_bev + interval_start + i;
cur_out_grad = out_grad + *cur_rank * c + cur_c;
cur_depth = depth + ranks_depth[interval_start+i];
grad_sum += *cur_out_grad * *cur_depth;
}
cur_feat_grad = feat_grad + ranks_feat[interval_start] * c + cur_c ;
* cur_feat_grad = grad_sum;
}
}
void bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat, const int* ranks_depth,
const int* ranks_feat, const int* ranks_bev, const int* interval_starts, const int* interval_lengths, float* out) {
hipLaunchKernelGGL(( bev_pool_v2_kernel), dim3((int)ceil(((double)n_intervals * c / 256))), dim3(256), 0, 0,
c, n_intervals, depth, feat, ranks_depth, ranks_feat,
ranks_bev, interval_starts, interval_lengths, out
);
}
void bev_pool_v2_grad(int c, int n_intervals, const float* out_grad,
const float* depth, const float* feat, const int* ranks_depth, const int* ranks_feat,
const int* ranks_bev, const int* interval_starts, const int* interval_lengths, float* depth_grad, float* feat_grad) {
hipLaunchKernelGGL(( bev_pool_grad_kernel), dim3((int)ceil(((double)n_intervals / 256))), dim3(256), 0, 0,
c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat,
ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad
);
}
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/bev_pool_v2/src/bev_pool_hip.cpp
0 → 100644
View file @
d2b71343
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference https://arxiv.org/abs/2211.17111
#include <torch/torch.h>
#include <ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.h>
// CUDA function declarations
void
bev_pool_v2
(
int
c
,
int
n_intervals
,
const
float
*
depth
,
const
float
*
feat
,
const
int
*
ranks_depth
,
const
int
*
ranks_feat
,
const
int
*
ranks_bev
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
out
);
void
bev_pool_v2_grad
(
int
c
,
int
n_intervals
,
const
float
*
out_grad
,
const
float
*
depth
,
const
float
*
feat
,
const
int
*
ranks_depth
,
const
int
*
ranks_feat
,
const
int
*
ranks_bev
,
const
int
*
interval_starts
,
const
int
*
interval_lengths
,
float
*
depth_grad
,
float
*
feat_grad
);
/*
Function: pillar pooling (forward, cuda)
Args:
depth : input depth, FloatTensor[n, d, h, w]
feat : input features, FloatTensor[n, h, w, c]
out : output features, FloatTensor[b, c, h_out, w_out]
ranks_depth : depth index of points, IntTensor[n_points]
ranks_feat : feat index of points, IntTensor[n_points]
ranks_bev : output index of points, IntTensor[n_points]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
Return:
*/
void
bev_pool_v2_forward
(
const
at
::
Tensor
_depth
,
// (B, N, D, fH, fW)
const
at
::
Tensor
_feat
,
// (B, N, fH, fW, C)
at
::
Tensor
_out
,
// (B, D_Z, D_Y, D_X, C)
const
at
::
Tensor
_ranks_depth
,
// (N_points, ),
const
at
::
Tensor
_ranks_feat
,
// (N_points, ),
const
at
::
Tensor
_ranks_bev
,
// (N_points, ),
const
at
::
Tensor
_interval_lengths
,
// (N_pillar, )
const
at
::
Tensor
_interval_starts
// (N_pillar, )
)
{
int
c
=
_feat
.
size
(
4
);
int
n_intervals
=
_interval_lengths
.
size
(
0
);
const
at
::
hip
::
OptionalHIPGuardMasqueradingAsCUDA
device_guard
(
device_of
(
_depth
));
const
float
*
depth
=
_depth
.
data_ptr
<
float
>
();
const
float
*
feat
=
_feat
.
data_ptr
<
float
>
();
const
int
*
ranks_depth
=
_ranks_depth
.
data_ptr
<
int
>
();
const
int
*
ranks_feat
=
_ranks_feat
.
data_ptr
<
int
>
();
const
int
*
ranks_bev
=
_ranks_bev
.
data_ptr
<
int
>
();
const
int
*
interval_lengths
=
_interval_lengths
.
data_ptr
<
int
>
();
const
int
*
interval_starts
=
_interval_starts
.
data_ptr
<
int
>
();
float
*
out
=
_out
.
data_ptr
<
float
>
();
bev_pool_v2
(
c
,
n_intervals
,
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
interval_starts
,
interval_lengths
,
out
);
}
/*
Function: pillar pooling (backward, cuda)
Args:
out_grad : grad of output bev feature, FloatTensor[b, c, h_out, w_out]
depth_grad : grad of input depth, FloatTensor[n, d, h, w]
feat_grad : grad of input feature, FloatTensor[n, h, w, c]
depth : input depth, FloatTensor[n, d, h, w]
feat : input features, FloatTensor[n, h, w, c]
ranks_depth : depth index of points, IntTensor[n_points]
ranks_feat : feat index of points, IntTensor[n_points]
ranks_bev : output index of points, IntTensor[n_points]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
*/
void
bev_pool_v2_backward
(
const
at
::
Tensor
_out_grad
,
at
::
Tensor
_depth_grad
,
at
::
Tensor
_feat_grad
,
const
at
::
Tensor
_depth
,
const
at
::
Tensor
_feat
,
const
at
::
Tensor
_ranks_depth
,
const
at
::
Tensor
_ranks_feat
,
const
at
::
Tensor
_ranks_bev
,
const
at
::
Tensor
_interval_lengths
,
const
at
::
Tensor
_interval_starts
)
{
int
c
=
_out_grad
.
size
(
4
);
int
n_intervals
=
_interval_lengths
.
size
(
0
);
const
at
::
hip
::
OptionalHIPGuardMasqueradingAsCUDA
device_guard
(
device_of
(
_out_grad
));
const
float
*
out_grad
=
_out_grad
.
data_ptr
<
float
>
();
float
*
depth_grad
=
_depth_grad
.
data_ptr
<
float
>
();
float
*
feat_grad
=
_feat_grad
.
data_ptr
<
float
>
();
const
float
*
depth
=
_depth
.
data_ptr
<
float
>
();
const
float
*
feat
=
_feat
.
data_ptr
<
float
>
();
const
int
*
ranks_depth
=
_ranks_depth
.
data_ptr
<
int
>
();
const
int
*
ranks_feat
=
_ranks_feat
.
data_ptr
<
int
>
();
const
int
*
ranks_bev
=
_ranks_bev
.
data_ptr
<
int
>
();
const
int
*
interval_lengths
=
_interval_lengths
.
data_ptr
<
int
>
();
const
int
*
interval_starts
=
_interval_starts
.
data_ptr
<
int
>
();
bev_pool_v2_grad
(
c
,
n_intervals
,
out_grad
,
depth
,
feat
,
ranks_depth
,
ranks_feat
,
ranks_bev
,
interval_starts
,
interval_lengths
,
depth_grad
,
feat_grad
);
}
PYBIND11_MODULE
(
TORCH_EXTENSION_NAME
,
m
)
{
m
.
def
(
"bev_pool_v2_forward"
,
&
bev_pool_v2_forward
,
"bev_pool_v2_forward"
);
m
.
def
(
"bev_pool_v2_backward"
,
&
bev_pool_v2_backward
,
"bev_pool_v2_backward"
);
}
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/__init__.py
0 → 100644
View file @
d2b71343
# Copyright (c) Phigent Robotics. All rights reserved.
from
.nearest_assign
import
nearest_assign
\ No newline at end of file
docker-hub/FlashOCC/Flashocc/projects/mmdet3d_plugin/ops/nearest_assign/__pycache__/__init__.cpython-310.pyc
0 → 100644
View file @
d2b71343
File added
Prev
1
…
5
6
7
8
9
10
11
12
13
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment