Commit d2b71343 authored by 雍大凯's avatar 雍大凯
Browse files

add code

parent 69e57885
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
#ifndef _BEV_MAX_POOL_H
#define _BEV_MAX_POOL_H
#include <torch/torch.h>
#include <ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.h>
at::Tensor bev_max_pool_forward(
const at::Tensor _geom_feats,
const at::Tensor _geom_coords,
const at::Tensor _interval_lengths,
const at::Tensor _interval_starts,
int b, int d, int h, int w
);
at::Tensor bev_max_pool_backward(
const at::Tensor _out_grad,
const at::Tensor _geom_coords,
const at::Tensor _interval_lengths,
const at::Tensor _interval_starts,
int b, int d, int h, int w
);
// CUDA function declarations
void bev_max_pool(int b, int d, int h, int w, int n, int c, int n_intervals, const float* x,
const int* geom_feats, const int* interval_starts, const int* interval_lengths, float* out);
void bev_max_pool_grad(int b, int d, int h, int w, int n, int c, int n_intervals, const float* out_grad,
const int* geom_feats, const int* interval_starts, const int* interval_lengths, float* x_grad);
#endif
\ No newline at end of file
#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>
#include "bev_sum_pool.h"
#include "bev_max_pool.h"
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("bev_sum_pool_forward", &bev_sum_pool_forward,
"bev_sum_pool_forward");
m.def("bev_sum_pool_backward", &bev_sum_pool_backward,
"bev_sum_pool_backward");
m.def("bev_max_pool_forward", &bev_max_pool_forward,
"bev_max_pool_forward");
m.def("bev_max_pool_backward", &bev_max_pool_backward,
"bev_max_pool_backward");
}
\ No newline at end of file
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
#include <torch/torch.h>
#include <ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.h>
#include "bev_sum_pool_hip.h"
#include "bev_max_pool_hip.h"
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("bev_sum_pool_forward", &bev_sum_pool_forward,
"bev_sum_pool_forward");
m.def("bev_sum_pool_backward", &bev_sum_pool_backward,
"bev_sum_pool_backward");
m.def("bev_max_pool_forward", &bev_max_pool_forward,
"bev_max_pool_forward");
m.def("bev_max_pool_backward", &bev_max_pool_backward,
"bev_max_pool_backward");
}
\ No newline at end of file
#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>
#include "bev_sum_pool.h"
/*
Function: pillar pooling (forward, cuda)
Args:
geom_feats : input features, FloatTensor[N, C]
_geom_coords : input coordinates, IntTensor[N, 4] 4: (x_id, y_id, z_id, batch_id)
interval_lengths : how many points in each pooled point, IntTensor[N_pillar, ]
interval_starts : starting position for pooled point, IntTensor [N_pillar, ]
Return:
out : output features, FloatTensor[b, d, h, w, c]
*/
at::Tensor bev_sum_pool_forward(
const at::Tensor _geom_feats,
const at::Tensor _geom_coords,
const at::Tensor _interval_lengths,
const at::Tensor _interval_starts,
int b, int d, int h, int w
) {
int n = _geom_feats.size(0);
int c = _geom_feats.size(1);
int n_intervals = _interval_lengths.size(0);
const at::cuda::OptionalCUDAGuard device_guard(device_of(_geom_feats));
const float* geom_feats = _geom_feats.data_ptr<float>();
const int* geom_coords = _geom_coords.data_ptr<int>();
const int* interval_lengths = _interval_lengths.data_ptr<int>();
const int* interval_starts = _interval_starts.data_ptr<int>();
auto options =
torch::TensorOptions().dtype(_geom_feats.dtype()).device(_geom_feats.device());
at::Tensor _out = torch::zeros({b, d, h, w, c}, options); // (B, D=Dz, H=Dy, W=Dx, C)
float* out = _out.data_ptr<float>();
bev_sum_pool(
b, d, h, w, n, c, n_intervals, geom_feats,
geom_coords, interval_starts, interval_lengths, out
);
return _out;
}
/*
Function: pillar pooling (backward, cuda)
Args:
out_grad : input features, FloatTensor[B, D, H, W, C]
geom_coords : input coordinates, IntTensor[N, 4]
interval_lengths : how many points in each pooled point, IntTensor[N_pillar, ]
interval_starts : starting position for pooled point, IntTensor [N_pillar, ]
Return:
x_grad : output features, FloatTensor[N, C]
*/
at::Tensor bev_sum_pool_backward(
const at::Tensor _out_grad,
const at::Tensor _geom_coords,
const at::Tensor _interval_lengths,
const at::Tensor _interval_starts,
int b, int d, int h, int w
) {
int n = _geom_coords.size(0);
int c = _out_grad.size(4);
int n_intervals = _interval_lengths.size(0);
const at::cuda::OptionalCUDAGuard device_guard(device_of(_out_grad));
const float* out_grad = _out_grad.data_ptr<float>();
const int* geom_coords = _geom_coords.data_ptr<int>();
const int* interval_lengths = _interval_lengths.data_ptr<int>();
const int* interval_starts = _interval_starts.data_ptr<int>();
auto options =
torch::TensorOptions().dtype(_out_grad.dtype()).device(_out_grad.device());
at::Tensor _x_grad = torch::zeros({n, c}, options); // (N, C)
float* x_grad = _x_grad.data_ptr<float>();
bev_sum_pool_grad(
b, d, h, w, n, c, n_intervals, out_grad,
geom_coords, interval_starts, interval_lengths, x_grad
);
return _x_grad;
}
\ No newline at end of file
#ifndef _BEV_SUM_POOL_H
#define _BEV_SUM_POOL_H
#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>
at::Tensor bev_sum_pool_forward(
const at::Tensor _geom_feats,
const at::Tensor _geom_coords,
const at::Tensor _interval_lengths,
const at::Tensor _interval_starts,
int b, int d, int h, int w
);
at::Tensor bev_sum_pool_backward(
const at::Tensor _out_grad,
const at::Tensor _geom_coords,
const at::Tensor _interval_lengths,
const at::Tensor _interval_starts,
int b, int d, int h, int w
);
// CUDA function declarations
void bev_sum_pool(int b, int d, int h, int w, int n, int c, int n_intervals, const float* x,
const int* geom_feats, const int* interval_starts, const int* interval_lengths, float* out);
void bev_sum_pool_grad(int b, int d, int h, int w, int n, int c, int n_intervals, const float* out_grad,
const int* geom_feats, const int* interval_starts, const int* interval_lengths, float* x_grad);
#endif
\ No newline at end of file
#include <stdio.h>
#include <stdlib.h>
#include "bev_sum_pool.h"
/*
Function: pillar pooling
Args:
b : batch size
d : depth of the feature map
h : height of pooled feature map
w : width of pooled feature map
n : number of input points
c : number of channels
n_intervals : number of unique points
geom_feats : input features, FloatTensor[n, c]
geom_coords : input coordinates, IntTensor[n, 4] 4: (x_id, y_id, z_id, batch_id)
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
out : output features, FloatTensor[b, d, h, w, c]
*/
__global__ void bev_sum_pool_kernel(int b, int d, int h, int w, int n, int c, int n_intervals,
const float *__restrict__ geom_feats,
const int *__restrict__ geom_coords,
const int *__restrict__ interval_starts,
const int *__restrict__ interval_lengths,
float* __restrict__ out) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
int index = idx / c;
int cur_c = idx % c;
if (index >= n_intervals) return;
int interval_start = interval_starts[index];
int interval_length = interval_lengths[index];
const int* cur_geom_coords = geom_coords + interval_start * 4; // 当前负责计算的pillar的坐标 4: (x_id, y_id, z_id, batch_id)
const float* cur_geom_feats = geom_feats + interval_start * c + cur_c;
float* cur_out = out + cur_geom_coords[3] * d * h * w * c +
cur_geom_coords[2] * h * w * c + cur_geom_coords[1] * w * c +
cur_geom_coords[0] * c + cur_c;
float psum = 0;
for(int i = 0; i < interval_length; i++){
psum += cur_geom_feats[i * c];
}
*cur_out = psum;
}
/*
Function: pillar pooling backward
Args:
b : batch size
d : depth of the feature map
h : height of pooled feature map
w : width of pooled feature map
n : number of input points
c : number of channels
n_intervals : number of unique points
out_grad : gradient of the BEV fmap from top, FloatTensor[b, d, h, w, c]
geom_coords : input coordinates, IntTensor[N, 4] 4: (x_id, y_id, z_id, batch_id)
interval_lengths : how many points in each pooled point, IntTensor[n_intervals]
interval_starts : starting position for pooled point, IntTensor[n_intervals]
x_grad : gradient of the image fmap, FloatTensor
*/
__global__ void bev_sum_pool_grad_kernel(int b, int d, int h, int w, int n, int c, int n_intervals,
const float *__restrict__ out_grad,
const int *__restrict__ geom_coords,
const int *__restrict__ interval_starts,
const int *__restrict__ interval_lengths,
float* __restrict__ x_grad) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
int index = idx / c;
int cur_c = idx % c;
if (index >= n_intervals) return;
int interval_start = interval_starts[index];
int interval_length = interval_lengths[index];
// 当前负责计算的pillar的坐标 4: (x_id, y_id, z_id, batch_id)
// 该pillar中所有点的梯度 与 该pillar特征的梯度相同.
const int* cur_geom_coords = geom_coords + interval_start * 4;
float* cur_x_grad = x_grad + interval_start * c + cur_c;
const float* cur_out_grad = out_grad + cur_geom_coords[3] * d * h * w * c +
cur_geom_coords[2] * h * w * c + cur_geom_coords[1] * w * c +
cur_geom_coords[0] * c + cur_c;
for(int i = 0; i < interval_length; i++){
cur_x_grad[i * c] = *cur_out_grad;
}
}
void bev_sum_pool(int b, int d, int h, int w, int n, int c, int n_intervals, const float* geom_feats,
const int* geom_coords, const int* interval_starts, const int* interval_lengths, float* out) {
bev_sum_pool_kernel<<<(int)ceil(((double)n_intervals * c / 256)), 256>>>(
b, d, h, w, n, c, n_intervals, geom_feats, geom_coords, interval_starts, interval_lengths, out
);
}
void bev_sum_pool_grad(int b, int d, int h, int w, int n, int c, int n_intervals, const float* out_grad,
const int* geom_coords, const int* interval_starts, const int* interval_lengths, float* x_grad) {
bev_sum_pool_grad_kernel<<<(int)ceil(((double)n_intervals * c / 256)), 256>>>(
b, d, h, w, n, c, n_intervals, out_grad, geom_coords, interval_starts, interval_lengths, x_grad
);
}
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
#include "hip/hip_runtime.h"
#include <stdio.h>
#include <stdlib.h>
#include "bev_sum_pool_hip.h"
/*
Function: pillar pooling
Args:
b : batch size
d : depth of the feature map
h : height of pooled feature map
w : width of pooled feature map
n : number of input points
c : number of channels
n_intervals : number of unique points
geom_feats : input features, FloatTensor[n, c]
geom_coords : input coordinates, IntTensor[n, 4] 4: (x_id, y_id, z_id, batch_id)
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
out : output features, FloatTensor[b, d, h, w, c]
*/
__global__ void bev_sum_pool_kernel(int b, int d, int h, int w, int n, int c, int n_intervals,
const float *__restrict__ geom_feats,
const int *__restrict__ geom_coords,
const int *__restrict__ interval_starts,
const int *__restrict__ interval_lengths,
float* __restrict__ out) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
int index = idx / c;
int cur_c = idx % c;
if (index >= n_intervals) return;
int interval_start = interval_starts[index];
int interval_length = interval_lengths[index];
const int* cur_geom_coords = geom_coords + interval_start * 4; // 当前负责计算的pillar的坐标 4: (x_id, y_id, z_id, batch_id)
const float* cur_geom_feats = geom_feats + interval_start * c + cur_c;
float* cur_out = out + cur_geom_coords[3] * d * h * w * c +
cur_geom_coords[2] * h * w * c + cur_geom_coords[1] * w * c +
cur_geom_coords[0] * c + cur_c;
float psum = 0;
for(int i = 0; i < interval_length; i++){
psum += cur_geom_feats[i * c];
}
*cur_out = psum;
}
/*
Function: pillar pooling backward
Args:
b : batch size
d : depth of the feature map
h : height of pooled feature map
w : width of pooled feature map
n : number of input points
c : number of channels
n_intervals : number of unique points
out_grad : gradient of the BEV fmap from top, FloatTensor[b, d, h, w, c]
geom_coords : input coordinates, IntTensor[N, 4] 4: (x_id, y_id, z_id, batch_id)
interval_lengths : how many points in each pooled point, IntTensor[n_intervals]
interval_starts : starting position for pooled point, IntTensor[n_intervals]
x_grad : gradient of the image fmap, FloatTensor
*/
__global__ void bev_sum_pool_grad_kernel(int b, int d, int h, int w, int n, int c, int n_intervals,
const float *__restrict__ out_grad,
const int *__restrict__ geom_coords,
const int *__restrict__ interval_starts,
const int *__restrict__ interval_lengths,
float* __restrict__ x_grad) {
int idx = blockIdx.x * blockDim.x + threadIdx.x;
int index = idx / c;
int cur_c = idx % c;
if (index >= n_intervals) return;
int interval_start = interval_starts[index];
int interval_length = interval_lengths[index];
// 当前负责计算的pillar的坐标 4: (x_id, y_id, z_id, batch_id)
// 该pillar中所有点的梯度 与 该pillar特征的梯度相同.
const int* cur_geom_coords = geom_coords + interval_start * 4;
float* cur_x_grad = x_grad + interval_start * c + cur_c;
const float* cur_out_grad = out_grad + cur_geom_coords[3] * d * h * w * c +
cur_geom_coords[2] * h * w * c + cur_geom_coords[1] * w * c +
cur_geom_coords[0] * c + cur_c;
for(int i = 0; i < interval_length; i++){
cur_x_grad[i * c] = *cur_out_grad;
}
}
void bev_sum_pool(int b, int d, int h, int w, int n, int c, int n_intervals, const float* geom_feats,
const int* geom_coords, const int* interval_starts, const int* interval_lengths, float* out) {
hipLaunchKernelGGL(( bev_sum_pool_kernel), dim3((int)ceil(((double)n_intervals * c / 256))), dim3(256), 0, 0,
b, d, h, w, n, c, n_intervals, geom_feats, geom_coords, interval_starts, interval_lengths, out
);
}
void bev_sum_pool_grad(int b, int d, int h, int w, int n, int c, int n_intervals, const float* out_grad,
const int* geom_coords, const int* interval_starts, const int* interval_lengths, float* x_grad) {
hipLaunchKernelGGL(( bev_sum_pool_grad_kernel), dim3((int)ceil(((double)n_intervals * c / 256))), dim3(256), 0, 0,
b, d, h, w, n, c, n_intervals, out_grad, geom_coords, interval_starts, interval_lengths, x_grad
);
}
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
#include <torch/torch.h>
#include <ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.h>
#include "bev_sum_pool_hip.h"
/*
Function: pillar pooling (forward, cuda)
Args:
geom_feats : input features, FloatTensor[N, C]
_geom_coords : input coordinates, IntTensor[N, 4] 4: (x_id, y_id, z_id, batch_id)
interval_lengths : how many points in each pooled point, IntTensor[N_pillar, ]
interval_starts : starting position for pooled point, IntTensor [N_pillar, ]
Return:
out : output features, FloatTensor[b, d, h, w, c]
*/
at::Tensor bev_sum_pool_forward(
const at::Tensor _geom_feats,
const at::Tensor _geom_coords,
const at::Tensor _interval_lengths,
const at::Tensor _interval_starts,
int b, int d, int h, int w
) {
int n = _geom_feats.size(0);
int c = _geom_feats.size(1);
int n_intervals = _interval_lengths.size(0);
const at::hip::OptionalHIPGuardMasqueradingAsCUDA device_guard(device_of(_geom_feats));
const float* geom_feats = _geom_feats.data_ptr<float>();
const int* geom_coords = _geom_coords.data_ptr<int>();
const int* interval_lengths = _interval_lengths.data_ptr<int>();
const int* interval_starts = _interval_starts.data_ptr<int>();
auto options =
torch::TensorOptions().dtype(_geom_feats.dtype()).device(_geom_feats.device());
at::Tensor _out = torch::zeros({b, d, h, w, c}, options); // (B, D=Dz, H=Dy, W=Dx, C)
float* out = _out.data_ptr<float>();
bev_sum_pool(
b, d, h, w, n, c, n_intervals, geom_feats,
geom_coords, interval_starts, interval_lengths, out
);
return _out;
}
/*
Function: pillar pooling (backward, cuda)
Args:
out_grad : input features, FloatTensor[B, D, H, W, C]
geom_coords : input coordinates, IntTensor[N, 4]
interval_lengths : how many points in each pooled point, IntTensor[N_pillar, ]
interval_starts : starting position for pooled point, IntTensor [N_pillar, ]
Return:
x_grad : output features, FloatTensor[N, C]
*/
at::Tensor bev_sum_pool_backward(
const at::Tensor _out_grad,
const at::Tensor _geom_coords,
const at::Tensor _interval_lengths,
const at::Tensor _interval_starts,
int b, int d, int h, int w
) {
int n = _geom_coords.size(0);
int c = _out_grad.size(4);
int n_intervals = _interval_lengths.size(0);
const at::hip::OptionalHIPGuardMasqueradingAsCUDA device_guard(device_of(_out_grad));
const float* out_grad = _out_grad.data_ptr<float>();
const int* geom_coords = _geom_coords.data_ptr<int>();
const int* interval_lengths = _interval_lengths.data_ptr<int>();
const int* interval_starts = _interval_starts.data_ptr<int>();
auto options =
torch::TensorOptions().dtype(_out_grad.dtype()).device(_out_grad.device());
at::Tensor _x_grad = torch::zeros({n, c}, options); // (N, C)
float* x_grad = _x_grad.data_ptr<float>();
bev_sum_pool_grad(
b, d, h, w, n, c, n_intervals, out_grad,
geom_coords, interval_starts, interval_lengths, x_grad
);
return _x_grad;
}
\ No newline at end of file
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
#ifndef _BEV_SUM_POOL_H
#define _BEV_SUM_POOL_H
#include <torch/torch.h>
#include <ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.h>
at::Tensor bev_sum_pool_forward(
const at::Tensor _geom_feats,
const at::Tensor _geom_coords,
const at::Tensor _interval_lengths,
const at::Tensor _interval_starts,
int b, int d, int h, int w
);
at::Tensor bev_sum_pool_backward(
const at::Tensor _out_grad,
const at::Tensor _geom_coords,
const at::Tensor _interval_lengths,
const at::Tensor _interval_starts,
int b, int d, int h, int w
);
// CUDA function declarations
void bev_sum_pool(int b, int d, int h, int w, int n, int c, int n_intervals, const float* x,
const int* geom_feats, const int* interval_starts, const int* interval_lengths, float* out);
void bev_sum_pool_grad(int b, int d, int h, int w, int n, int c, int n_intervals, const float* out_grad,
const int* geom_feats, const int* interval_starts, const int* interval_lengths, float* x_grad);
#endif
\ No newline at end of file
# Copyright (c) Phigent Robotics. All rights reserved.
from .bev_pool import bev_pool_v2, TRTBEVPoolv2
\ No newline at end of file
# Copyright (c) Phigent Robotics. All rights reserved.
import numpy as np
import torch
from . import bev_pool_v2_ext
__all__ = ['bev_pool_v2', 'TRTBEVPoolv2']
class QuickCumsumCuda(torch.autograd.Function):
r"""BEVPoolv2 implementation for Lift-Splat-Shoot view transformation.
Please refer to the `paper <https://arxiv.org/abs/2211.17111>`_
"""
@staticmethod
def forward(ctx, depth, feat, ranks_depth, ranks_feat, ranks_bev,
bev_feat_shape, interval_starts, interval_lengths):
ranks_bev = ranks_bev.int() # (N_points, ),
depth = depth.contiguous().float() # (B, N, D, fH, fW)
feat = feat.contiguous().float() # (B, N, fH, fW, C)
ranks_depth = ranks_depth.contiguous().int() # (N_points, ),
ranks_feat = ranks_feat.contiguous().int() # (N_points, ),
interval_lengths = interval_lengths.contiguous().int() # (N_pillar, )
interval_starts = interval_starts.contiguous().int() # (N_pillar, )
out = feat.new_zeros(bev_feat_shape) # (B, D_Z, D_Y, D_X, C)
bev_pool_v2_ext.bev_pool_v2_forward(
depth,
feat,
out,
ranks_depth,
ranks_feat,
ranks_bev,
interval_lengths,
interval_starts,
)
ctx.save_for_backward(ranks_bev, depth, feat, ranks_feat, ranks_depth)
return out
@staticmethod
def backward(ctx, out_grad):
ranks_bev, depth, feat, ranks_feat, ranks_depth = ctx.saved_tensors
order = ranks_feat.argsort()
ranks_feat, ranks_depth, ranks_bev = \
ranks_feat[order], ranks_depth[order], ranks_bev[order]
kept = torch.ones(
ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
kept[1:] = ranks_feat[1:] != ranks_feat[:-1]
interval_starts_bp = torch.where(kept)[0].int()
interval_lengths_bp = torch.zeros_like(interval_starts_bp)
interval_lengths_bp[:-1] = interval_starts_bp[
1:] - interval_starts_bp[:-1]
interval_lengths_bp[-1] = ranks_bev.shape[0] - interval_starts_bp[-1]
depth = depth.contiguous()
feat = feat.contiguous()
ranks_depth = ranks_depth.contiguous()
ranks_feat = ranks_feat.contiguous()
ranks_bev = ranks_bev.contiguous()
interval_lengths_bp = interval_lengths_bp.contiguous()
interval_starts_bp = interval_starts_bp.contiguous()
depth_grad = depth.new_zeros(depth.shape)
feat_grad = feat.new_zeros(feat.shape)
out_grad = out_grad.contiguous()
bev_pool_v2_ext.bev_pool_v2_backward(
out_grad,
depth_grad,
feat_grad,
depth,
feat,
ranks_depth,
ranks_feat,
ranks_bev,
interval_lengths_bp,
interval_starts_bp,
)
return depth_grad, feat_grad, None, None, None, None, None, \
None, None, None
def bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
bev_feat_shape, interval_starts, interval_lengths):
"""
Args:
depth: (B, N, D, fH, fW)
feat: (B, N, fH, fW, C)
ranks_depth: (N_points, ),
ranks_feat: (N_points, ),
ranks_bev: (N_points, ),
bev_feat_shape: (B, D_Z, D_Y, D_X, C)
interval_starts: (N_pillar, )
interval_lengths: (N_pillar, )
Returns:
x: bev feature in shape (B, C, Dz, Dy, Dx)
"""
x = QuickCumsumCuda.apply(depth, feat, ranks_depth, ranks_feat, ranks_bev,
bev_feat_shape, interval_starts,
interval_lengths) # (B, Dz, Dy, Dx, C)
x = x.permute(0, 4, 1, 2, 3).contiguous() # (B, C, Dz, Dy, Dx)
return x
class TRTBEVPoolv2(torch.autograd.Function):
@staticmethod
def symbolic(g,
depth,
feat,
ranks_depth,
ranks_feat,
ranks_bev,
interval_starts,
interval_lengths,
output_height=128,
output_width=128,
output_z=1):
"""symbolic function for creating onnx op."""
return g.op(
'mmdeploy::bev_pool_v2',
depth,
feat,
ranks_depth,
ranks_feat,
ranks_bev,
interval_starts,
interval_lengths,
output_height_i=output_height,
output_width_i=output_width,
output_z_i=output_z)
@staticmethod
def forward(g,
depth, # N,D,H,W
feat, # N,H,W,C
ranks_depth,
ranks_feat,
ranks_bev,
interval_starts,
interval_lengths,
output_height=128,
output_width=128,
output_z=1):
"""run forward."""
feat = feat.unsqueeze(0)
depth = depth.unsqueeze(0)
bev_feat_shape = (depth.shape[0], output_z, output_height, output_width,
feat.shape[-1]) # (B, Z, Y, X, C)
bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
bev_feat_shape, interval_starts,
interval_lengths)
if output_z == 1:
bev_feat = bev_feat.squeeze(2)
bev_feat = bev_feat.permute(0, 2, 3, 1)
return bev_feat
def test_bev_pool_v2():
depth = np.array([0.3, 0.4, 0.2, 0.1, 0.7, 0.6, 0.8, 0.9])
depth = torch.from_numpy(depth).float().cuda()
depth = depth.view(1, 1, 2, 2, 2).requires_grad_()
feat = torch.ones(
size=[1, 1, 2, 2, 2], dtype=torch.float,
device='cuda').requires_grad_()
ranks_depth = torch.from_numpy(np.array([0, 4, 1, 6])).int().cuda()
ranks_feat = torch.from_numpy(np.array([0, 0, 1, 2])).int().cuda()
ranks_bev = torch.from_numpy(np.array([0, 0, 1, 1])).int().cuda()
kept = torch.ones(
ranks_bev.shape[0], device=ranks_bev.device, dtype=torch.bool)
kept[1:] = ranks_bev[1:] != ranks_bev[:-1]
interval_starts = torch.where(kept)[0].int()
if len(interval_starts) == 0:
return None, None, None, None, None
interval_lengths = torch.zeros_like(interval_starts)
interval_lengths[:-1] = interval_starts[1:] - interval_starts[:-1]
interval_lengths[-1] = ranks_bev.shape[0] - interval_starts[-1]
bev_feat = bev_pool_v2(depth, feat, ranks_depth, ranks_feat, ranks_bev,
(1, 1, 2, 2, 2), interval_starts, interval_lengths)
loss = torch.sum(bev_feat)
loss.backward()
assert loss == 4.4
grad_depth = np.array([2., 2., 0., 0., 2., 0., 2., 0.])
grad_depth = torch.from_numpy(grad_depth).float()
grad_depth = grad_depth.cuda().view(1, 1, 2, 2, 2)
assert depth.grad.allclose(grad_depth)
grad_feat = np.array([1.0, 1.0, 0.4, 0.4, 0.8, 0.8, 0., 0.])
grad_feat = torch.from_numpy(grad_feat).float().cuda().view(1, 1, 2, 2, 2)
assert feat.grad.allclose(grad_feat)
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference https://arxiv.org/abs/2211.17111
#include <torch/torch.h>
#include <c10/cuda/CUDAGuard.h>
// CUDA function declarations
void bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat,
const int* ranks_depth, const int* ranks_feat, const int* ranks_bev,
const int* interval_starts, const int* interval_lengths, float* out);
void bev_pool_v2_grad(int c, int n_intervals, const float* out_grad,
const float* depth, const float* feat, const int* ranks_depth, const int* ranks_feat,
const int* ranks_bev, const int* interval_starts, const int* interval_lengths,
float* depth_grad, float* feat_grad);
/*
Function: pillar pooling (forward, cuda)
Args:
depth : input depth, FloatTensor[n, d, h, w]
feat : input features, FloatTensor[n, h, w, c]
out : output features, FloatTensor[b, c, h_out, w_out]
ranks_depth : depth index of points, IntTensor[n_points]
ranks_feat : feat index of points, IntTensor[n_points]
ranks_bev : output index of points, IntTensor[n_points]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
Return:
*/
void bev_pool_v2_forward(
const at::Tensor _depth, // (B, N, D, fH, fW)
const at::Tensor _feat, // (B, N, fH, fW, C)
at::Tensor _out, // (B, D_Z, D_Y, D_X, C)
const at::Tensor _ranks_depth, // (N_points, ),
const at::Tensor _ranks_feat, // (N_points, ),
const at::Tensor _ranks_bev, // (N_points, ),
const at::Tensor _interval_lengths, // (N_pillar, )
const at::Tensor _interval_starts // (N_pillar, )
) {
int c = _feat.size(4);
int n_intervals = _interval_lengths.size(0);
const at::cuda::OptionalCUDAGuard device_guard(device_of(_depth));
const float* depth = _depth.data_ptr<float>();
const float* feat = _feat.data_ptr<float>();
const int* ranks_depth = _ranks_depth.data_ptr<int>();
const int* ranks_feat = _ranks_feat.data_ptr<int>();
const int* ranks_bev = _ranks_bev.data_ptr<int>();
const int* interval_lengths = _interval_lengths.data_ptr<int>();
const int* interval_starts = _interval_starts.data_ptr<int>();
float* out = _out.data_ptr<float>();
bev_pool_v2(
c, n_intervals, depth, feat, ranks_depth, ranks_feat,
ranks_bev, interval_starts, interval_lengths, out
);
}
/*
Function: pillar pooling (backward, cuda)
Args:
out_grad : grad of output bev feature, FloatTensor[b, c, h_out, w_out]
depth_grad : grad of input depth, FloatTensor[n, d, h, w]
feat_grad : grad of input feature, FloatTensor[n, h, w, c]
depth : input depth, FloatTensor[n, d, h, w]
feat : input features, FloatTensor[n, h, w, c]
ranks_depth : depth index of points, IntTensor[n_points]
ranks_feat : feat index of points, IntTensor[n_points]
ranks_bev : output index of points, IntTensor[n_points]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
*/
void bev_pool_v2_backward(
const at::Tensor _out_grad,
at::Tensor _depth_grad,
at::Tensor _feat_grad,
const at::Tensor _depth,
const at::Tensor _feat,
const at::Tensor _ranks_depth,
const at::Tensor _ranks_feat,
const at::Tensor _ranks_bev,
const at::Tensor _interval_lengths,
const at::Tensor _interval_starts
) {
int c = _out_grad.size(4);
int n_intervals = _interval_lengths.size(0);
const at::cuda::OptionalCUDAGuard device_guard(device_of(_out_grad));
const float* out_grad = _out_grad.data_ptr<float>();
float* depth_grad = _depth_grad.data_ptr<float>();
float* feat_grad = _feat_grad.data_ptr<float>();
const float* depth = _depth.data_ptr<float>();
const float* feat = _feat.data_ptr<float>();
const int* ranks_depth = _ranks_depth.data_ptr<int>();
const int* ranks_feat = _ranks_feat.data_ptr<int>();
const int* ranks_bev = _ranks_bev.data_ptr<int>();
const int* interval_lengths = _interval_lengths.data_ptr<int>();
const int* interval_starts = _interval_starts.data_ptr<int>();
bev_pool_v2_grad(
c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat,
ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad
);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("bev_pool_v2_forward", &bev_pool_v2_forward,
"bev_pool_v2_forward");
m.def("bev_pool_v2_backward", &bev_pool_v2_backward,
"bev_pool_v2_backward");
}
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference https://arxiv.org/abs/2211.17111
#include <stdio.h>
#include <stdlib.h>
/*
Function: pillar pooling
Args:
c : number of channels
n_intervals : number of unique points
depth : input depth, FloatTensor[b,n,d,h,w]
feat : input feat, FloatTensor[b,n,h,w,c]
ranks_depth : input index of depth, IntTensor[n]
ranks_feat : input index of feat, IntTensor[n]
ranks_bev : output index, IntTensor[n]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
out : output features, FloatTensor[b, d, h, w, c]
*/
__global__ void bev_pool_v2_kernel(int c, int n_intervals,
const float *__restrict__ depth,
const float *__restrict__ feat,
const int *__restrict__ ranks_depth,
const int *__restrict__ ranks_feat,
const int *__restrict__ ranks_bev,
const int *__restrict__ interval_starts,
const int *__restrict__ interval_lengths,
float* __restrict__ out) {
int idx = blockIdx.x * blockDim.x + threadIdx.x; // 该pillar的cur_c特征对应的索引.
int index = idx / c; // pillar id
int cur_c = idx % c; // channel id
if (index >= n_intervals) return;
int interval_start = interval_starts[index]; // 该pillar的起始索引.
int interval_length = interval_lengths[index]; // 该pillar的包含的点数量.
float psum = 0;
const float* cur_depth;
const float* cur_feat;
for(int i = 0; i < interval_length; i++){
// ranks_depth[interval_start+i]: depth索引, 介于(0, B*N*D*fH*fW-1)之间.
cur_depth = depth + ranks_depth[interval_start+i];
// ranks_feat[interval_start+i]: feature索引, 介于(0, B*N*fH*fW-1)之间.
cur_feat = feat + ranks_feat[interval_start+i] * c + cur_c; //
psum += *cur_feat * *cur_depth; // 聚合该pillar对应的cur_c特征.
}
const int* cur_rank = ranks_bev + interval_start; // 该pillar在BEV grids中对应的索引.
float* cur_out = out + *cur_rank * c + cur_c; // 该cur_c特征对应的索引位置.
*cur_out = psum;
}
/*
Function: pillar pooling backward
Args:
c : number of channels
n_intervals : number of unique points
out_grad : gradient of the BEV fmap from top, FloatTensor[b, d, h, w, c]
depth : input depth, FloatTensor[b,n,d,h,w]
feat : input feat, FloatTensor[b,n,h,w,c]
ranks_depth : input index of depth, IntTensor[n]
ranks_feat : input index of feat, IntTensor[n]
ranks_bev : output index, IntTensor[n]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
depth_grad : gradient of the depth fmap, FloatTensor
feat_grad : gradient of the feature fmap, FloatTensor
*/
__global__ void bev_pool_grad_kernel(int c, int n_intervals,
const float *__restrict__ out_grad,
const float *__restrict__ depth,
const float *__restrict__ feat,
const int *__restrict__ ranks_depth,
const int *__restrict__ ranks_feat,
const int *__restrict__ ranks_bev,
const int *__restrict__ interval_starts,
const int *__restrict__ interval_lengths,
float* __restrict__ depth_grad,
float* __restrict__ feat_grad) {
int idx = blockIdx.x * blockDim.x + threadIdx.x; // 该pillar对应的thread
if (idx >= n_intervals) return;
int interval_start = interval_starts[idx]; // 该pillar的起始索引.
int interval_length = interval_lengths[idx]; // 该pillar的包含的点数量.
const int* cur_rank;
const float* cur_out_grad;
const float* cur_out_grad_start;
const float* cur_feat;
const float* cur_feat_start;
float* cur_depth_grad;
float grad_sum;
for(int i = 0; i < interval_length; i++){
cur_rank = ranks_bev + interval_start + i; // 该pillar在BEV grids中对应的索引.
cur_out_grad_start = out_grad + * cur_rank * c; // pillar feature 的 grad.
cur_feat_start = feat + ranks_feat[interval_start+i] * c;
grad_sum = 0;
for(int cur_c = 0; cur_c < c; cur_c++){
cur_out_grad = cur_out_grad_start + cur_c;
cur_feat = cur_feat_start + cur_c;
grad_sum += *cur_out_grad * *cur_feat;
}
cur_depth_grad = depth_grad + ranks_depth[interval_start+i];
*cur_depth_grad = grad_sum;
}
float* cur_feat_grad;
const float* cur_depth;
for(int cur_c = 0; cur_c < c; cur_c++){
grad_sum = 0;
for(int i = 0; i < interval_length; i++){
cur_rank = ranks_bev + interval_start + i;
cur_out_grad = out_grad + *cur_rank * c + cur_c;
cur_depth = depth + ranks_depth[interval_start+i];
grad_sum += *cur_out_grad * *cur_depth;
}
cur_feat_grad = feat_grad + ranks_feat[interval_start] * c + cur_c ;
* cur_feat_grad = grad_sum;
}
}
void bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat, const int* ranks_depth,
const int* ranks_feat, const int* ranks_bev, const int* interval_starts, const int* interval_lengths, float* out) {
bev_pool_v2_kernel<<<(int)ceil(((double)n_intervals * c / 256)), 256>>>(
c, n_intervals, depth, feat, ranks_depth, ranks_feat,
ranks_bev, interval_starts, interval_lengths, out
);
}
void bev_pool_v2_grad(int c, int n_intervals, const float* out_grad,
const float* depth, const float* feat, const int* ranks_depth, const int* ranks_feat,
const int* ranks_bev, const int* interval_starts, const int* interval_lengths, float* depth_grad, float* feat_grad) {
bev_pool_grad_kernel<<<(int)ceil(((double)n_intervals / 256)), 256>>>(
c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat,
ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad
);
}
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
#include "hip/hip_runtime.h"
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference https://arxiv.org/abs/2211.17111
#include <stdio.h>
#include <stdlib.h>
/*
Function: pillar pooling
Args:
c : number of channels
n_intervals : number of unique points
depth : input depth, FloatTensor[b,n,d,h,w]
feat : input feat, FloatTensor[b,n,h,w,c]
ranks_depth : input index of depth, IntTensor[n]
ranks_feat : input index of feat, IntTensor[n]
ranks_bev : output index, IntTensor[n]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
out : output features, FloatTensor[b, d, h, w, c]
*/
__global__ void bev_pool_v2_kernel(int c, int n_intervals,
const float *__restrict__ depth,
const float *__restrict__ feat,
const int *__restrict__ ranks_depth,
const int *__restrict__ ranks_feat,
const int *__restrict__ ranks_bev,
const int *__restrict__ interval_starts,
const int *__restrict__ interval_lengths,
float* __restrict__ out) {
int idx = blockIdx.x * blockDim.x + threadIdx.x; // 该pillar的cur_c特征对应的索引.
int index = idx / c; // pillar id
int cur_c = idx % c; // channel id
if (index >= n_intervals) return;
int interval_start = interval_starts[index]; // 该pillar的起始索引.
int interval_length = interval_lengths[index]; // 该pillar的包含的点数量.
float psum = 0;
const float* cur_depth;
const float* cur_feat;
for(int i = 0; i < interval_length; i++){
// ranks_depth[interval_start+i]: depth索引, 介于(0, B*N*D*fH*fW-1)之间.
cur_depth = depth + ranks_depth[interval_start+i];
// ranks_feat[interval_start+i]: feature索引, 介于(0, B*N*fH*fW-1)之间.
cur_feat = feat + ranks_feat[interval_start+i] * c + cur_c; //
psum += *cur_feat * *cur_depth; // 聚合该pillar对应的cur_c特征.
}
const int* cur_rank = ranks_bev + interval_start; // 该pillar在BEV grids中对应的索引.
float* cur_out = out + *cur_rank * c + cur_c; // 该cur_c特征对应的索引位置.
*cur_out = psum;
}
/*
Function: pillar pooling backward
Args:
c : number of channels
n_intervals : number of unique points
out_grad : gradient of the BEV fmap from top, FloatTensor[b, d, h, w, c]
depth : input depth, FloatTensor[b,n,d,h,w]
feat : input feat, FloatTensor[b,n,h,w,c]
ranks_depth : input index of depth, IntTensor[n]
ranks_feat : input index of feat, IntTensor[n]
ranks_bev : output index, IntTensor[n]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
depth_grad : gradient of the depth fmap, FloatTensor
feat_grad : gradient of the feature fmap, FloatTensor
*/
__global__ void bev_pool_grad_kernel(int c, int n_intervals,
const float *__restrict__ out_grad,
const float *__restrict__ depth,
const float *__restrict__ feat,
const int *__restrict__ ranks_depth,
const int *__restrict__ ranks_feat,
const int *__restrict__ ranks_bev,
const int *__restrict__ interval_starts,
const int *__restrict__ interval_lengths,
float* __restrict__ depth_grad,
float* __restrict__ feat_grad) {
int idx = blockIdx.x * blockDim.x + threadIdx.x; // 该pillar对应的thread
if (idx >= n_intervals) return;
int interval_start = interval_starts[idx]; // 该pillar的起始索引.
int interval_length = interval_lengths[idx]; // 该pillar的包含的点数量.
const int* cur_rank;
const float* cur_out_grad;
const float* cur_out_grad_start;
const float* cur_feat;
const float* cur_feat_start;
float* cur_depth_grad;
float grad_sum;
for(int i = 0; i < interval_length; i++){
cur_rank = ranks_bev + interval_start + i; // 该pillar在BEV grids中对应的索引.
cur_out_grad_start = out_grad + * cur_rank * c; // pillar feature 的 grad.
cur_feat_start = feat + ranks_feat[interval_start+i] * c;
grad_sum = 0;
for(int cur_c = 0; cur_c < c; cur_c++){
cur_out_grad = cur_out_grad_start + cur_c;
cur_feat = cur_feat_start + cur_c;
grad_sum += *cur_out_grad * *cur_feat;
}
cur_depth_grad = depth_grad + ranks_depth[interval_start+i];
*cur_depth_grad = grad_sum;
}
float* cur_feat_grad;
const float* cur_depth;
for(int cur_c = 0; cur_c < c; cur_c++){
grad_sum = 0;
for(int i = 0; i < interval_length; i++){
cur_rank = ranks_bev + interval_start + i;
cur_out_grad = out_grad + *cur_rank * c + cur_c;
cur_depth = depth + ranks_depth[interval_start+i];
grad_sum += *cur_out_grad * *cur_depth;
}
cur_feat_grad = feat_grad + ranks_feat[interval_start] * c + cur_c ;
* cur_feat_grad = grad_sum;
}
}
void bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat, const int* ranks_depth,
const int* ranks_feat, const int* ranks_bev, const int* interval_starts, const int* interval_lengths, float* out) {
hipLaunchKernelGGL(( bev_pool_v2_kernel), dim3((int)ceil(((double)n_intervals * c / 256))), dim3(256), 0, 0,
c, n_intervals, depth, feat, ranks_depth, ranks_feat,
ranks_bev, interval_starts, interval_lengths, out
);
}
void bev_pool_v2_grad(int c, int n_intervals, const float* out_grad,
const float* depth, const float* feat, const int* ranks_depth, const int* ranks_feat,
const int* ranks_bev, const int* interval_starts, const int* interval_lengths, float* depth_grad, float* feat_grad) {
hipLaunchKernelGGL(( bev_pool_grad_kernel), dim3((int)ceil(((double)n_intervals / 256))), dim3(256), 0, 0,
c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat,
ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad
);
}
// !!! This is a file automatically generated by hipify!!!
#include <ATen/dtk_macros.h>
// Copyright (c) Phigent Robotics. All rights reserved.
// Reference https://arxiv.org/abs/2211.17111
#include <torch/torch.h>
#include <ATen/hip/impl/HIPGuardImplMasqueradingAsCUDA.h>
// CUDA function declarations
void bev_pool_v2(int c, int n_intervals, const float* depth, const float* feat,
const int* ranks_depth, const int* ranks_feat, const int* ranks_bev,
const int* interval_starts, const int* interval_lengths, float* out);
void bev_pool_v2_grad(int c, int n_intervals, const float* out_grad,
const float* depth, const float* feat, const int* ranks_depth, const int* ranks_feat,
const int* ranks_bev, const int* interval_starts, const int* interval_lengths,
float* depth_grad, float* feat_grad);
/*
Function: pillar pooling (forward, cuda)
Args:
depth : input depth, FloatTensor[n, d, h, w]
feat : input features, FloatTensor[n, h, w, c]
out : output features, FloatTensor[b, c, h_out, w_out]
ranks_depth : depth index of points, IntTensor[n_points]
ranks_feat : feat index of points, IntTensor[n_points]
ranks_bev : output index of points, IntTensor[n_points]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
Return:
*/
void bev_pool_v2_forward(
const at::Tensor _depth, // (B, N, D, fH, fW)
const at::Tensor _feat, // (B, N, fH, fW, C)
at::Tensor _out, // (B, D_Z, D_Y, D_X, C)
const at::Tensor _ranks_depth, // (N_points, ),
const at::Tensor _ranks_feat, // (N_points, ),
const at::Tensor _ranks_bev, // (N_points, ),
const at::Tensor _interval_lengths, // (N_pillar, )
const at::Tensor _interval_starts // (N_pillar, )
) {
int c = _feat.size(4);
int n_intervals = _interval_lengths.size(0);
const at::hip::OptionalHIPGuardMasqueradingAsCUDA device_guard(device_of(_depth));
const float* depth = _depth.data_ptr<float>();
const float* feat = _feat.data_ptr<float>();
const int* ranks_depth = _ranks_depth.data_ptr<int>();
const int* ranks_feat = _ranks_feat.data_ptr<int>();
const int* ranks_bev = _ranks_bev.data_ptr<int>();
const int* interval_lengths = _interval_lengths.data_ptr<int>();
const int* interval_starts = _interval_starts.data_ptr<int>();
float* out = _out.data_ptr<float>();
bev_pool_v2(
c, n_intervals, depth, feat, ranks_depth, ranks_feat,
ranks_bev, interval_starts, interval_lengths, out
);
}
/*
Function: pillar pooling (backward, cuda)
Args:
out_grad : grad of output bev feature, FloatTensor[b, c, h_out, w_out]
depth_grad : grad of input depth, FloatTensor[n, d, h, w]
feat_grad : grad of input feature, FloatTensor[n, h, w, c]
depth : input depth, FloatTensor[n, d, h, w]
feat : input features, FloatTensor[n, h, w, c]
ranks_depth : depth index of points, IntTensor[n_points]
ranks_feat : feat index of points, IntTensor[n_points]
ranks_bev : output index of points, IntTensor[n_points]
interval_lengths : starting position for pooled point, IntTensor[n_intervals]
interval_starts : how many points in each pooled point, IntTensor[n_intervals]
*/
void bev_pool_v2_backward(
const at::Tensor _out_grad,
at::Tensor _depth_grad,
at::Tensor _feat_grad,
const at::Tensor _depth,
const at::Tensor _feat,
const at::Tensor _ranks_depth,
const at::Tensor _ranks_feat,
const at::Tensor _ranks_bev,
const at::Tensor _interval_lengths,
const at::Tensor _interval_starts
) {
int c = _out_grad.size(4);
int n_intervals = _interval_lengths.size(0);
const at::hip::OptionalHIPGuardMasqueradingAsCUDA device_guard(device_of(_out_grad));
const float* out_grad = _out_grad.data_ptr<float>();
float* depth_grad = _depth_grad.data_ptr<float>();
float* feat_grad = _feat_grad.data_ptr<float>();
const float* depth = _depth.data_ptr<float>();
const float* feat = _feat.data_ptr<float>();
const int* ranks_depth = _ranks_depth.data_ptr<int>();
const int* ranks_feat = _ranks_feat.data_ptr<int>();
const int* ranks_bev = _ranks_bev.data_ptr<int>();
const int* interval_lengths = _interval_lengths.data_ptr<int>();
const int* interval_starts = _interval_starts.data_ptr<int>();
bev_pool_v2_grad(
c, n_intervals, out_grad, depth, feat, ranks_depth, ranks_feat,
ranks_bev, interval_starts, interval_lengths, depth_grad, feat_grad
);
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("bev_pool_v2_forward", &bev_pool_v2_forward,
"bev_pool_v2_forward");
m.def("bev_pool_v2_backward", &bev_pool_v2_backward,
"bev_pool_v2_backward");
}
# Copyright (c) Phigent Robotics. All rights reserved.
from .nearest_assign import nearest_assign
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment