Commit fdeee889 authored by limm

release v1.6.1 of mmcv

parent df465820
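The diff below is the core of this refactor: the per-op *_cuda wrappers, together with their #ifdef MMCV_WITH_CUDA guards and AT_ERROR fallbacks, are replaced by mmcv's device registry, so each public function now calls a *_impl entry point that forwards to whichever backend registered itself for the tensors' device. The following sketch is illustrative only (not part of the commit) and assumes the DISPATCH_DEVICE_IMPL and REGISTER_DEVICE_IMPL macros from pytorch_device_registry.hpp behave as their uses in these files suggest; roi_pool serves as the example.
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
// Device-agnostic entry point (this is the shape the files below move to).
void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax, int pooled_height, int pooled_width,
                           float spatial_scale) {
  // Look up and call the implementation registered for the inputs' device.
  DISPATCH_DEVICE_IMPL(roi_pool_forward_impl, input, rois, output, argmax,
                       pooled_height, pooled_width, spatial_scale);
}
// Backend side (assumed registration site, e.g. a CUDA binding source);
// the kernel launcher itself is declared in the CUDA sources shown below.
void roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor output,
                           Tensor argmax, int pooled_height, int pooled_width,
                           float spatial_scale) {
  ROIPoolForwardCUDAKernelLauncher(input, rois, output, argmax, pooled_height,
                                   pooled_width, spatial_scale);
}
REGISTER_DEVICE_IMPL(roi_pool_forward_impl, CUDA, roi_pool_forward_cuda);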
@@ -5,27 +5,27 @@
using namespace at;
#ifdef MMCV_WITH_CUDA
void roi_align_rotated_forward_cuda(Tensor features, Tensor rois, Tensor output,
void roi_align_rotated_forward_cuda(Tensor input, Tensor rois, Tensor output,
int pooled_height, int pooled_width,
float spatial_scale, int sample_num,
float spatial_scale, int sampling_ratio,
bool aligned, bool clockwise);
void roi_align_rotated_backward_cuda(Tensor grad_output, Tensor rois,
Tensor bottom_grad, int pooled_height,
int pooled_width, float spatial_scale,
int sample_num, bool aligned,
int sampling_ratio, bool aligned,
bool clockwise);
#endif
void roi_align_rotated_forward_cpu(Tensor features, Tensor rois, Tensor output,
void roi_align_rotated_forward_cpu(Tensor input, Tensor rois, Tensor output,
int pooled_height, int pooled_width,
float spatial_scale, int sample_num,
float spatial_scale, int sampling_ratio,
bool aligned, bool clockwise);
void roi_align_rotated_backward_cpu(Tensor grad_output, Tensor rois,
Tensor bottom_grad, int pooled_height,
int pooled_width, float spatial_scale,
int sample_num, bool aligned,
int sampling_ratio, bool aligned,
bool clockwise);
#endif // ROI_ALIGN_ROTATED_PYTORCH_H
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA
void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
Tensor argmax, int pooled_height,
int pooled_width, float spatial_scale);
void ROIPoolBackwardCUDAKernelLauncher(Tensor grad_output, Tensor rois,
Tensor argmax, Tensor grad_input,
int pooled_height, int pooled_width,
float spatial_scale);
void roi_pool_forward_cuda(Tensor input, Tensor rois, Tensor output,
void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,
Tensor argmax, int pooled_height, int pooled_width,
float spatial_scale) {
ROIPoolForwardCUDAKernelLauncher(input, rois, output, argmax, pooled_height,
pooled_width, spatial_scale);
DISPATCH_DEVICE_IMPL(roi_pool_forward_impl, input, rois, output, argmax,
pooled_height, pooled_width, spatial_scale);
}
void roi_pool_backward_cuda(Tensor grad_output, Tensor rois, Tensor argmax,
void roi_pool_backward_impl(Tensor grad_output, Tensor rois, Tensor argmax,
Tensor grad_input, int pooled_height,
int pooled_width, float spatial_scale) {
ROIPoolBackwardCUDAKernelLauncher(grad_output, rois, argmax, grad_input,
pooled_height, pooled_width, spatial_scale);
DISPATCH_DEVICE_IMPL(roi_pool_backward_impl, grad_output, rois, argmax,
grad_input, pooled_height, pooled_width, spatial_scale);
}
#endif
void roi_pool_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax,
int pooled_height, int pooled_width,
float spatial_scale) {
if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(rois);
CHECK_CUDA_INPUT(output);
CHECK_CUDA_INPUT(argmax);
roi_pool_forward_cuda(input, rois, output, argmax, pooled_height,
pooled_width, spatial_scale);
#else
AT_ERROR("RoIPool is not compiled with GPU support");
#endif
} else {
AT_ERROR("RoIPool is not implemented on CPU");
}
roi_pool_forward_impl(input, rois, output, argmax, pooled_height,
pooled_width, spatial_scale);
}
void roi_pool_backward(Tensor grad_output, Tensor rois, Tensor argmax,
Tensor grad_input, int pooled_height, int pooled_width,
float spatial_scale) {
if (grad_output.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(grad_output);
CHECK_CUDA_INPUT(rois);
CHECK_CUDA_INPUT(argmax);
CHECK_CUDA_INPUT(grad_input);
roi_pool_backward_cuda(grad_output, rois, argmax, grad_input, pooled_height,
pooled_width, spatial_scale);
#else
AT_ERROR("RoIPool is not compiled with GPU support");
#endif
} else {
AT_ERROR("RoIPool is not implemented on CPU");
}
roi_pool_backward_impl(grad_output, rois, argmax, grad_input, pooled_height,
pooled_width, spatial_scale);
}
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA
void RoiawarePool3dForwardCUDAKernelLauncher(
int boxes_num, int pts_num, int channels, int max_pts_each_voxel, int out_x,
int out_y, int out_z, const Tensor rois, const Tensor pts,
const Tensor pts_feature, Tensor argmax, Tensor pts_idx_of_voxels,
Tensor pooled_features, int pool_method);
void roiaware_pool3d_forward_cuda(int boxes_num, int pts_num, int channels,
void roiaware_pool3d_forward_impl(int boxes_num, int pts_num, int channels,
int max_pts_each_voxel, int out_x, int out_y,
int out_z, const Tensor rois,
const Tensor pts, const Tensor pts_feature,
Tensor argmax, Tensor pts_idx_of_voxels,
Tensor pooled_features, int pool_method) {
RoiawarePool3dForwardCUDAKernelLauncher(
boxes_num, pts_num, channels, max_pts_each_voxel, out_x, out_y, out_z,
rois, pts, pts_feature, argmax, pts_idx_of_voxels, pooled_features,
pool_method);
};
void RoiawarePool3dBackwardCUDAKernelLauncher(
int boxes_num, int out_x, int out_y, int out_z, int channels,
int max_pts_each_voxel, const Tensor pts_idx_of_voxels, const Tensor argmax,
const Tensor grad_out, Tensor grad_in, int pool_method);
DISPATCH_DEVICE_IMPL(roiaware_pool3d_forward_impl, boxes_num, pts_num,
channels, max_pts_each_voxel, out_x, out_y, out_z, rois,
pts, pts_feature, argmax, pts_idx_of_voxels,
pooled_features, pool_method);
}
void roiaware_pool3d_backward_cuda(int boxes_num, int out_x, int out_y,
void roiaware_pool3d_backward_impl(int boxes_num, int out_x, int out_y,
int out_z, int channels,
int max_pts_each_voxel,
const Tensor pts_idx_of_voxels,
const Tensor argmax, const Tensor grad_out,
Tensor grad_in, int pool_method) {
RoiawarePool3dBackwardCUDAKernelLauncher(
boxes_num, out_x, out_y, out_z, channels, max_pts_each_voxel,
pts_idx_of_voxels, argmax, grad_out, grad_in, pool_method);
};
#endif
DISPATCH_DEVICE_IMPL(roiaware_pool3d_backward_impl, boxes_num, out_x, out_y,
out_z, channels, max_pts_each_voxel, pts_idx_of_voxels,
argmax, grad_out, grad_in, pool_method);
}
void roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature,
Tensor argmax, Tensor pts_idx_of_voxels,
@@ -47,36 +35,20 @@ void roiaware_pool3d_forward(Tensor rois, Tensor pts, Tensor pts_feature,
// params pts_idx_of_voxels: (N, out_x, out_y, out_z, max_pts_each_voxel)
// params pooled_features: (N, out_x, out_y, out_z, C)
// params pool_method: 0: max_pool 1: avg_pool
if (pts.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(rois);
CHECK_CUDA_INPUT(pts);
CHECK_CUDA_INPUT(pts_feature);
CHECK_CUDA_INPUT(argmax);
CHECK_CUDA_INPUT(pts_idx_of_voxels);
CHECK_CUDA_INPUT(pooled_features);
int boxes_num = rois.size(0);
int pts_num = pts.size(0);
int channels = pts_feature.size(1);
int max_pts_each_voxel = pts_idx_of_voxels.size(4); // index 0 is the counter
int out_x = pts_idx_of_voxels.size(1);
int out_y = pts_idx_of_voxels.size(2);
int out_z = pts_idx_of_voxels.size(3);
assert((out_x < 256) && (out_y < 256) &&
(out_z < 256)); // we encode index with 8bit
int boxes_num = rois.size(0);
int pts_num = pts.size(0);
int channels = pts_feature.size(1);
int max_pts_each_voxel =
pts_idx_of_voxels.size(4); // index 0 is the counter
int out_x = pts_idx_of_voxels.size(1);
int out_y = pts_idx_of_voxels.size(2);
int out_z = pts_idx_of_voxels.size(3);
assert((out_x < 256) && (out_y < 256) &&
(out_z < 256)); // we encode index with 8bit
roiaware_pool3d_forward_cuda(boxes_num, pts_num, channels,
max_pts_each_voxel, out_x, out_y, out_z, rois,
pts, pts_feature, argmax, pts_idx_of_voxels,
pooled_features, pool_method);
#else
AT_ERROR("roiaware_pool3d is not compiled with GPU support");
#endif
} else {
AT_ERROR("roiaware_pool3d is not implemented on CPU");
}
roiaware_pool3d_forward_impl(boxes_num, pts_num, channels, max_pts_each_voxel,
out_x, out_y, out_z, rois, pts, pts_feature,
argmax, pts_idx_of_voxels, pooled_features,
pool_method);
}
void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax,
@@ -87,29 +59,14 @@ void roiaware_pool3d_backward(Tensor pts_idx_of_voxels, Tensor argmax,
// params grad_out: (N, out_x, out_y, out_z, C)
// params grad_in: (npoints, C), return value
// params pool_method: 0: max_pool 1: avg_pool
int boxes_num = pts_idx_of_voxels.size(0);
int out_x = pts_idx_of_voxels.size(1);
int out_y = pts_idx_of_voxels.size(2);
int out_z = pts_idx_of_voxels.size(3);
int max_pts_each_voxel = pts_idx_of_voxels.size(4); // index 0 is the counter
int channels = grad_out.size(4);
if (grad_in.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(pts_idx_of_voxels);
CHECK_CUDA_INPUT(argmax);
CHECK_CUDA_INPUT(grad_out);
CHECK_CUDA_INPUT(grad_in);
int boxes_num = pts_idx_of_voxels.size(0);
int out_x = pts_idx_of_voxels.size(1);
int out_y = pts_idx_of_voxels.size(2);
int out_z = pts_idx_of_voxels.size(3);
int max_pts_each_voxel =
pts_idx_of_voxels.size(4); // index 0 is the counter
int channels = grad_out.size(4);
roiaware_pool3d_backward_cuda(boxes_num, out_x, out_y, out_z, channels,
max_pts_each_voxel, pts_idx_of_voxels, argmax,
grad_out, grad_in, pool_method);
#else
AT_ERROR("roiaware_pool3d is not compiled with GPU support");
#endif
} else {
AT_ERROR("roiaware_pool3d is not implemented on CPU");
}
roiaware_pool3d_backward_impl(boxes_num, out_x, out_y, out_z, channels,
max_pts_each_voxel, pts_idx_of_voxels, argmax,
grad_out, grad_in, pool_method);
}
@@ -7,24 +7,18 @@ All Rights Reserved 2018.
*/
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA
void RoIPointPool3dForwardCUDAKernelLauncher(
int batch_size, int pts_num, int boxes_num, int feature_in_len,
int sampled_pts_num, const Tensor xyz, const Tensor boxes3d,
const Tensor pts_feature, Tensor pooled_features, Tensor pooled_empty_flag);
void roipoint_pool3d_forward_cuda(int batch_size, int pts_num, int boxes_num,
void roipoint_pool3d_forward_impl(int batch_size, int pts_num, int boxes_num,
int feature_in_len, int sampled_pts_num,
const Tensor xyz, const Tensor boxes3d,
const Tensor pts_feature,
Tensor pooled_features,
Tensor pooled_empty_flag) {
RoIPointPool3dForwardCUDAKernelLauncher(
batch_size, pts_num, boxes_num, feature_in_len, sampled_pts_num, xyz,
boxes3d, pts_feature, pooled_features, pooled_empty_flag);
};
#endif
DISPATCH_DEVICE_IMPL(roipoint_pool3d_forward_impl, batch_size, pts_num,
boxes_num, feature_in_len, sampled_pts_num, xyz, boxes3d,
pts_feature, pooled_features, pooled_empty_flag);
}
void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature,
Tensor pooled_features, Tensor pooled_empty_flag) {
@@ -33,28 +27,13 @@ void roipoint_pool3d_forward(Tensor xyz, Tensor boxes3d, Tensor pts_feature,
// params pts_feature: (B, N, C)
// params pooled_features: (B, M, 512, 3+C)
// params pooled_empty_flag: (B, M)
int batch_size = xyz.size(0);
int pts_num = xyz.size(1);
int boxes_num = boxes3d.size(1);
int feature_in_len = pts_feature.size(2);
int sampled_pts_num = pooled_features.size(2);
if (xyz.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(xyz);
CHECK_CUDA_INPUT(boxes3d);
CHECK_CUDA_INPUT(pts_feature);
CHECK_CUDA_INPUT(pooled_features);
CHECK_CUDA_INPUT(pooled_empty_flag);
int batch_size = xyz.size(0);
int pts_num = xyz.size(1);
int boxes_num = boxes3d.size(1);
int feature_in_len = pts_feature.size(2);
int sampled_pts_num = pooled_features.size(2);
roipoint_pool3d_forward_cuda(batch_size, pts_num, boxes_num, feature_in_len,
sampled_pts_num, xyz, boxes3d, pts_feature,
pooled_features, pooled_empty_flag);
#else
AT_ERROR("roipoint_pool3d is not compiled with GPU support");
#endif
} else {
AT_ERROR("roipoint_pool3d is not implemented on CPU");
}
roipoint_pool3d_forward_impl(batch_size, pts_num, boxes_num, feature_in_len,
sampled_pts_num, xyz, boxes3d, pts_feature,
pooled_features, pooled_empty_flag);
}
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/SJTU-Thinklab-Det/r3det-on-mmdetection/blob/master/mmdet/ops/fr/src/feature_refine_cuda.cpp
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
void rotated_feature_align_forward_impl(const Tensor features,
const Tensor best_bboxes,
const float spatial_scale,
const int points, Tensor output) {
DISPATCH_DEVICE_IMPL(rotated_feature_align_forward_impl, features,
best_bboxes, spatial_scale, points, output);
}
void rotated_feature_align_backward_impl(const Tensor top_grad,
const Tensor best_bboxes,
const float spatial_scale,
const int points, Tensor bottom_grad) {
DISPATCH_DEVICE_IMPL(rotated_feature_align_backward_impl, top_grad,
best_bboxes, spatial_scale, points, bottom_grad);
}
void rotated_feature_align_forward(const Tensor features,
const Tensor best_bboxes, Tensor output,
const float spatial_scale,
const int points) {
rotated_feature_align_forward_impl(features, best_bboxes, spatial_scale,
points, output);
}
void rotated_feature_align_backward(const Tensor top_grad,
const Tensor best_bboxes,
Tensor bottom_grad,
const float spatial_scale,
const int points) {
rotated_feature_align_backward_impl(top_grad, best_bboxes, spatial_scale,
points, bottom_grad);
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "rotated_feature_align_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
void rotated_feature_align_forward_cuda_parrots(
CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
float spatial_scale;
int points;
SSAttrs(attr)
.get<float>("spatial_scale", spatial_scale)
.get<int>("points", points)
.done();
auto features = buildATensor(ctx, ins[0]);
auto best_bboxes = buildATensor(ctx, ins[1]);
auto output = buildATensor(ctx, outs[0]);
rotated_feature_align_forward(features, best_bboxes, output, spatial_scale,
points);
}
void rotated_feature_align_backward_cuda_parrots(
CudaContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
float spatial_scale;
int points;
SSAttrs(attr)
.get<float>("spatial_scale", spatial_scale)
.get<int>("points", points)
.done();
auto grad_output = buildATensor(ctx, ins[0]);
auto best_bboxes = buildATensor(ctx, ins[1]);
auto grad_input = buildATensor(ctx, outs[0]);
rotated_feature_align_backward(grad_output, best_bboxes, grad_input,
spatial_scale, points);
}
#endif
void rotated_feature_align_forward_cpu_parrots(
HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
float spatial_scale;
int points;
SSAttrs(attr)
.get<float>("spatial_scale", spatial_scale)
.get<int>("points", points)
.done();
auto features = buildATensor(ctx, ins[0]);
auto best_bboxes = buildATensor(ctx, ins[1]);
auto output = buildATensor(ctx, outs[0]);
rotated_feature_align_forward(features, best_bboxes, output, spatial_scale,
points);
}
void rotated_feature_align_backward_cpu_parrots(
HostContext& ctx, const SSElement& attr, const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
float spatial_scale;
int points;
SSAttrs(attr)
.get<float>("spatial_scale", spatial_scale)
.get<int>("points", points)
.done();
auto grad_output = buildATensor(ctx, ins[0]);
auto best_bboxes = buildATensor(ctx, ins[1]);
auto grad_input = buildATensor(ctx, outs[0]);
rotated_feature_align_backward(grad_output, best_bboxes, grad_input,
spatial_scale, points);
}
PARROTS_EXTENSION_REGISTER(rotated_feature_align_forward)
.attr("spatial_scale")
.attr("points")
.input(2)
.output(1)
.apply(rotated_feature_align_forward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
.apply(rotated_feature_align_forward_cuda_parrots)
#endif
.done();
PARROTS_EXTENSION_REGISTER(rotated_feature_align_backward)
.attr("spatial_scale")
.attr("points")
.input(2)
.output(1)
.apply(rotated_feature_align_backward_cpu_parrots)
#ifdef MMCV_WITH_CUDA
.apply(rotated_feature_align_backward_cuda_parrots)
#endif
.done();
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ROTATED_FEATURE_ALIGN_PYTORCH_H
#define ROTATED_FEATURE_ALIGN_PYTORCH_H
#include <torch/extension.h>
using namespace at;
void rotated_feature_align_forward(const Tensor features,
const Tensor best_bboxes, Tensor output,
const float spatial_scale, const int points);
void rotated_feature_align_backward(const Tensor top_grad,
const Tensor best_bboxes,
Tensor bottom_grad,
const float spatial_scale,
const int points);
#endif // ROTATED_FEATURE_ALIGN_PYTORCH_H
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA
void SyncBNForwardMeanCUDAKernelLauncher(const Tensor input, Tensor mean);
void SyncBNForwardVarCUDAKernelLauncher(const Tensor input, const Tensor mean,
Tensor var);
void SyncBNForwardOutputCUDAKernelLauncher(
const Tensor input, const Tensor mean, const Tensor var,
Tensor running_mean, Tensor running_var, const Tensor weight,
const Tensor bias, Tensor norm, Tensor std, Tensor output, float eps,
float momentum, int group_size);
void SyncBNBackwardParamCUDAKernelLauncher(const Tensor grad_output,
const Tensor norm,
Tensor grad_weight,
Tensor grad_bias);
void SyncBNBackwardDataCUDAKernelLauncher(const Tensor grad_output,
const Tensor weight,
const Tensor grad_weight,
const Tensor grad_bias,
const Tensor norm, const Tensor std,
Tensor grad_input);
void sync_bn_forward_mean_cuda(const Tensor input, Tensor mean) {
SyncBNForwardMeanCUDAKernelLauncher(input, mean);
void sync_bn_forward_mean_impl(const Tensor input, Tensor mean) {
DISPATCH_DEVICE_IMPL(sync_bn_forward_mean_impl, input, mean);
}
void sync_bn_forward_var_cuda(const Tensor input, const Tensor mean,
void sync_bn_forward_var_impl(const Tensor input, const Tensor mean,
Tensor var) {
SyncBNForwardVarCUDAKernelLauncher(input, mean, var);
DISPATCH_DEVICE_IMPL(sync_bn_forward_var_impl, input, mean, var);
}
void sync_bn_forward_output_cuda(const Tensor input, const Tensor mean,
void sync_bn_forward_output_impl(const Tensor input, const Tensor mean,
const Tensor var, Tensor running_mean,
Tensor running_var, const Tensor weight,
const Tensor bias, Tensor norm, Tensor std,
Tensor output, float eps, float momentum,
int group_size) {
SyncBNForwardOutputCUDAKernelLauncher(input, mean, var, running_mean,
running_var, weight, bias, norm, std,
output, eps, momentum, group_size);
DISPATCH_DEVICE_IMPL(sync_bn_forward_output_impl, input, mean, var,
running_mean, running_var, weight, bias, norm, std,
output, eps, momentum, group_size);
}
void sync_bn_backward_param_cuda(const Tensor grad_output, const Tensor norm,
void sync_bn_backward_param_impl(const Tensor grad_output, const Tensor norm,
Tensor grad_weight, Tensor grad_bias) {
SyncBNBackwardParamCUDAKernelLauncher(grad_output, norm, grad_weight,
grad_bias);
DISPATCH_DEVICE_IMPL(sync_bn_backward_param_impl, grad_output, norm,
grad_weight, grad_bias);
}
void sync_bn_backward_data_cuda(const Tensor grad_output, const Tensor weight,
void sync_bn_backward_data_impl(const Tensor grad_output, const Tensor weight,
const Tensor grad_weight,
const Tensor grad_bias, const Tensor norm,
const Tensor std, Tensor grad_input) {
SyncBNBackwardDataCUDAKernelLauncher(grad_output, weight, grad_weight,
grad_bias, norm, std, grad_input);
DISPATCH_DEVICE_IMPL(sync_bn_backward_data_impl, grad_output, weight,
grad_weight, grad_bias, norm, std, grad_input);
}
#endif
void sync_bn_forward_mean(const Tensor input, Tensor mean) {
if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(mean);
sync_bn_forward_mean_cuda(input, mean);
#else
AT_ERROR("SyncBatchNorm is not compiled with GPU support");
#endif
} else {
AT_ERROR("SyncBatchNorm is not implemented on CPU");
}
sync_bn_forward_mean_impl(input, mean);
}
void sync_bn_forward_var(const Tensor input, const Tensor mean, Tensor var) {
if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(mean);
CHECK_CUDA_INPUT(var);
sync_bn_forward_var_cuda(input, mean, var);
#else
AT_ERROR("SyncBatchNorm is not compiled with GPU support");
#endif
} else {
AT_ERROR("SyncBatchNorm is not implemented on CPU");
}
sync_bn_forward_var_impl(input, mean, var);
}
void sync_bn_forward_output(const Tensor input, const Tensor mean,
@@ -95,65 +50,20 @@ void sync_bn_forward_output(const Tensor input, const Tensor mean,
Tensor running_var, Tensor norm, Tensor std,
Tensor output, float eps, float momentum,
int group_size) {
if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(mean);
CHECK_CUDA_INPUT(var);
CHECK_CUDA_INPUT(weight);
CHECK_CUDA_INPUT(bias);
CHECK_CUDA_INPUT(running_mean);
CHECK_CUDA_INPUT(running_var);
CHECK_CUDA_INPUT(norm);
CHECK_CUDA_INPUT(std);
CHECK_CUDA_INPUT(output);
sync_bn_forward_output_cuda(input, mean, var, running_mean, running_var,
weight, bias, norm, std, output, eps, momentum,
group_size);
#else
AT_ERROR("SyncBatchNorm is not compiled with GPU support");
#endif
} else {
AT_ERROR("SyncBatchNorm is not implemented on CPU");
}
sync_bn_forward_output_impl(input, mean, var, running_mean, running_var,
weight, bias, norm, std, output, eps, momentum,
group_size);
}
void sync_bn_backward_param(const Tensor grad_output, const Tensor norm,
Tensor grad_weight, Tensor grad_bias) {
if (grad_output.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(grad_output);
CHECK_CUDA_INPUT(norm);
CHECK_CUDA_INPUT(grad_weight);
CHECK_CUDA_INPUT(grad_bias);
sync_bn_backward_param_cuda(grad_output, norm, grad_weight, grad_bias);
#else
AT_ERROR("SyncBatchNorm is not compiled with GPU support");
#endif
} else {
AT_ERROR("SyncBatchNorm is not implemented on CPU");
}
sync_bn_backward_param_impl(grad_output, norm, grad_weight, grad_bias);
}
void sync_bn_backward_data(const Tensor grad_output, const Tensor weight,
const Tensor grad_weight, const Tensor grad_bias,
const Tensor norm, const Tensor std,
Tensor grad_input) {
if (grad_output.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(grad_output);
CHECK_CUDA_INPUT(weight);
CHECK_CUDA_INPUT(grad_weight);
CHECK_CUDA_INPUT(grad_bias);
CHECK_CUDA_INPUT(norm);
CHECK_CUDA_INPUT(std);
CHECK_CUDA_INPUT(grad_input);
sync_bn_backward_data_cuda(grad_output, weight, grad_weight, grad_bias,
norm, std, grad_input);
#else
AT_ERROR("SyncBatchNorm is not compiled with GPU support");
#endif
} else {
AT_ERROR("SyncBatchNorm is not implemented on CPU");
}
sync_bn_backward_data_impl(grad_output, weight, grad_weight, grad_bias, norm,
std, grad_input);
}
@@ -2,60 +2,32 @@
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA
void ThreeInterpolateForwardCUDAKernelLauncher(int b, int c, int m, int n,
const Tensor points,
const Tensor idx,
const Tensor weight, Tensor out);
void three_interpolate_forward_cuda(int b, int c, int m, int n,
void three_interpolate_forward_impl(int b, int c, int m, int n,
const Tensor points, const Tensor idx,
const Tensor weight, Tensor out) {
ThreeInterpolateForwardCUDAKernelLauncher(b, c, m, n, points, idx, weight,
out);
};
void ThreeInterpolateBackwardCUDAKernelLauncher(int b, int c, int n, int m,
const Tensor grad_out,
const Tensor idx,
const Tensor weight,
Tensor grad_points);
DISPATCH_DEVICE_IMPL(three_interpolate_forward_impl, b, c, m, n, points, idx,
weight, out);
}
void three_interpolate_backward_cuda(int b, int c, int n, int m,
void three_interpolate_backward_impl(int b, int c, int n, int m,
const Tensor grad_out, const Tensor idx,
const Tensor weight, Tensor grad_points) {
ThreeInterpolateBackwardCUDAKernelLauncher(b, c, n, m, grad_out, idx, weight,
grad_points);
};
#endif
DISPATCH_DEVICE_IMPL(three_interpolate_backward_impl, b, c, n, m, grad_out,
idx, weight, grad_points);
}
void three_interpolate_forward(Tensor points_tensor, Tensor idx_tensor,
Tensor weight_tensor, Tensor out_tensor, int b,
int c, int m, int n) {
if (points_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
three_interpolate_forward_cuda(b, c, m, n, points_tensor, idx_tensor,
weight_tensor, out_tensor);
#else
AT_ERROR("three_interpolate is not compiled with GPU support");
#endif
} else {
AT_ERROR("three_interpolate is not implemented on CPU");
}
three_interpolate_forward_impl(b, c, m, n, points_tensor, idx_tensor,
weight_tensor, out_tensor);
}
void three_interpolate_backward(Tensor grad_out_tensor, Tensor idx_tensor,
Tensor weight_tensor, Tensor grad_points_tensor,
int b, int c, int n, int m) {
if (grad_out_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
three_interpolate_backward_cuda(b, c, n, m, grad_out_tensor, idx_tensor,
weight_tensor, grad_points_tensor);
#else
AT_ERROR("three_interpolate is not compiled with GPU support");
#endif
} else {
AT_ERROR("three_interpolate is not implemented on CPU");
}
three_interpolate_backward_impl(b, c, n, m, grad_out_tensor, idx_tensor,
weight_tensor, grad_points_tensor);
}
@@ -2,29 +2,17 @@
// https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/interpolate.cpp
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA
void ThreeNNForwardCUDAKernelLauncher(int b, int n, int m, const Tensor unknown,
const Tensor known, Tensor dist2,
Tensor idx);
void three_nn_forward_cuda(int b, int n, int m, const Tensor unknown,
void three_nn_forward_impl(int b, int n, int m, const Tensor unknown,
const Tensor known, Tensor dist2, Tensor idx) {
ThreeNNForwardCUDAKernelLauncher(b, n, m, unknown, known, dist2, idx);
};
#endif
DISPATCH_DEVICE_IMPL(three_nn_forward_impl, b, n, m, unknown, known, dist2,
idx);
}
void three_nn_forward(Tensor unknown_tensor, Tensor known_tensor,
Tensor dist2_tensor, Tensor idx_tensor, int b, int n,
int m) {
if (unknown_tensor.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
three_nn_forward_cuda(b, n, m, unknown_tensor, known_tensor, dist2_tensor,
idx_tensor);
#else
AT_ERROR("three_nn is not compiled with GPU support");
#endif
} else {
AT_ERROR("three_nn is not implemented on CPU");
}
three_nn_forward_impl(b, n, m, unknown_tensor, known_tensor, dist2_tensor,
idx_tensor);
}
// Copyright (c) OpenMMLab. All rights reserved
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA
void TINShiftForwardCUDAKernelLauncher(Tensor input, Tensor shift,
Tensor output);
void TINShiftBackwardCUDAKernelLauncher(Tensor grad_output, Tensor shift,
Tensor grad_input);
void tin_shift_forward_cuda(Tensor input, Tensor shift, Tensor output) {
TINShiftForwardCUDAKernelLauncher(input, shift, output);
void tin_shift_forward_impl(Tensor input, Tensor shift, Tensor output) {
DISPATCH_DEVICE_IMPL(tin_shift_forward_impl, input, shift, output);
}
void tin_shift_backward_cuda(Tensor grad_output, Tensor shift,
void tin_shift_backward_impl(Tensor grad_output, Tensor shift,
Tensor grad_input) {
TINShiftBackwardCUDAKernelLauncher(grad_output, shift, grad_input);
DISPATCH_DEVICE_IMPL(tin_shift_backward_impl, grad_output, shift, grad_input);
}
#endif
void tin_shift_forward(Tensor input, Tensor shift, Tensor output) {
if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(shift);
CHECK_CUDA_INPUT(output);
tin_shift_forward_cuda(input, shift, output);
#else
AT_ERROR("TINShift is not compiled with GPU support");
#endif
} else {
AT_ERROR("TINShift is not implemented on CPU");
}
tin_shift_forward_impl(input, shift, output);
}
void tin_shift_backward(Tensor grad_output, Tensor shift, Tensor grad_input) {
if (grad_output.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(grad_output);
CHECK_CUDA_INPUT(shift);
CHECK_CUDA_INPUT(grad_input);
tin_shift_backward_cuda(grad_output, shift, grad_input);
#else
AT_ERROR("TINShift is not compiled with GPU support");
#endif
} else {
AT_ERROR("TINShift is not implemented on CPU");
}
tin_shift_backward_impl(grad_output, shift, grad_input);
}
// Copyright (c) OpenMMLab. All rights reserved
// from
// Modified from
// https://github.com/rosinality/stylegan2-pytorch/blob/master/op/upfirdn2d.cpp
#include "pytorch_cpp_helper.hpp"
#ifdef MMCV_WITH_CUDA
torch::Tensor upfirdn2d_op(const torch::Tensor &input,
const torch::Tensor &kernel, int up_x, int up_y,
int down_x, int down_y, int pad_x0, int pad_x1,
int pad_y0, int pad_y1);
/*
Copyright (c) 2021, NVIDIA Corporation. All rights reserved.
NVIDIA Source Code License for StyleGAN2 with Adaptive Discriminator
Augmentation (ADA)
=======================================================================
1. Definitions
"Licensor" means any person or entity that distributes its Work.
"Software" means the original work of authorship made available under
this License.
"Work" means the Software and any additions to or derivative works of
the Software that are made available under this License.
The terms "reproduce," "reproduction," "derivative works," and
"distribution" have the meaning as provided under U.S. copyright law;
provided, however, that for the purposes of this License, derivative
works shall not include works that remain separable from, or merely
link (or bind by name) to the interfaces of, the Work.
Works, including the Software, are "made available" under this License
by including in or with the Work either (a) a copyright notice
referencing the applicability of this License to the Work, or (b) a
copy of this License.
2. License Grants
2.1 Copyright Grant. Subject to the terms and conditions of this
License, each Licensor grants to you a perpetual, worldwide,
non-exclusive, royalty-free, copyright license to reproduce,
prepare derivative works of, publicly display, publicly perform,
sublicense and distribute its Work and any resulting derivative
works in any form.
3. Limitations
3.1 Redistribution. You may reproduce or distribute the Work only
if (a) you do so under this License, (b) you include a complete
copy of this License with your distribution, and (c) you retain
without modification any copyright, patent, trademark, or
attribution notices that are present in the Work.
3.2 Derivative Works. You may specify that additional or different
terms apply to the use, reproduction, and distribution of your
derivative works of the Work ("Your Terms") only if (a) Your Terms
provide that the use limitation in Section 3.3 applies to your
derivative works, and (b) you identify the specific derivative
works that are subject to Your Terms. Notwithstanding Your Terms,
this License (including the redistribution requirements in Section
3.1) will continue to apply to the Work itself.
#endif
3.3 Use Limitation. The Work and any derivative works thereof only
may be used or intended for use non-commercially. Notwithstanding
the foregoing, NVIDIA and its affiliates may use the Work and any
derivative works commercially. As used herein, "non-commercially"
means for research or evaluation purposes only.
3.4 Patent Claims. If you bring or threaten to bring a patent claim
against any Licensor (including any claim, cross-claim or
counterclaim in a lawsuit) to enforce any patents that you allege
are infringed by any Work, then your rights under this License from
such Licensor (including the grant in Section 2.1) will terminate
immediately.
3.5 Trademarks. This License does not grant any rights to use any
Licensor’s or its affiliates’ names, logos, or trademarks, except
as necessary to reproduce the notices described in this License.
3.6 Termination. If you violate any term of this License, then your
rights under this License (including the grant in Section 2.1) will
terminate immediately.
4. Disclaimer of Warranty.
THE WORK IS PROVIDED "AS IS" WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR
NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER
THIS LICENSE.
5. Limitation of Liability.
EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL
THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE
SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT,
INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF
OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK
(INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION,
LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER
COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF
THE POSSIBILITY OF SUCH DAMAGES.
=======================================================================
*/
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
torch::Tensor upfirdn2d_op_impl(const torch::Tensor& input,
const torch::Tensor& kernel, int up_x, int up_y,
int down_x, int down_y, int pad_x0, int pad_x1,
int pad_y0, int pad_y1) {
return DISPATCH_DEVICE_IMPL(upfirdn2d_op_impl, input, kernel, up_x, up_y,
down_x, down_y, pad_x0, pad_x1, pad_y0, pad_y1);
}
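// Editor's note (not part of the commit), summarizing the op as in the
// upstream stylegan2-pytorch source this file is modified from: upfirdn2d
// upsamples each 2D plane by inserting up_x-1 / up_y-1 zeros between samples,
// pads by pad_x0/pad_x1 (left/right) and pad_y0/pad_y1 (top/bottom),
// convolves with the given FIR kernel, then keeps every down_x-th / down_y-th
// sample. With up = down = 1 and symmetric padding it reduces to plain 2D FIR
// filtering of the input.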
torch::Tensor upfirdn2d(const torch::Tensor &input, const torch::Tensor &kernel,
torch::Tensor upfirdn2d(const torch::Tensor& input, const torch::Tensor& kernel,
int up_x, int up_y, int down_x, int down_y, int pad_x0,
int pad_x1, int pad_y0, int pad_y1) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA(input);
CHECK_CUDA(kernel);
return upfirdn2d_op(input, kernel, up_x, up_y, down_x, down_y, pad_x0, pad_x1,
pad_y0, pad_y1);
#else
AT_ERROR("UpFirDn2d is not compiled with GPU support");
#endif
return upfirdn2d_op_impl(input, kernel, up_x, up_y, down_x, down_y, pad_x0,
pad_x1, pad_y0, pad_y1);
}
// Copyright (c) OpenMMLab. All rights reserved.
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
#ifdef MMCV_WITH_CUDA
int HardVoxelizeForwardCUDAKernelLauncher(
const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors,
at::Tensor &num_points_per_voxel, const std::vector<float> voxel_size,
const std::vector<float> coors_range, const int max_points,
const int max_voxels, const int NDim = 3);
int hard_voxelize_forward_cuda(const at::Tensor &points, at::Tensor &voxels,
int hard_voxelize_forward_impl(const at::Tensor &points, at::Tensor &voxels,
at::Tensor &coors,
at::Tensor &num_points_per_voxel,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int max_points, const int max_voxels,
const int NDim = 3) {
return HardVoxelizeForwardCUDAKernelLauncher(
points, voxels, coors, num_points_per_voxel, voxel_size, coors_range,
max_points, max_voxels, NDim);
};
return DISPATCH_DEVICE_IMPL(hard_voxelize_forward_impl, points, voxels, coors,
num_points_per_voxel, voxel_size, coors_range,
max_points, max_voxels, NDim);
}
void DynamicVoxelizeForwardCUDAKernelLauncher(
const at::Tensor &points, at::Tensor &coors,
const std::vector<float> voxel_size, const std::vector<float> coors_range,
const int NDim = 3);
int nondeterministic_hard_voxelize_forward_impl(
const at::Tensor &points, at::Tensor &voxels, at::Tensor &coors,
at::Tensor &num_points_per_voxel, const std::vector<float> voxel_size,
const std::vector<float> coors_range, const int max_points,
const int max_voxels, const int NDim = 3) {
return DISPATCH_DEVICE_IMPL(nondeterministic_hard_voxelize_forward_impl,
points, voxels, coors, num_points_per_voxel,
voxel_size, coors_range, max_points, max_voxels,
NDim);
}
void dynamic_voxelize_forward_cuda(const at::Tensor &points, at::Tensor &coors,
void dynamic_voxelize_forward_impl(const at::Tensor &points, at::Tensor &coors,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int NDim = 3) {
DynamicVoxelizeForwardCUDAKernelLauncher(points, coors, voxel_size,
coors_range, NDim);
};
#endif
int hard_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &voxels,
at::Tensor &coors,
at::Tensor &num_points_per_voxel,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int max_points, const int max_voxels,
const int NDim = 3);
void dynamic_voxelize_forward_cpu(const at::Tensor &points, at::Tensor &coors,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int NDim = 3);
DISPATCH_DEVICE_IMPL(dynamic_voxelize_forward_impl, points, coors, voxel_size,
coors_range, NDim);
}
void hard_voxelize_forward(const at::Tensor &points,
const at::Tensor &voxel_size,
const at::Tensor &coors_range, at::Tensor &voxels,
at::Tensor &coors, at::Tensor &num_points_per_voxel,
at::Tensor &voxel_num, const int max_points,
const int max_voxels, const int NDim = 3) {
const int max_voxels, const int NDim = 3,
const bool deterministic = true) {
int64_t *voxel_num_data = voxel_num.data_ptr<int64_t>();
std::vector<float> voxel_size_v(
voxel_size.data_ptr<float>(),
@@ -60,18 +47,13 @@ void hard_voxelize_forward(const at::Tensor &points,
std::vector<float> coors_range_v(
coors_range.data_ptr<float>(),
coors_range.data_ptr<float>() + coors_range.numel());
if (points.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(points);
*voxel_num_data = hard_voxelize_forward_cuda(
if (deterministic) {
*voxel_num_data = hard_voxelize_forward_impl(
points, voxels, coors, num_points_per_voxel, voxel_size_v,
coors_range_v, max_points, max_voxels, NDim);
#else
AT_ERROR("hard_voxelize is not compiled with GPU support");
#endif
} else {
*voxel_num_data = hard_voxelize_forward_cpu(
*voxel_num_data = nondeterministic_hard_voxelize_forward_impl(
points, voxels, coors, num_points_per_voxel, voxel_size_v,
coors_range_v, max_points, max_voxels, NDim);
}
@@ -87,17 +69,6 @@ void dynamic_voxelize_forward(const at::Tensor &points,
std::vector<float> coors_range_v(
coors_range.data_ptr<float>(),
coors_range.data_ptr<float>() + coors_range.numel());
if (points.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(points);
dynamic_voxelize_forward_cuda(points, coors, voxel_size_v, coors_range_v,
NDim);
#else
AT_ERROR("dynamic_voxelize is not compiled with GPU support");
#endif
} else {
dynamic_voxelize_forward_cpu(points, coors, voxel_size_v, coors_range_v,
NDim);
}
dynamic_voxelize_forward_impl(points, coors, voxel_size_v, coors_range_v,
NDim);
}
// Copyright (c) OpenMMLab. All rights reserved.
#include "pytorch_cpp_helper.hpp"
template <typename T, typename T_int>
void dynamic_voxelize_forward_cpu_kernel(
const torch::TensorAccessor<T, 2> points,
torch::TensorAccessor<T_int, 2> coors, const std::vector<float> voxel_size,
const std::vector<float> coors_range, const std::vector<int> grid_size,
const int num_points, const int num_features, const int NDim) {
const int ndim_minus_1 = NDim - 1;
bool failed = false;
// int coor[NDim];
int* coor = new int[NDim]();
int c;
for (int i = 0; i < num_points; ++i) {
failed = false;
for (int j = 0; j < NDim; ++j) {
c = floor((points[i][j] - coors_range[j]) / voxel_size[j]);
// necessary to remove points out of range
if ((c < 0 || c >= grid_size[j])) {
failed = true;
break;
}
coor[ndim_minus_1 - j] = c;
}
if (failed)
memset(&coors[i][0], -1, NDim * sizeof(T_int));
else
memcpy(&coors[i][0], &coor[0], NDim * sizeof(T_int));
}
delete[] coor;
}
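// Worked example for the mapping above (hypothetical values, not from the
// code): with coors_range[0] = -51.2, voxel_size[0] = 0.16 and
// points[i][0] = 1.0, c = floor((1.0 - (-51.2)) / 0.16) = floor(326.25) = 326.
// The point is kept only if every such c lies in [0, grid_size[j]); otherwise
// coors[i] is filled with -1. Note that the axes are written in reversed
// order (coor[NDim - 1 - j]), so coors holds (z, y, x) for 3D input.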
template <typename T, typename T_int>
void hard_voxelize_forward_cpu_kernel(
const torch::TensorAccessor<T, 2> points,
torch::TensorAccessor<T, 3> voxels, torch::TensorAccessor<T_int, 2> coors,
torch::TensorAccessor<T_int, 1> num_points_per_voxel,
torch::TensorAccessor<T_int, 3> coor_to_voxelidx, int& voxel_num,
const std::vector<float> voxel_size, const std::vector<float> coors_range,
const std::vector<int> grid_size, const int max_points,
const int max_voxels, const int num_points, const int num_features,
const int NDim) {
// declare a temp coors
at::Tensor temp_coors = at::zeros(
{num_points, NDim}, at::TensorOptions().dtype(at::kInt).device(at::kCPU));
// First use dynamic voxelization to get coors,
// then check max points/voxels constraints
dynamic_voxelize_forward_cpu_kernel<T, int>(
points, temp_coors.accessor<int, 2>(), voxel_size, coors_range, grid_size,
num_points, num_features, NDim);
int voxelidx, num;
auto coor = temp_coors.accessor<int, 2>();
for (int i = 0; i < num_points; ++i) {
// T_int* coor = temp_coors.data_ptr<int>() + i * NDim;
if (coor[i][0] == -1) continue;
voxelidx = coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]];
// record voxel
if (voxelidx == -1) {
voxelidx = voxel_num;
if (max_voxels != -1 && voxel_num >= max_voxels) continue;
voxel_num += 1;
coor_to_voxelidx[coor[i][0]][coor[i][1]][coor[i][2]] = voxelidx;
memcpy(&coors[voxelidx][0], &coor[i][0], NDim * sizeof(T_int));
}
// put points into voxel
num = num_points_per_voxel[voxelidx];
if (max_points == -1 || num < max_points) {
memcpy(&voxels[voxelidx][num][0], &points[i][0],
num_features * sizeof(T));
num_points_per_voxel[voxelidx] += 1;
}
}
return;
}
void dynamic_voxelize_forward_cpu(const at::Tensor& points, at::Tensor& coors,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int NDim = 3) {
// check device
AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor");
std::vector<int> grid_size(NDim);
const int num_points = points.size(0);
const int num_features = points.size(1);
for (int i = 0; i < NDim; ++i) {
grid_size[i] =
round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
}
// coors, num_points_per_voxel, coor_to_voxelidx are int Tensor
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
points.scalar_type(), "dynamic_voxelize_forward_cpu_kernel", [&] {
dynamic_voxelize_forward_cpu_kernel<scalar_t, int>(
points.accessor<scalar_t, 2>(), coors.accessor<int, 2>(),
voxel_size, coors_range, grid_size, num_points, num_features, NDim);
});
}
int hard_voxelize_forward_cpu(const at::Tensor& points, at::Tensor& voxels,
at::Tensor& coors,
at::Tensor& num_points_per_voxel,
const std::vector<float> voxel_size,
const std::vector<float> coors_range,
const int max_points, const int max_voxels,
const int NDim = 3) {
// the current version takes about 0.02s~0.03s per frame on CPU
// check device
AT_ASSERTM(points.device().is_cpu(), "points must be a CPU tensor");
std::vector<int> grid_size(NDim);
const int num_points = points.size(0);
const int num_features = points.size(1);
for (int i = 0; i < NDim; ++i) {
grid_size[i] =
round((coors_range[NDim + i] - coors_range[i]) / voxel_size[i]);
}
// coors, num_points_per_voxel, coor_to_voxelidx are int Tensor
// printf("cpu coor_to_voxelidx size: [%d, %d, %d]\n", grid_size[2],
// grid_size[1], grid_size[0]);
at::Tensor coor_to_voxelidx =
-at::ones({grid_size[2], grid_size[1], grid_size[0]}, coors.options());
int voxel_num = 0;
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
points.scalar_type(), "hard_voxelize_forward_cpu_kernel", [&] {
hard_voxelize_forward_cpu_kernel<scalar_t, int>(
points.accessor<scalar_t, 2>(), voxels.accessor<scalar_t, 3>(),
coors.accessor<int, 2>(), num_points_per_voxel.accessor<int, 1>(),
coor_to_voxelidx.accessor<int, 3>(), voxel_num, voxel_size,
coors_range, grid_size, max_points, max_voxels, num_points,
num_features, NDim);
});
return voxel_num;
}
@@ -12,10 +12,12 @@ void hard_voxelize_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
int max_points, max_voxels, NDim;
bool deterministic;
SSAttrs(attr)
.get<int>("max_points", max_points)
.get<int>("max_voxels", max_voxels)
.get<int>("NDim", NDim)
.get<bool>("deterministic", deterministic)
.done();
const auto& points = buildATensor(ctx, ins[0]);
const auto& voxel_size = buildATensor(ctx, ins[1]);
@@ -28,7 +30,7 @@ void hard_voxelize_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
hard_voxelize_forward(points, voxel_size, coors_range, voxels, coors,
num_points_per_voxel, voxel_num, max_points, max_voxels,
NDim);
NDim, deterministic);
}
void dynamic_voxelize_forward_cuda_parrots(CudaContext& ctx,
@@ -51,10 +53,12 @@ void hard_voxelize_forward_cpu_parrots(HostContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
int max_points, max_voxels, NDim;
bool deterministic;
SSAttrs(attr)
.get<int>("max_points", max_points)
.get<int>("max_voxels", max_voxels)
.get<int>("NDim", NDim)
.get<bool>("deterministic", deterministic)
.done();
const auto& points = buildATensor(ctx, ins[0]);
const auto& voxel_size = buildATensor(ctx, ins[1]);
@@ -67,7 +71,7 @@ void hard_voxelize_forward_cpu_parrots(HostContext& ctx, const SSElement& attr,
hard_voxelize_forward(points, voxel_size, coors_range, voxels, coors,
num_points_per_voxel, voxel_num, max_points, max_voxels,
NDim);
NDim, deterministic);
}
void dynamic_voxelize_forward_cpu_parrots(HostContext& ctx,
@@ -89,6 +93,7 @@ PARROTS_EXTENSION_REGISTER(hard_voxelize_forward)
.attr("max_points")
.attr("max_voxels")
.attr("NDim")
.attr("deterministic")
.input(3)
.output(4)
.apply(hard_voxelize_forward_cpu_parrots)
......
@@ -9,7 +9,8 @@ void hard_voxelize_forward(const at::Tensor &points,
const at::Tensor &coors_range, at::Tensor &voxels,
at::Tensor &coors, at::Tensor &num_points_per_voxel,
at::Tensor &voxel_num, const int max_points,
const int max_voxels, const int NDim = 3);
const int max_voxels, const int NDim = 3,
const bool deterministic = true);
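// Illustrative call with the new flag (hypothetical tensors and values, not
// part of this header): the trailing argument chooses between the
// deterministic and the non-deterministic voxelization paths dispatched
// earlier in this diff, e.g.
//   hard_voxelize_forward(points, voxel_size, coors_range, voxels, coors,
//                         num_points_per_voxel, voxel_num,
//                         /*max_points=*/35, /*max_voxels=*/20000,
//                         /*NDim=*/3, /*deterministic=*/true);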
void dynamic_voxelize_forward(const at::Tensor &points,
const at::Tensor &voxel_size,
......
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/csuhan/s2anet/blob/master/mmdet/ops/orn/src/ActiveRotatingFilter.h
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
void active_rotated_filter_forward_impl(const Tensor input,
const Tensor indices, Tensor output) {
DISPATCH_DEVICE_IMPL(active_rotated_filter_forward_impl, input, indices,
output);
}
void active_rotated_filter_backward_impl(const Tensor grad_out,
const Tensor indices, Tensor grad_in) {
DISPATCH_DEVICE_IMPL(active_rotated_filter_backward_impl, grad_out, indices,
grad_in);
}
void active_rotated_filter_forward(const Tensor input, const Tensor indices,
Tensor output) {
active_rotated_filter_forward_impl(input, indices, output);
}
void active_rotated_filter_backward(const Tensor grad_out, const Tensor indices,
Tensor grad_in) {
active_rotated_filter_backward_impl(grad_out, indices, grad_in);
}
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/chrdiller/pyTorchChamferDistance/blob/master/chamfer_distance/chamfer_distance.cpp
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
void chamfer_distance_forward_impl(const Tensor xyz1, const Tensor xyz2,
const Tensor dist1, const Tensor dist2,
const Tensor idx1, const Tensor idx2) {
DISPATCH_DEVICE_IMPL(chamfer_distance_forward_impl, xyz1, xyz2, dist1, dist2,
idx1, idx2);
}
void chamfer_distance_backward_impl(const Tensor xyz1, const Tensor xyz2,
Tensor gradxyz1, Tensor gradxyz2,
Tensor graddist1, Tensor graddist2,
Tensor idx1, Tensor idx2) {
DISPATCH_DEVICE_IMPL(chamfer_distance_backward_impl, xyz1, xyz2, gradxyz1,
gradxyz2, graddist1, graddist2, idx1, idx2);
}
void chamfer_distance_forward(const Tensor xyz1, const Tensor xyz2,
const Tensor dist1, const Tensor dist2,
const Tensor idx1, const Tensor idx2) {
chamfer_distance_forward_impl(xyz1, xyz2, dist1, dist2, idx1, idx2);
}
void chamfer_distance_backward(const Tensor xyz1, const Tensor xyz2,
Tensor gradxyz1, Tensor gradxyz2,
Tensor graddist1, Tensor graddist2, Tensor idx1,
Tensor idx2) {
chamfer_distance_backward_impl(xyz1, xyz2, gradxyz1, gradxyz2, graddist1,
graddist2, idx1, idx2);
}
@@ -102,7 +102,6 @@ std::vector<std::vector<int>> contour_expand(Tensor kernel_mask,
IntArrayRef data_shape = kernel_mask.sizes();
auto data_label_map = internal_kernel_label.data_ptr<int32_t>();
IntArrayRef label_map_shape = internal_kernel_label.sizes();
vector<vector<int>> text_line;
kernel_dilate(ptr_data, data_shape, data_label_map, kernel_num,
......
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/SDL-GuoZonghao/BeyondBoundingBox/tree/main/mmdet/ops/iou/src
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
void convex_iou_impl(const Tensor pointsets, const Tensor polygons,
Tensor ious) {
DISPATCH_DEVICE_IMPL(convex_iou_impl, pointsets, polygons, ious);
}
void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious) {
convex_iou_impl(pointsets, polygons, ious);
}
void convex_giou_impl(const Tensor pointsets, const Tensor polygons,
Tensor output) {
DISPATCH_DEVICE_IMPL(convex_giou_impl, pointsets, polygons, output);
}
void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output) {
convex_giou_impl(pointsets, polygons, output);
}