Unverified Commit d9549fba authored by zhuyuanhao's avatar zhuyuanhao Committed by GitHub
Browse files

fix cpp header error (#371)

* 1. use the USE_PARROTS macro to control header includes
2. add clang-format (Google style) to pre-commit

* use MMCV_ macros
parent 2c6fc5fd
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
void DeformConvForwardCUDAKernelLauncher(Tensor input, Tensor weight, void DeformConvForwardCUDAKernelLauncher(Tensor input, Tensor weight,
Tensor offset, Tensor output, Tensor offset, Tensor output,
Tensor columns, Tensor ones, int kW, Tensor columns, Tensor ones, int kW,
...@@ -62,7 +62,7 @@ void deform_conv_forward(Tensor input, Tensor weight, Tensor offset, ...@@ -62,7 +62,7 @@ void deform_conv_forward(Tensor input, Tensor weight, Tensor offset,
int dilationW, int dilationH, int group, int dilationW, int dilationH, int group,
int deformable_group, int im2col_step) { int deformable_group, int im2col_step) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(offset); CHECK_CUDA_INPUT(offset);
CHECK_CUDA_INPUT(weight); CHECK_CUDA_INPUT(weight);
...@@ -88,7 +88,7 @@ void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput, ...@@ -88,7 +88,7 @@ void deform_conv_backward_input(Tensor input, Tensor offset, Tensor gradOutput,
int dilationW, int dilationH, int group, int dilationW, int dilationH, int group,
int deformable_group, int im2col_step) { int deformable_group, int im2col_step) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(offset); CHECK_CUDA_INPUT(offset);
CHECK_CUDA_INPUT(gradOutput); CHECK_CUDA_INPUT(gradOutput);
...@@ -117,7 +117,7 @@ void deform_conv_backward_parameters(Tensor input, Tensor offset, ...@@ -117,7 +117,7 @@ void deform_conv_backward_parameters(Tensor input, Tensor offset,
int deformable_group, float scale, int deformable_group, float scale,
int im2col_step) { int im2col_step) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(offset); CHECK_CUDA_INPUT(offset);
CHECK_CUDA_INPUT(gradOutput); CHECK_CUDA_INPUT(gradOutput);
......
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
void DeformRoIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, void DeformRoIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois,
Tensor offset, Tensor output, Tensor offset, Tensor output,
int pooled_height, int pooled_width, int pooled_height, int pooled_width,
...@@ -38,7 +38,7 @@ void deform_roi_pool_forward(Tensor input, Tensor rois, Tensor offset, ...@@ -38,7 +38,7 @@ void deform_roi_pool_forward(Tensor input, Tensor rois, Tensor offset,
float spatial_scale, int sampling_ratio, float spatial_scale, int sampling_ratio,
float gamma) { float gamma) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(rois); CHECK_CUDA_INPUT(rois);
CHECK_CUDA_INPUT(offset); CHECK_CUDA_INPUT(offset);
...@@ -61,7 +61,7 @@ void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois, ...@@ -61,7 +61,7 @@ void deform_roi_pool_backward(Tensor grad_output, Tensor input, Tensor rois,
int pooled_width, float spatial_scale, int pooled_width, float spatial_scale,
int sampling_ratio, float gamma) { int sampling_ratio, float gamma) {
if (grad_output.device().is_cuda()) { if (grad_output.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(grad_output); CHECK_CUDA_INPUT(grad_output);
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(rois); CHECK_CUDA_INPUT(rois);
......
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
void SigmoidFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target, void SigmoidFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target,
Tensor weight, Tensor output, Tensor weight, Tensor output,
const float gamma, const float gamma,
...@@ -54,7 +54,7 @@ void softmax_focal_loss_backward_cuda(Tensor input, Tensor target, ...@@ -54,7 +54,7 @@ void softmax_focal_loss_backward_cuda(Tensor input, Tensor target,
void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight, void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
Tensor output, float gamma, float alpha) { Tensor output, float gamma, float alpha) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(target); CHECK_CUDA_INPUT(target);
CHECK_CUDA_INPUT(weight); CHECK_CUDA_INPUT(weight);
...@@ -73,7 +73,7 @@ void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight, ...@@ -73,7 +73,7 @@ void sigmoid_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight, void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
Tensor grad_input, float gamma, float alpha) { Tensor grad_input, float gamma, float alpha) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(target); CHECK_CUDA_INPUT(target);
CHECK_CUDA_INPUT(weight); CHECK_CUDA_INPUT(weight);
...@@ -92,7 +92,7 @@ void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight, ...@@ -92,7 +92,7 @@ void sigmoid_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
void softmax_focal_loss_forward(Tensor input, Tensor target, Tensor weight, void softmax_focal_loss_forward(Tensor input, Tensor target, Tensor weight,
Tensor output, float gamma, float alpha) { Tensor output, float gamma, float alpha) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(target); CHECK_CUDA_INPUT(target);
CHECK_CUDA_INPUT(weight); CHECK_CUDA_INPUT(weight);
...@@ -112,7 +112,7 @@ void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight, ...@@ -112,7 +112,7 @@ void softmax_focal_loss_backward(Tensor input, Tensor target, Tensor weight,
Tensor buff, Tensor grad_input, float gamma, Tensor buff, Tensor grad_input, float gamma,
float alpha) { float alpha) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(target); CHECK_CUDA_INPUT(target);
CHECK_CUDA_INPUT(weight); CHECK_CUDA_INPUT(weight);
......
#include "pytorch_cuda_helper.hpp" #include "pytorch_cuda_helper.hpp"
#include "sigmoid_focal_loss_kernel.cuh" #include "sigmoid_focal_loss_cuda_kernel.cuh"
#include "softmax_focal_loss_kernel.cuh" #include "softmax_focal_loss_cuda_kernel.cuh"
void SigmoidFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target, void SigmoidFocalLossForwardCUDAKernelLauncher(Tensor input, Tensor target,
Tensor weight, Tensor output, Tensor weight, Tensor output,
......
...@@ -2,13 +2,13 @@ ...@@ -2,13 +2,13 @@
// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/vision.cpp
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
#include <cuda_runtime_api.h> #include <cuda_runtime_api.h>
int get_cudart_version() { return CUDART_VERSION; } int get_cudart_version() { return CUDART_VERSION; }
#endif #endif
std::string get_compiling_cuda_version() { std::string get_compiling_cuda_version() {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
std::ostringstream oss; std::ostringstream oss;
// copied from // copied from
// https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231 // https://github.com/pytorch/pytorch/blob/master/aten/src/ATen/cuda/detail/CUDAHooks.cpp#L231
......
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
void MaskedIm2colForwardCUDAKernelLauncher(const Tensor bottom_data, void MaskedIm2colForwardCUDAKernelLauncher(const Tensor bottom_data,
const Tensor mask_h_idx, const Tensor mask_h_idx,
const Tensor mask_w_idx, const Tensor mask_w_idx,
...@@ -39,7 +39,7 @@ void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx, ...@@ -39,7 +39,7 @@ void masked_im2col_forward(const Tensor im, const Tensor mask_h_idx,
const int kernel_h, const int kernel_w, const int kernel_h, const int kernel_w,
const int pad_h, const int pad_w) { const int pad_h, const int pad_w) {
if (im.device().is_cuda()) { if (im.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(im); CHECK_CUDA_INPUT(im);
CHECK_CUDA_INPUT(mask_h_idx); CHECK_CUDA_INPUT(mask_h_idx);
CHECK_CUDA_INPUT(mask_w_idx); CHECK_CUDA_INPUT(mask_w_idx);
...@@ -58,7 +58,7 @@ void masked_col2im_forward(const Tensor col, const Tensor mask_h_idx, ...@@ -58,7 +58,7 @@ void masked_col2im_forward(const Tensor col, const Tensor mask_h_idx,
const Tensor mask_w_idx, Tensor im, int height, const Tensor mask_w_idx, Tensor im, int height,
int width, int channels) { int width, int channels) {
if (col.device().is_cuda()) { if (col.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(col); CHECK_CUDA_INPUT(col);
CHECK_CUDA_INPUT(mask_h_idx); CHECK_CUDA_INPUT(mask_h_idx);
CHECK_CUDA_INPUT(mask_w_idx); CHECK_CUDA_INPUT(mask_w_idx);
......
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
void ModulatedDeformConvForwardCUDAKernelLauncher( void ModulatedDeformConvForwardCUDAKernelLauncher(
Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset, Tensor input, Tensor weight, Tensor bias, Tensor ones, Tensor offset,
Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w, Tensor mask, Tensor output, Tensor columns, int kernel_h, int kernel_w,
...@@ -50,7 +50,7 @@ void modulated_deform_conv_forward( ...@@ -50,7 +50,7 @@ void modulated_deform_conv_forward(
const int dilation_h, const int dilation_w, const int group, const int dilation_h, const int dilation_w, const int group,
const int deformable_group, const bool with_bias) { const int deformable_group, const bool with_bias) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(weight); CHECK_CUDA_INPUT(weight);
CHECK_CUDA_INPUT(bias); CHECK_CUDA_INPUT(bias);
...@@ -80,7 +80,7 @@ void modulated_deform_conv_backward( ...@@ -80,7 +80,7 @@ void modulated_deform_conv_backward(
int pad_w, int dilation_h, int dilation_w, int group, int deformable_group, int pad_w, int dilation_h, int dilation_w, int group, int deformable_group,
const bool with_bias) { const bool with_bias) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(weight); CHECK_CUDA_INPUT(weight);
CHECK_CUDA_INPUT(bias); CHECK_CUDA_INPUT(bias);
......
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold, Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold,
int offset); int offset);
...@@ -62,7 +62,7 @@ Tensor nms_cpu(Tensor boxes, Tensor scores, float iou_threshold, int offset) { ...@@ -62,7 +62,7 @@ Tensor nms_cpu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset) { Tensor nms(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
if (boxes.device().is_cuda()) { if (boxes.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(boxes); CHECK_CUDA_INPUT(boxes);
CHECK_CUDA_INPUT(scores); CHECK_CUDA_INPUT(scores);
return nms_cuda(boxes, scores, iou_threshold, offset); return nms_cuda(boxes, scores, iou_threshold, offset);
......
#include "nms_kernel.cuh" #include "nms_cuda_kernel.cuh"
#include "pytorch_cuda_helper.hpp" #include "pytorch_cuda_helper.hpp"
Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold, Tensor NMSCUDAKernelLauncher(Tensor boxes, Tensor scores, float iou_threshold,
......
...@@ -182,7 +182,7 @@ void psamask_backward_cpu(const int psa_type, const Tensor grad_output, ...@@ -182,7 +182,7 @@ void psamask_backward_cpu(const int psa_type, const Tensor grad_output,
grad_input); grad_input);
} }
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
void PSAMaskForwardCUDAKernelLauncher(const int psa_type, const Tensor input, void PSAMaskForwardCUDAKernelLauncher(const int psa_type, const Tensor input,
Tensor output, const int num_, Tensor output, const int num_,
const int h_feature, const int w_feature, const int h_feature, const int w_feature,
...@@ -221,7 +221,7 @@ void psamask_forward(const Tensor input, Tensor output, const int psa_type, ...@@ -221,7 +221,7 @@ void psamask_forward(const Tensor input, Tensor output, const int psa_type,
const int h_mask, const int w_mask, const int half_h_mask, const int h_mask, const int w_mask, const int half_h_mask,
const int half_w_mask) { const int half_w_mask) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(output); CHECK_CUDA_INPUT(output);
psamask_forward_cuda(psa_type, input, output, num_, h_feature, w_feature, psamask_forward_cuda(psa_type, input, output, num_, h_feature, w_feature,
...@@ -240,7 +240,7 @@ void psamask_backward(Tensor grad_output, const Tensor grad_input, ...@@ -240,7 +240,7 @@ void psamask_backward(Tensor grad_output, const Tensor grad_input,
const int w_feature, const int h_mask, const int w_mask, const int w_feature, const int h_mask, const int w_mask,
const int half_h_mask, const int half_w_mask) { const int half_h_mask, const int half_w_mask) {
if (grad_input.device().is_cuda()) { if (grad_input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(grad_input); CHECK_CUDA_INPUT(grad_input);
CHECK_CUDA_INPUT(grad_output); CHECK_CUDA_INPUT(grad_output);
psamask_backward_cuda(psa_type, grad_output, grad_input, num_, h_feature, psamask_backward_cuda(psa_type, grad_output, grad_input, num_, h_feature,
......
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
Tensor argmax_y, Tensor argmax_x, Tensor argmax_y, Tensor argmax_x,
int aligned_height, int aligned_width, int aligned_height, int aligned_width,
...@@ -40,7 +40,7 @@ void roi_align_forward(Tensor input, Tensor rois, Tensor output, ...@@ -40,7 +40,7 @@ void roi_align_forward(Tensor input, Tensor rois, Tensor output,
int aligned_width, float spatial_scale, int aligned_width, float spatial_scale,
int sampling_ratio, int pool_mode, bool aligned) { int sampling_ratio, int pool_mode, bool aligned) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(rois); CHECK_CUDA_INPUT(rois);
CHECK_CUDA_INPUT(output); CHECK_CUDA_INPUT(output);
...@@ -63,7 +63,7 @@ void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y, ...@@ -63,7 +63,7 @@ void roi_align_backward(Tensor grad_output, Tensor rois, Tensor argmax_y,
int aligned_width, float spatial_scale, int aligned_width, float spatial_scale,
int sampling_ratio, int pool_mode, bool aligned) { int sampling_ratio, int pool_mode, bool aligned) {
if (grad_output.device().is_cuda()) { if (grad_output.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(grad_output); CHECK_CUDA_INPUT(grad_output);
CHECK_CUDA_INPUT(rois); CHECK_CUDA_INPUT(rois);
CHECK_CUDA_INPUT(argmax_y); CHECK_CUDA_INPUT(argmax_y);
......
#include "pytorch_cuda_helper.hpp" #include "pytorch_cuda_helper.hpp"
#include "roi_align_kernel.cuh" #include "roi_align_cuda_kernel.cuh"
void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, void ROIAlignForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
Tensor argmax_y, Tensor argmax_x, Tensor argmax_y, Tensor argmax_x,
......
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
Tensor argmax, int pooled_height, Tensor argmax, int pooled_height,
int pooled_width, float spatial_scale); int pooled_width, float spatial_scale);
...@@ -29,7 +29,7 @@ void roi_pool_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax, ...@@ -29,7 +29,7 @@ void roi_pool_forward(Tensor input, Tensor rois, Tensor output, Tensor argmax,
int pooled_height, int pooled_width, int pooled_height, int pooled_width,
float spatial_scale) { float spatial_scale) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(rois); CHECK_CUDA_INPUT(rois);
CHECK_CUDA_INPUT(output); CHECK_CUDA_INPUT(output);
...@@ -49,7 +49,7 @@ void roi_pool_backward(Tensor grad_output, Tensor rois, Tensor argmax, ...@@ -49,7 +49,7 @@ void roi_pool_backward(Tensor grad_output, Tensor rois, Tensor argmax,
Tensor grad_input, int pooled_height, int pooled_width, Tensor grad_input, int pooled_height, int pooled_width,
float spatial_scale) { float spatial_scale) {
if (grad_output.device().is_cuda()) { if (grad_output.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(grad_output); CHECK_CUDA_INPUT(grad_output);
CHECK_CUDA_INPUT(rois); CHECK_CUDA_INPUT(rois);
CHECK_CUDA_INPUT(argmax); CHECK_CUDA_INPUT(argmax);
......
#include "pytorch_cuda_helper.hpp" #include "pytorch_cuda_helper.hpp"
#include "roi_pool_kernel.cuh" #include "roi_pool_cuda_kernel.cuh"
void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output, void ROIPoolForwardCUDAKernelLauncher(Tensor input, Tensor rois, Tensor output,
Tensor argmax, int pooled_height, Tensor argmax, int pooled_height,
......
#include "pytorch_cpp_helper.hpp" #include "pytorch_cpp_helper.hpp"
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
void SyncBNForwardMeanCUDAKernelLauncher(const Tensor input, Tensor mean); void SyncBNForwardMeanCUDAKernelLauncher(const Tensor input, Tensor mean);
void SyncBNForwardVarCUDAKernelLauncher(const Tensor input, const Tensor mean, void SyncBNForwardVarCUDAKernelLauncher(const Tensor input, const Tensor mean,
...@@ -61,7 +61,7 @@ void sync_bn_backward_data_cuda(const Tensor grad_output, const Tensor weight, ...@@ -61,7 +61,7 @@ void sync_bn_backward_data_cuda(const Tensor grad_output, const Tensor weight,
void sync_bn_forward_mean(const Tensor input, Tensor mean) { void sync_bn_forward_mean(const Tensor input, Tensor mean) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(mean); CHECK_CUDA_INPUT(mean);
sync_bn_forward_mean_cuda(input, mean); sync_bn_forward_mean_cuda(input, mean);
...@@ -75,7 +75,7 @@ void sync_bn_forward_mean(const Tensor input, Tensor mean) { ...@@ -75,7 +75,7 @@ void sync_bn_forward_mean(const Tensor input, Tensor mean) {
void sync_bn_forward_var(const Tensor input, const Tensor mean, Tensor var) { void sync_bn_forward_var(const Tensor input, const Tensor mean, Tensor var) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(mean); CHECK_CUDA_INPUT(mean);
CHECK_CUDA_INPUT(var); CHECK_CUDA_INPUT(var);
...@@ -95,7 +95,7 @@ void sync_bn_forward_output(const Tensor input, const Tensor mean, ...@@ -95,7 +95,7 @@ void sync_bn_forward_output(const Tensor input, const Tensor mean,
Tensor output, float eps, float momentum, Tensor output, float eps, float momentum,
int group_size) { int group_size) {
if (input.device().is_cuda()) { if (input.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(input); CHECK_CUDA_INPUT(input);
CHECK_CUDA_INPUT(mean); CHECK_CUDA_INPUT(mean);
CHECK_CUDA_INPUT(var); CHECK_CUDA_INPUT(var);
...@@ -120,7 +120,7 @@ void sync_bn_forward_output(const Tensor input, const Tensor mean, ...@@ -120,7 +120,7 @@ void sync_bn_forward_output(const Tensor input, const Tensor mean,
void sync_bn_backward_param(const Tensor grad_output, const Tensor norm, void sync_bn_backward_param(const Tensor grad_output, const Tensor norm,
Tensor grad_weight, Tensor grad_bias) { Tensor grad_weight, Tensor grad_bias) {
if (grad_output.device().is_cuda()) { if (grad_output.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(grad_output); CHECK_CUDA_INPUT(grad_output);
CHECK_CUDA_INPUT(norm); CHECK_CUDA_INPUT(norm);
CHECK_CUDA_INPUT(grad_weight); CHECK_CUDA_INPUT(grad_weight);
...@@ -139,7 +139,7 @@ void sync_bn_backward_data(const Tensor grad_output, const Tensor weight, ...@@ -139,7 +139,7 @@ void sync_bn_backward_data(const Tensor grad_output, const Tensor weight,
const Tensor norm, const Tensor std, const Tensor norm, const Tensor std,
Tensor grad_input) { Tensor grad_input) {
if (grad_output.device().is_cuda()) { if (grad_output.device().is_cuda()) {
#ifdef WITH_CUDA #ifdef MMCV_WITH_CUDA
CHECK_CUDA_INPUT(grad_output); CHECK_CUDA_INPUT(grad_output);
CHECK_CUDA_INPUT(weight); CHECK_CUDA_INPUT(weight);
CHECK_CUDA_INPUT(grad_weight); CHECK_CUDA_INPUT(grad_weight);
......
#ifndef ROI_ALIGN_KERNEL_CUH #ifndef ROI_ALIGN_CUDA_KERNEL_CUH
#define ROI_ALIGN_KERNEL_CUH #define ROI_ALIGN_CUDA_KERNEL_CUH
#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif
/*** Forward ***/ /*** Forward ***/
template <typename T> template <typename T>
...@@ -196,4 +202,4 @@ __global__ void roi_align_backward_cuda_kernel( ...@@ -196,4 +202,4 @@ __global__ void roi_align_backward_cuda_kernel(
} }
} }
#endif // ROI_ALIGN_KERNEL_CUH #endif // ROI_ALIGN_CUDA_KERNEL_CUH
#ifndef ROI_POOL_KERNEL_CUH #ifndef ROI_POOL_CUDA_KERNEL_CUH
#define ROI_POOL_KERNEL_CUH #define ROI_POOL_CUDA_KERNEL_CUH
#include <cuda.h> #ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif
template <typename T> template <typename T>
__global__ void roi_pool_forward_cuda_kernel( __global__ void roi_pool_forward_cuda_kernel(
...@@ -85,4 +89,4 @@ __global__ void roi_pool_backward_cuda_kernel( ...@@ -85,4 +89,4 @@ __global__ void roi_pool_backward_cuda_kernel(
} }
} }
#endif #endif // ROI_POOL_CUDA_KERNEL_CUH
#ifndef SIGMOID_FOCAL_LOSS_KERNEL_CUH #ifndef SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH
#define SIGMOID_FOCAL_LOSS_KERNEL_CUH #define SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH
#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif
template <typename T> template <typename T>
__global__ void sigmoid_focal_loss_forward_cuda_kernel( __global__ void sigmoid_focal_loss_forward_cuda_kernel(
...@@ -60,4 +66,5 @@ __global__ void sigmoid_focal_loss_backward_cuda_kernel( ...@@ -60,4 +66,5 @@ __global__ void sigmoid_focal_loss_backward_cuda_kernel(
} }
} }
} }
#endif
#endif // SIGMOID_FOCAL_LOSS_CUDA_KERNEL_CUH
#ifndef SOFTMAX_FOCAL_LOSS_KERNEL_CUH #ifndef SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH
#define SOFTMAX_FOCAL_LOSS_KERNEL_CUH #define SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH
#ifdef MMCV_USE_PARROTS
#include "parrots_cuda_helper.hpp"
#else
#include "pytorch_cuda_helper.hpp"
#endif
template <typename T> template <typename T>
__global__ void softmax_focal_loss_forward_cuda_kernel( __global__ void softmax_focal_loss_forward_cuda_kernel(
...@@ -61,4 +67,5 @@ __global__ void softmax_focal_loss_backward_cuda2_kernel( ...@@ -61,4 +67,5 @@ __global__ void softmax_focal_loss_backward_cuda2_kernel(
} }
} }
} }
#endif
#endif // SOFTMAX_FOCAL_LOSS_CUDA_KERNEL_CUH
#ifndef SOFTNMS_KERNEL_CUH
#define SOFTNMS_KERNEL_CUH
#include <cuda.h>
// Integer division of m by n, plus one when the remainder is positive
// (i.e. ceiling division for non-negative m and positive n). Both arguments
// are evaluated twice, so do not pass expressions with side effects.
#define DIVUP(m, n) ((m) / (n) + ((m) % (n) > 0))
// Number of bits in an unsigned long long (sizeof(...) * 8). The kernels in
// this header use it both as the per-block element count when computing
// global indices and as the width of one `keep` bitmask word.
int const threadsPerBlock = sizeof(unsigned long long int) * 8;
// Intersection-over-union of two boxes.
//
// Each pointer must reference at least four values that are read as
// (a[0], a[1]) and (a[2], a[3]) = opposite corners of the box. Widths and
// heights are computed inclusively (the "+ 1" terms), and a negative overlap
// extent is clamped to zero before the intersection area is formed.
//
// Returns intersection / (area(a) + area(b) - intersection).
template <typename scalar_t>
__device__ inline scalar_t devIoU(scalar_t const *const a,
                                  scalar_t const *const b) {
  // Corners of the overlapping rectangle.
  const scalar_t ix1 = fmaxf(a[0], b[0]);
  const scalar_t iy1 = fmaxf(a[1], b[1]);
  const scalar_t ix2 = fminf(a[2], b[2]);
  const scalar_t iy2 = fminf(a[3], b[3]);

  // Inclusive extents, clamped at zero when the boxes do not overlap.
  const scalar_t overlap_w = fmaxf(ix2 - ix1 + 1, 0.f);
  const scalar_t overlap_h = fmaxf(iy2 - iy1 + 1, 0.f);
  const scalar_t intersection = overlap_w * overlap_h;

  const scalar_t area_a = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
  const scalar_t area_b = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);

  return intersection / (area_a + area_b - intersection);
}
// Per-block arg-max over box scores.
//
// Every block examines the threadsPerBlock consecutive boxes starting at
// blockIdx.x * threadsPerBlock. A box is a candidate when its `order` entry is
// 0 and the fifth value of its 5-value record (dev_boxes[idx * 5 + 4]) is
// >= overlap_thresh. The block writes the best candidate's value to
// max_value[blockIdx.x] and its block-local index (0..threadsPerBlock-1) to
// max_index[blockIdx.x]. When the block has no candidate the value written is
// -1 and the index is 0. Ties keep the lowest index.
//
// Launch requirements: one-dimensional blocks with
// blockDim.x == threadsPerBlock (which must be a power of two for the
// halving reduction below).
//
// Differences from the previous implementation (bug fixes):
//  * Threads past the end of the data no longer return before the barrier;
//    they contribute a -1 sentinel instead, so every thread of the block
//    reaches each __syncthreads().
//  * The reduction always runs over the full, sentinel-padded block. The old
//    code skipped reduction stages based on the tail block's size and thereby
//    ignored elements above the largest power of two in a partial block.
//  * The barrier-free "warp-synchronous" volatile reduction was replaced by a
//    barrier-synchronised tree, which does not rely on implicit lockstep
//    execution within a warp.
template <typename scalar_t>
__global__ void softnms_max_kernel(const int n_boxes,
                                   const scalar_t overlap_thresh,
                                   const scalar_t *dev_boxes, int *order,
                                   float *max_value, int *max_index) {
  __shared__ float maximum[threadsPerBlock];
  __shared__ int max_id[threadsPerBlock];

  const unsigned int tid = threadIdx.x;
  const unsigned int idx = blockIdx.x * threadsPerBlock + threadIdx.x;
  const bool in_range = idx < static_cast<unsigned int>(n_boxes);

  // Out-of-range and non-candidate slots get a sentinel that can never win
  // against a real candidate under the strict '<' comparison below.
  float candidate = -1.0f;
  if (in_range && order[idx] == 0 &&
      dev_boxes[idx * 5 + 4] >= overlap_thresh) {
    candidate = dev_boxes[idx * 5 + 4];
  }
  maximum[tid] = candidate;
  max_id[tid] = tid;
  __syncthreads();

  // Tree reduction in shared memory; the barrier sits outside the branch so
  // that all threads of the block execute it on every iteration.
  for (unsigned int stride = threadsPerBlock / 2; stride > 0; stride >>= 1) {
    if (tid < stride && maximum[tid] < maximum[tid + stride]) {
      maximum[tid] = maximum[tid + stride];
      max_id[tid] = max_id[tid + stride];
    }
    __syncthreads();
  }

  // As before, a block that lies entirely past n_boxes writes nothing.
  if (tid == 0 && in_range) {
    max_value[blockIdx.x] = maximum[0];
    max_index[blockIdx.x] = max_id[0];
  }
}
// Re-weights the scores of all still-active boxes against the box `max_id`.
//
// Each thread handles the box at blockIdx.x * threadsPerBlock + threadIdx.x.
// A box is processed when it is inside [0, n_boxes), is not `max_id` itself,
// has an `order` entry of 0, and its bit in keep[blockIdx.x] is not yet set.
// For such a box the IoU with box `max_id` is computed and its fifth value
// (dev_boxes[i * 5 + 4]) is multiplied by a weight:
//   method == 1 : 1 - iou when iou >  n_thresh, else 1
//   method == 2 : exp(-(iou * iou) / sigma)
//   otherwise   : 0       when iou >= n_thresh, else 1
// If the resulting value drops below overlap_thresh, the box's bit
// (1ULL << threadIdx.x) is set in keep[blockIdx.x].
//
// Launch requirements: one-dimensional blocks with
// blockDim.x == threadsPerBlock; max_id must index a valid box.
//
// Differences from the previous implementation (bug fixes):
//  * The bit is set with atomicOr. The old `keep[col] |= bit` was a plain
//    read-modify-write on a word shared by every thread of the block, so
//    concurrent updates could overwrite each other and lose bits.
//  * No thread returns before __syncthreads() any more; the activity test is
//    evaluated first and acted upon only after the barrier. This also orders
//    every read of keep[col] before any atomic update of it within the block.
//  * The reference box is staged into shared memory by five threads instead
//    of being redundantly stored by every thread.
template <typename scalar_t>
__global__ void softnms_update_kernel(const int n_boxes, const scalar_t sigma,
                                      const scalar_t n_thresh,
                                      const unsigned int method,
                                      const scalar_t overlap_thresh,
                                      scalar_t *dev_boxes, int *order,
                                      unsigned long long *keep, int max_id) {
  const int col_start = blockIdx.x;
  const int tid = threadIdx.x;
  const int cur_idx = threadsPerBlock * col_start + tid;

  // Stage the reference box once per block.
  __shared__ scalar_t cur_max_boxes[5];
  if (tid < 5) {
    cur_max_boxes[tid] = dev_boxes[max_id * 5 + tid];
  }

  // Short-circuit order matters: `order` and `keep` are only touched for
  // in-range boxes, and the shift is only evaluated for tid < threadsPerBlock.
  const bool active = cur_idx < n_boxes && tid < threadsPerBlock &&
                      cur_idx != max_id && order[cur_idx] == 0 &&
                      !(keep[col_start] & (1ULL << tid));

  __syncthreads();
  if (!active) {
    return;
  }

  scalar_t block_boxes[5];
  block_boxes[0] = dev_boxes[cur_idx * 5 + 0];
  block_boxes[1] = dev_boxes[cur_idx * 5 + 1];
  block_boxes[2] = dev_boxes[cur_idx * 5 + 2];
  block_boxes[3] = dev_boxes[cur_idx * 5 + 3];
  block_boxes[4] = dev_boxes[cur_idx * 5 + 4];

  const scalar_t ovr = devIoU(cur_max_boxes, block_boxes);

  scalar_t weight = 1.0;
  if (method == 1) {
    // Linear decay.
    if (ovr > n_thresh) {
      weight = 1.0 - ovr;
    }
  } else if (method == 2) {
    // Gaussian decay.
    weight = exp(-(ovr * ovr) / sigma);
  } else if (ovr >= n_thresh) {
    // Hard suppression.
    weight = 0.0;
  }

  block_boxes[4] *= weight;
  dev_boxes[cur_idx * 5 + 4] = block_boxes[4];

  if (block_boxes[4] < overlap_thresh) {
    // Atomic so that bits set by other threads of this block are preserved.
    atomicOr(&keep[col_start], 1ULL << tid);
  }
}
#endif
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment