Unverified Commit 3c33f367 authored by Vasilis Vryniotis's avatar Vasilis Vryniotis Committed by GitHub
Browse files

Per file C++ Operator registration (#3135)

* Moving deform_conv2d op registration.

* Moving nms op registration.

* Moving new_empty_tensor op registration.

* Moving ps_roi_align op registration.

* Moving ps_roi_pool op registration.

* Moving roi_align op registration.

* Moving roi_pool op registration.

* Restoring headers for forward/backward and fixing styles.

* Restoring the test hack on windows.

* Stricter header inclusion.
parent 6cb4fc21
#include <ATen/ATen.h> #include <ATen/ATen.h>
#include <torch/script.h> #include <torch/script.h>
#include <torch/torch.h> #include <torch/torch.h>
#include <torchvision/roi_align.h>
#include <torchvision/nms.h> #include <torchvision/nms.h>
#include <torchvision/roi_align.h>
#ifdef _WIN32 #ifdef _WIN32
// Windows only // Windows only
// This is necessary until operators are automatically registered on include // This is necessary until operators are automatically registered on include
static auto _nms = &vision::ops::nms_cpu; static auto _nms = &vision::ops::nms;
#endif #endif
int main() { int main() {
......
...@@ -66,7 +66,8 @@ ...@@ -66,7 +66,8 @@
// modified from // modified from
// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp // https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp
#include "deform_conv2d_kernel.h" #include <ATen/ATen.h>
#include <torch/library.h>
namespace vision { namespace vision {
namespace ops { namespace ops {
...@@ -852,9 +853,7 @@ at::Tensor backward_gradient_parameters( ...@@ -852,9 +853,7 @@ at::Tensor backward_gradient_parameters(
return grad_weight; return grad_weight;
} }
} // namespace at::Tensor deform_conv2d_forward_kernel(
at::Tensor deform_conv2d_forward_cpu(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& weight, const at::Tensor& weight,
const at::Tensor& offset, const at::Tensor& offset,
...@@ -1070,7 +1069,7 @@ at::Tensor deform_conv2d_forward_cpu( ...@@ -1070,7 +1069,7 @@ at::Tensor deform_conv2d_forward_cpu(
} }
std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tensor> std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tensor>
deform_conv2d_backward_cpu( deform_conv2d_backward_kernel(
const at::Tensor& grad_out, const at::Tensor& grad_out,
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& weight, const at::Tensor& weight,
...@@ -1141,5 +1140,12 @@ deform_conv2d_backward_cpu( ...@@ -1141,5 +1140,12 @@ deform_conv2d_backward_cpu(
grad_input, grad_weight, grad_offset, grad_mask, grad_bias); grad_input, grad_weight, grad_offset, grad_mask, grad_bias);
} }
} // namespace
TORCH_LIBRARY_IMPL(torchvision, CPU, m) {
m.impl("deform_conv2d", deform_conv2d_forward_kernel);
m.impl("_deform_conv2d_backward", deform_conv2d_backward_kernel);
}
} // namespace ops } // namespace ops
} // namespace vision } // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
namespace vision {
namespace ops {
// CPU forward pass of Deformable Convolution v2. Declared here for export
// (VISION_API marks the symbol visible across the shared-library boundary);
// the implementation lives in the matching CPU kernel translation unit.
// NOTE(review): parameter semantics (h/w ordering of stride/pad/dilation,
// weight/offset group counts, mask gated by use_mask) are inferred from the
// names — confirm against the kernel implementation.
VISION_API at::Tensor deform_conv2d_forward_cpu(
const at::Tensor& input,
const at::Tensor& weight,
const at::Tensor& offset,
const at::Tensor& mask,
const at::Tensor& bias,
int64_t stride_h,
int64_t stride_w,
int64_t pad_h,
int64_t pad_w,
int64_t dilation_h,
int64_t dilation_w,
int64_t n_weight_grps,
int64_t n_offset_grps,
bool use_mask);
// CPU backward pass. Returns a 5-tuple of gradients; presumably
// (grad_input, grad_weight, grad_offset, grad_mask, grad_bias) matching the
// forward inputs — verify against the kernel's return statement.
VISION_API std::
tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tensor>
deform_conv2d_backward_cpu(
const at::Tensor& grad_out,
const at::Tensor& input,
const at::Tensor& weight,
const at::Tensor& offset,
const at::Tensor& mask,
const at::Tensor& bias,
int64_t stride_h,
int64_t stride_w,
int64_t pad_h,
int64_t pad_w,
int64_t dilation_h,
int64_t dilation_w,
int64_t n_weight_grps,
int64_t n_offset_grps,
bool use_mask);
} // namespace ops
} // namespace vision
#include "nms_kernel.h" #include <ATen/ATen.h>
#include <torch/library.h>
namespace vision { namespace vision {
namespace ops { namespace ops {
...@@ -74,9 +75,7 @@ at::Tensor nms_kernel_impl( ...@@ -74,9 +75,7 @@ at::Tensor nms_kernel_impl(
return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep); return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep);
} }
} // namespace at::Tensor nms_kernel(
at::Tensor nms_cpu(
const at::Tensor& dets, const at::Tensor& dets,
const at::Tensor& scores, const at::Tensor& scores,
double iou_threshold) { double iou_threshold) {
...@@ -101,11 +100,17 @@ at::Tensor nms_cpu( ...@@ -101,11 +100,17 @@ at::Tensor nms_cpu(
auto result = at::empty({0}, dets.options()); auto result = at::empty({0}, dets.options());
AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_cpu", [&] { AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_kernel", [&] {
result = nms_kernel_impl<scalar_t>(dets, scores, iou_threshold); result = nms_kernel_impl<scalar_t>(dets, scores, iou_threshold);
}); });
return result; return result;
} }
} // namespace
TORCH_LIBRARY_IMPL(torchvision, CPU, m) {
m.impl("nms", nms_kernel);
}
} // namespace ops } // namespace ops
} // namespace vision } // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
namespace vision {
namespace ops {
// CPU non-maximum suppression: keeps boxes whose pairwise IoU with a
// higher-scoring kept box does not exceed iou_threshold. Exported via
// VISION_API; implemented in the CPU nms kernel translation unit.
// NOTE(review): dets is presumably an [N, 4] box tensor and scores an [N]
// tensor, with the returned tensor holding kept indices — confirm against
// the kernel implementation.
VISION_API at::Tensor nms_cpu(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold);
} // namespace ops
} // namespace vision
#include "ps_roi_align_kernel.h" #include <ATen/ATen.h>
#include <torch/library.h>
namespace vision { namespace vision {
namespace ops { namespace ops {
...@@ -301,9 +302,7 @@ void ps_roi_align_backward_kernel_impl( ...@@ -301,9 +302,7 @@ void ps_roi_align_backward_kernel_impl(
} }
} }
} // namespace std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_kernel(
std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cpu(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -318,7 +317,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cpu( ...@@ -318,7 +317,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cpu(
at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "ps_roi_align_forward_cpu"; at::CheckedFrom c = "ps_roi_align_forward_kernel";
at::checkAllSameType(c, {input_t, rois_t}); at::checkAllSameType(c, {input_t, rois_t});
int num_rois = rois.size(0); int num_rois = rois.size(0);
...@@ -343,7 +342,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cpu( ...@@ -343,7 +342,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cpu(
auto input_ = input.contiguous(), rois_ = rois.contiguous(); auto input_ = input.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF( AT_DISPATCH_FLOATING_TYPES_AND_HALF(
input.scalar_type(), "ps_roi_align_forward_cpu", [&] { input.scalar_type(), "ps_roi_align_forward_kernel", [&] {
ps_roi_align_forward_kernel_impl<scalar_t>( ps_roi_align_forward_kernel_impl<scalar_t>(
output_size, output_size,
input_.data_ptr<scalar_t>(), input_.data_ptr<scalar_t>(),
...@@ -362,7 +361,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cpu( ...@@ -362,7 +361,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cpu(
return std::make_tuple(output, channel_mapping); return std::make_tuple(output, channel_mapping);
} }
at::Tensor ps_roi_align_backward_cpu( at::Tensor ps_roi_align_backward_kernel(
const at::Tensor& grad, const at::Tensor& grad,
const at::Tensor& rois, const at::Tensor& rois,
const at::Tensor& channel_mapping, const at::Tensor& channel_mapping,
...@@ -384,7 +383,7 @@ at::Tensor ps_roi_align_backward_cpu( ...@@ -384,7 +383,7 @@ at::Tensor ps_roi_align_backward_cpu(
at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}, at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2},
channel_mapping_t{channel_mapping, "channel_mapping", 3}; channel_mapping_t{channel_mapping, "channel_mapping", 3};
at::CheckedFrom c = "ps_roi_align_backward_cpu"; at::CheckedFrom c = "ps_roi_align_backward_kernel";
at::checkAllSameType(c, {grad_t, rois_t}); at::checkAllSameType(c, {grad_t, rois_t});
auto num_rois = rois.size(0); auto num_rois = rois.size(0);
...@@ -400,7 +399,7 @@ at::Tensor ps_roi_align_backward_cpu( ...@@ -400,7 +399,7 @@ at::Tensor ps_roi_align_backward_cpu(
auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); auto grad_ = grad.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF( AT_DISPATCH_FLOATING_TYPES_AND_HALF(
grad.scalar_type(), "ps_roi_align_backward_cpu", [&] { grad.scalar_type(), "ps_roi_align_backward_kernel", [&] {
ps_roi_align_backward_kernel_impl<scalar_t>( ps_roi_align_backward_kernel_impl<scalar_t>(
grad.numel(), grad.numel(),
grad_.data_ptr<scalar_t>(), grad_.data_ptr<scalar_t>(),
...@@ -420,5 +419,12 @@ at::Tensor ps_roi_align_backward_cpu( ...@@ -420,5 +419,12 @@ at::Tensor ps_roi_align_backward_cpu(
return grad_input; return grad_input;
} }
} // namespace
TORCH_LIBRARY_IMPL(torchvision, CPU, m) {
m.impl("ps_roi_align", ps_roi_align_forward_kernel);
m.impl("_ps_roi_align_backward", ps_roi_align_backward_kernel);
}
} // namespace ops } // namespace ops
} // namespace vision } // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
namespace vision {
namespace ops {
// CPU forward pass of position-sensitive RoI Align. Returns a pair;
// presumably (output, channel_mapping), where channel_mapping is consumed by
// the backward pass below — verify against the kernel implementation.
VISION_API std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t sampling_ratio);
// CPU backward pass. The explicit batch_size/channels/height/width arguments
// give the shape of the gradient w.r.t. the input, since the forward input
// tensor itself is not passed back in.
VISION_API at::Tensor ps_roi_align_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t sampling_ratio,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
} // namespace ops
} // namespace vision
#include "ps_roi_pool_kernel.h" #include <ATen/ATen.h>
#include <torch/library.h>
namespace vision { namespace vision {
namespace ops { namespace ops {
...@@ -145,9 +146,7 @@ void ps_roi_pool_backward_kernel_impl( ...@@ -145,9 +146,7 @@ void ps_roi_pool_backward_kernel_impl(
} }
} }
} // namespace std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_kernel(
std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cpu(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -161,7 +160,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cpu( ...@@ -161,7 +160,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cpu(
at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "ps_roi_pool_forward_cpu"; at::CheckedFrom c = "ps_roi_pool_forward_kernel";
at::checkAllSameType(c, {input_t, rois_t}); at::checkAllSameType(c, {input_t, rois_t});
int num_rois = rois.size(0); int num_rois = rois.size(0);
...@@ -186,7 +185,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cpu( ...@@ -186,7 +185,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cpu(
auto input_ = input.contiguous(), rois_ = rois.contiguous(); auto input_ = input.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF( AT_DISPATCH_FLOATING_TYPES_AND_HALF(
input.scalar_type(), "ps_roi_pool_forward_cpu", [&] { input.scalar_type(), "ps_roi_pool_forward_kernel", [&] {
ps_roi_pool_forward_kernel_impl<scalar_t>( ps_roi_pool_forward_kernel_impl<scalar_t>(
input_.data_ptr<scalar_t>(), input_.data_ptr<scalar_t>(),
spatial_scale, spatial_scale,
...@@ -204,7 +203,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cpu( ...@@ -204,7 +203,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cpu(
return std::make_tuple(output, channel_mapping); return std::make_tuple(output, channel_mapping);
} }
at::Tensor ps_roi_pool_backward_cpu( at::Tensor ps_roi_pool_backward_kernel(
const at::Tensor& grad, const at::Tensor& grad,
const at::Tensor& rois, const at::Tensor& rois,
const at::Tensor& channel_mapping, const at::Tensor& channel_mapping,
...@@ -225,7 +224,7 @@ at::Tensor ps_roi_pool_backward_cpu( ...@@ -225,7 +224,7 @@ at::Tensor ps_roi_pool_backward_cpu(
at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}, at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2},
channel_mapping_t{channel_mapping, "channel_mapping", 3}; channel_mapping_t{channel_mapping, "channel_mapping", 3};
at::CheckedFrom c = "ps_roi_pool_backward_cpu"; at::CheckedFrom c = "ps_roi_pool_backward_kernel";
at::checkAllSameType(c, {grad_t, rois_t}); at::checkAllSameType(c, {grad_t, rois_t});
auto num_rois = rois.size(0); auto num_rois = rois.size(0);
...@@ -241,7 +240,7 @@ at::Tensor ps_roi_pool_backward_cpu( ...@@ -241,7 +240,7 @@ at::Tensor ps_roi_pool_backward_cpu(
auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); auto grad_ = grad.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF( AT_DISPATCH_FLOATING_TYPES_AND_HALF(
grad.scalar_type(), "ps_roi_pool_backward_cpu", [&] { grad.scalar_type(), "ps_roi_pool_backward_kernel", [&] {
ps_roi_pool_backward_kernel_impl<scalar_t>( ps_roi_pool_backward_kernel_impl<scalar_t>(
grad_.data_ptr<scalar_t>(), grad_.data_ptr<scalar_t>(),
channel_mapping.data_ptr<int>(), channel_mapping.data_ptr<int>(),
...@@ -259,5 +258,12 @@ at::Tensor ps_roi_pool_backward_cpu( ...@@ -259,5 +258,12 @@ at::Tensor ps_roi_pool_backward_cpu(
return grad_input; return grad_input;
} }
} // namespace
TORCH_LIBRARY_IMPL(torchvision, CPU, m) {
m.impl("ps_roi_pool", ps_roi_pool_forward_kernel);
m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_kernel);
}
} // namespace ops } // namespace ops
} // namespace vision } // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
namespace vision {
namespace ops {
// CPU forward pass of position-sensitive RoI Pooling. Returns a pair;
// presumably (output, channel_mapping), where channel_mapping is consumed by
// the backward pass below — verify against the kernel implementation.
VISION_API std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);
// CPU backward pass. batch_size/channels/height/width specify the shape of
// the returned gradient w.r.t. the forward input.
VISION_API at::Tensor ps_roi_pool_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
} // namespace ops
} // namespace vision
#include "roi_align_kernel.h" #include <ATen/ATen.h>
#include <torch/library.h>
namespace vision { namespace vision {
namespace ops { namespace ops {
...@@ -388,9 +389,7 @@ void roi_align_backward_kernel_impl( ...@@ -388,9 +389,7 @@ void roi_align_backward_kernel_impl(
} // for } // for
} }
} // namespace at::Tensor roi_align_forward_kernel(
at::Tensor roi_align_forward_cpu(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -404,7 +403,7 @@ at::Tensor roi_align_forward_cpu( ...@@ -404,7 +403,7 @@ at::Tensor roi_align_forward_cpu(
at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "roi_align_forward_cpu"; at::CheckedFrom c = "roi_align_forward_kernel";
at::checkAllSameType(c, {input_t, rois_t}); at::checkAllSameType(c, {input_t, rois_t});
auto num_rois = rois.size(0); auto num_rois = rois.size(0);
...@@ -422,7 +421,7 @@ at::Tensor roi_align_forward_cpu( ...@@ -422,7 +421,7 @@ at::Tensor roi_align_forward_cpu(
auto input_ = input.contiguous(), rois_ = rois.contiguous(); auto input_ = input.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF( AT_DISPATCH_FLOATING_TYPES_AND_HALF(
input.scalar_type(), "roi_align_forward_cpu", [&] { input.scalar_type(), "roi_align_forward_kernel", [&] {
roi_align_forward_kernel_impl<scalar_t>( roi_align_forward_kernel_impl<scalar_t>(
output_size, output_size,
input_.data_ptr<scalar_t>(), input_.data_ptr<scalar_t>(),
...@@ -440,7 +439,7 @@ at::Tensor roi_align_forward_cpu( ...@@ -440,7 +439,7 @@ at::Tensor roi_align_forward_cpu(
return output; return output;
} }
at::Tensor roi_align_backward_cpu( at::Tensor roi_align_backward_kernel(
const at::Tensor& grad, const at::Tensor& grad,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -457,7 +456,7 @@ at::Tensor roi_align_backward_cpu( ...@@ -457,7 +456,7 @@ at::Tensor roi_align_backward_cpu(
at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "roi_align_backward_cpu"; at::CheckedFrom c = "roi_align_backward_kernel";
at::checkAllSameType(c, {grad_t, rois_t}); at::checkAllSameType(c, {grad_t, rois_t});
at::Tensor grad_input = at::Tensor grad_input =
...@@ -476,7 +475,7 @@ at::Tensor roi_align_backward_cpu( ...@@ -476,7 +475,7 @@ at::Tensor roi_align_backward_cpu(
auto rois_ = rois.contiguous(); auto rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF( AT_DISPATCH_FLOATING_TYPES_AND_HALF(
grad.scalar_type(), "roi_align_backward_cpu", [&] { grad.scalar_type(), "roi_align_backward_kernel", [&] {
roi_align_backward_kernel_impl<scalar_t>( roi_align_backward_kernel_impl<scalar_t>(
grad.numel(), grad.numel(),
grad.data_ptr<scalar_t>(), grad.data_ptr<scalar_t>(),
...@@ -498,5 +497,12 @@ at::Tensor roi_align_backward_cpu( ...@@ -498,5 +497,12 @@ at::Tensor roi_align_backward_cpu(
return grad_input; return grad_input;
} }
} // namespace
TORCH_LIBRARY_IMPL(torchvision, CPU, m) {
m.impl("roi_align", roi_align_forward_kernel);
m.impl("_roi_align_backward", roi_align_backward_kernel);
}
} // namespace ops } // namespace ops
} // namespace vision } // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
namespace vision {
namespace ops {
// CPU forward pass of RoI Align. `aligned` selects the half-pixel-offset
// coordinate convention; sampling_ratio controls the number of bilinear
// sampling points per bin (non-positive presumably means adaptive — confirm
// against the kernel implementation).
VISION_API at::Tensor roi_align_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t sampling_ratio,
bool aligned);
// CPU backward pass. batch_size/channels/height/width specify the shape of
// the returned gradient w.r.t. the forward input.
VISION_API at::Tensor roi_align_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width,
int64_t sampling_ratio,
bool aligned);
} // namespace ops
} // namespace vision
#include <float.h> #include <float.h>
#include "roi_pool_kernel.h" #include <ATen/ATen.h>
#include <torch/library.h>
namespace vision { namespace vision {
namespace ops { namespace ops {
...@@ -124,9 +125,7 @@ void roi_pool_backward_kernel_impl( ...@@ -124,9 +125,7 @@ void roi_pool_backward_kernel_impl(
} // num_rois } // num_rois
} }
} // namespace std::tuple<at::Tensor, at::Tensor> roi_pool_forward_kernel(
std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cpu(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -137,7 +136,7 @@ std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cpu( ...@@ -137,7 +136,7 @@ std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cpu(
at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "roi_pool_forward_cpu"; at::CheckedFrom c = "roi_pool_forward_kernel";
at::checkAllSameType(c, {input_t, rois_t}); at::checkAllSameType(c, {input_t, rois_t});
int num_rois = rois.size(0); int num_rois = rois.size(0);
...@@ -157,7 +156,7 @@ std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cpu( ...@@ -157,7 +156,7 @@ std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cpu(
auto input_ = input.contiguous(), rois_ = rois.contiguous(); auto input_ = input.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF( AT_DISPATCH_FLOATING_TYPES_AND_HALF(
input.scalar_type(), "roi_pool_forward_cpu", [&] { input.scalar_type(), "roi_pool_forward_kernel", [&] {
roi_pool_forward_kernel_impl<scalar_t>( roi_pool_forward_kernel_impl<scalar_t>(
input_.data_ptr<scalar_t>(), input_.data_ptr<scalar_t>(),
spatial_scale, spatial_scale,
...@@ -174,7 +173,7 @@ std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cpu( ...@@ -174,7 +173,7 @@ std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cpu(
return std::make_tuple(output, argmax); return std::make_tuple(output, argmax);
} }
at::Tensor roi_pool_backward_cpu( at::Tensor roi_pool_backward_kernel(
const at::Tensor& grad, const at::Tensor& grad,
const at::Tensor& rois, const at::Tensor& rois,
const at::Tensor& argmax, const at::Tensor& argmax,
...@@ -194,7 +193,7 @@ at::Tensor roi_pool_backward_cpu( ...@@ -194,7 +193,7 @@ at::Tensor roi_pool_backward_cpu(
at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}; at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "roi_pool_backward_cpu"; at::CheckedFrom c = "roi_pool_backward_kernel";
at::checkAllSameType(c, {grad_t, rois_t}); at::checkAllSameType(c, {grad_t, rois_t});
auto num_rois = rois.size(0); auto num_rois = rois.size(0);
...@@ -215,7 +214,7 @@ at::Tensor roi_pool_backward_cpu( ...@@ -215,7 +214,7 @@ at::Tensor roi_pool_backward_cpu(
auto rois_ = rois.contiguous(); auto rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF( AT_DISPATCH_FLOATING_TYPES_AND_HALF(
grad.scalar_type(), "roi_pool_backward_cpu", [&] { grad.scalar_type(), "roi_pool_backward_kernel", [&] {
roi_pool_backward_kernel_impl<scalar_t>( roi_pool_backward_kernel_impl<scalar_t>(
grad.data_ptr<scalar_t>(), grad.data_ptr<scalar_t>(),
argmax.data_ptr<int>(), argmax.data_ptr<int>(),
...@@ -235,5 +234,12 @@ at::Tensor roi_pool_backward_cpu( ...@@ -235,5 +234,12 @@ at::Tensor roi_pool_backward_cpu(
return grad_input; return grad_input;
} }
} // namespace
TORCH_LIBRARY_IMPL(torchvision, CPU, m) {
m.impl("roi_pool", roi_pool_forward_kernel);
m.impl("_roi_pool_backward", roi_pool_backward_kernel);
}
} // namespace ops } // namespace ops
} // namespace vision } // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
namespace vision {
namespace ops {
// CPU forward pass of RoI (max) Pooling. Returns a pair; presumably
// (output, argmax), where argmax records the selected input locations used by
// the backward pass below — verify against the kernel implementation.
VISION_API std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);
// CPU backward pass. batch_size/channels/height/width specify the shape of
// the returned gradient w.r.t. the forward input.
VISION_API at::Tensor roi_pool_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
} // namespace ops
} // namespace vision
...@@ -66,12 +66,13 @@ ...@@ -66,12 +66,13 @@
// modified from // modified from
// https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp // https://github.com/open-mmlab/mmdetection/blob/master/mmdet/ops/dcn/src/deform_conv_cuda.cpp
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h> #include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h> #include <c10/cuda/CUDAGuard.h>
#include <torch/library.h>
#include <THC/THCAtomics.cuh> #include <THC/THCAtomics.cuh>
#include "cuda_helpers.h" #include "cuda_helpers.h"
#include "deform_conv2d_kernel.h"
namespace vision { namespace vision {
namespace ops { namespace ops {
...@@ -896,9 +897,7 @@ at::Tensor backward_gradient_parameters( ...@@ -896,9 +897,7 @@ at::Tensor backward_gradient_parameters(
return grad_weight; return grad_weight;
} }
} // namespace at::Tensor deform_conv2d_forward_kernel(
at::Tensor deform_conv2d_forward_cuda(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& weight, const at::Tensor& weight,
const at::Tensor& offset, const at::Tensor& offset,
...@@ -1115,7 +1114,7 @@ at::Tensor deform_conv2d_forward_cuda( ...@@ -1115,7 +1114,7 @@ at::Tensor deform_conv2d_forward_cuda(
} }
std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tensor> std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tensor>
deform_conv2d_backward_cuda( deform_conv2d_backward_kernel(
const at::Tensor& grad_out, const at::Tensor& grad_out,
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& weight, const at::Tensor& weight,
...@@ -1187,5 +1186,12 @@ deform_conv2d_backward_cuda( ...@@ -1187,5 +1186,12 @@ deform_conv2d_backward_cuda(
grad_input, grad_weight, grad_offset, grad_mask, grad_bias); grad_input, grad_weight, grad_offset, grad_mask, grad_bias);
} }
} // namespace
TORCH_LIBRARY_IMPL(torchvision, CUDA, m) {
m.impl("deform_conv2d", deform_conv2d_forward_kernel);
m.impl("_deform_conv2d_backward", deform_conv2d_backward_kernel);
}
} // namespace ops } // namespace ops
} // namespace vision } // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
namespace vision {
namespace ops {
// CUDA forward pass of Deformable Convolution v2. Mirrors the CPU
// declaration; exported via VISION_API and implemented in the CUDA kernel
// translation unit.
// NOTE(review): parameter semantics (h/w ordering of stride/pad/dilation,
// group counts, mask gated by use_mask) are inferred from the names —
// confirm against the kernel implementation.
VISION_API at::Tensor deform_conv2d_forward_cuda(
const at::Tensor& input,
const at::Tensor& weight,
const at::Tensor& offset,
const at::Tensor& mask,
const at::Tensor& bias,
int64_t stride_h,
int64_t stride_w,
int64_t pad_h,
int64_t pad_w,
int64_t dilation_h,
int64_t dilation_w,
int64_t n_weight_grps,
int64_t n_offset_grps,
bool use_mask);
// CUDA backward pass. Returns a 5-tuple of gradients; presumably
// (grad_input, grad_weight, grad_offset, grad_mask, grad_bias) matching the
// forward inputs — verify against the kernel's return statement.
VISION_API std::
tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tensor>
deform_conv2d_backward_cuda(
const at::Tensor& grad_out,
const at::Tensor& input,
const at::Tensor& weight,
const at::Tensor& offset,
const at::Tensor& mask,
const at::Tensor& bias,
int64_t stride_h,
int64_t stride_w,
int64_t pad_h,
int64_t pad_w,
int64_t dilation_h,
int64_t dilation_w,
int64_t n_weight_grps,
int64_t n_offset_grps,
bool use_mask);
} // namespace ops
} // namespace vision
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h> #include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h> #include <c10/cuda/CUDAGuard.h>
#include <torch/library.h>
#include "cuda_helpers.h" #include "cuda_helpers.h"
#include "nms_kernel.h"
namespace vision { namespace vision {
namespace ops { namespace ops {
...@@ -74,9 +75,7 @@ __global__ void nms_kernel_impl( ...@@ -74,9 +75,7 @@ __global__ void nms_kernel_impl(
} }
} }
} // namespace at::Tensor nms_kernel(
at::Tensor nms_cuda(
const at::Tensor& dets, const at::Tensor& dets,
const at::Tensor& scores, const at::Tensor& scores,
double iou_threshold) { double iou_threshold) {
...@@ -127,7 +126,7 @@ at::Tensor nms_cuda( ...@@ -127,7 +126,7 @@ at::Tensor nms_cuda(
cudaStream_t stream = at::cuda::getCurrentCUDAStream(); cudaStream_t stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES_AND_HALF( AT_DISPATCH_FLOATING_TYPES_AND_HALF(
dets_sorted.scalar_type(), "nms_cuda", [&] { dets_sorted.scalar_type(), "nms_kernel", [&] {
nms_kernel_impl<scalar_t><<<blocks, threads, 0, stream>>>( nms_kernel_impl<scalar_t><<<blocks, threads, 0, stream>>>(
dets_num, dets_num,
iou_threshold, iou_threshold,
...@@ -166,5 +165,11 @@ at::Tensor nms_cuda( ...@@ -166,5 +165,11 @@ at::Tensor nms_cuda(
.to(order_t.device(), keep.scalar_type())}); .to(order_t.device(), keep.scalar_type())});
} }
} // namespace
TORCH_LIBRARY_IMPL(torchvision, CUDA, m) {
m.impl("nms", nms_kernel);
}
} // namespace ops } // namespace ops
} // namespace vision } // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
namespace vision {
namespace ops {
// CUDA non-maximum suppression; mirrors the CPU declaration. Exported via
// VISION_API; implemented in the CUDA nms kernel translation unit.
// NOTE(review): dets is presumably an [N, 4] box tensor and scores an [N]
// tensor, with the returned tensor holding kept indices — confirm against
// the kernel implementation.
VISION_API at::Tensor nms_cuda(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold);
} // namespace ops
} // namespace vision
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h> #include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h> #include <c10/cuda/CUDAGuard.h>
#include <torch/library.h>
#include <THC/THCAtomics.cuh> #include <THC/THCAtomics.cuh>
#include "cuda_helpers.h" #include "cuda_helpers.h"
#include "ps_roi_align_kernel.h"
namespace vision { namespace vision {
namespace ops { namespace ops {
...@@ -295,9 +296,7 @@ __global__ void ps_roi_align_backward_kernel_impl( ...@@ -295,9 +296,7 @@ __global__ void ps_roi_align_backward_kernel_impl(
} }
} }
} // namespace std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_kernel(
std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cuda(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -312,7 +311,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cuda( ...@@ -312,7 +311,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cuda(
at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "ps_roi_align_forward_cuda"; at::CheckedFrom c = "ps_roi_align_forward_kernel";
at::checkAllSameGPU(c, {input_t, rois_t}); at::checkAllSameGPU(c, {input_t, rois_t});
at::checkAllSameType(c, {input_t, rois_t}); at::checkAllSameType(c, {input_t, rois_t});
...@@ -348,7 +347,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cuda( ...@@ -348,7 +347,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cuda(
auto input_ = input.contiguous(), rois_ = rois.contiguous(); auto input_ = input.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF( AT_DISPATCH_FLOATING_TYPES_AND_HALF(
input.scalar_type(), "ps_roi_align_forward_cuda", [&] { input.scalar_type(), "ps_roi_align_forward_kernel", [&] {
ps_roi_align_forward_kernel_impl<scalar_t><<<grid, block, 0, stream>>>( ps_roi_align_forward_kernel_impl<scalar_t><<<grid, block, 0, stream>>>(
output_size, output_size,
input_.data_ptr<scalar_t>(), input_.data_ptr<scalar_t>(),
...@@ -369,7 +368,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cuda( ...@@ -369,7 +368,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cuda(
return std::make_tuple(output, channel_mapping); return std::make_tuple(output, channel_mapping);
} }
at::Tensor ps_roi_align_backward_cuda( at::Tensor ps_roi_align_backward_kernel(
const at::Tensor& grad, const at::Tensor& grad,
const at::Tensor& rois, const at::Tensor& rois,
const at::Tensor& channel_mapping, const at::Tensor& channel_mapping,
...@@ -390,7 +389,7 @@ at::Tensor ps_roi_align_backward_cuda( ...@@ -390,7 +389,7 @@ at::Tensor ps_roi_align_backward_cuda(
at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}, at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2},
channel_mapping_t{channel_mapping, "channel_mapping", 3}; channel_mapping_t{channel_mapping, "channel_mapping", 3};
at::CheckedFrom c = "ps_roi_align_backward_cuda"; at::CheckedFrom c = "ps_roi_align_backward_kernel";
at::checkAllSameGPU(c, {grad_t, rois_t, channel_mapping_t}); at::checkAllSameGPU(c, {grad_t, rois_t, channel_mapping_t});
at::checkAllSameType(c, {grad_t, rois_t}); at::checkAllSameType(c, {grad_t, rois_t});
...@@ -417,7 +416,7 @@ at::Tensor ps_roi_align_backward_cuda( ...@@ -417,7 +416,7 @@ at::Tensor ps_roi_align_backward_cuda(
auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); auto grad_ = grad.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF( AT_DISPATCH_FLOATING_TYPES_AND_HALF(
grad.scalar_type(), "ps_roi_align_backward_cuda", [&] { grad.scalar_type(), "ps_roi_align_backward_kernel", [&] {
ps_roi_align_backward_kernel_impl<scalar_t><<<grid, block, 0, stream>>>( ps_roi_align_backward_kernel_impl<scalar_t><<<grid, block, 0, stream>>>(
grad.numel(), grad.numel(),
grad_.data_ptr<scalar_t>(), grad_.data_ptr<scalar_t>(),
...@@ -438,5 +437,12 @@ at::Tensor ps_roi_align_backward_cuda( ...@@ -438,5 +437,12 @@ at::Tensor ps_roi_align_backward_cuda(
return grad_input; return grad_input;
} }
} // namespace
TORCH_LIBRARY_IMPL(torchvision, CUDA, m) {
m.impl("ps_roi_align", ps_roi_align_forward_kernel);
m.impl("_ps_roi_align_backward", ps_roi_align_backward_kernel);
}
} // namespace ops } // namespace ops
} // namespace vision } // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
namespace vision {
namespace ops {
// CUDA forward pass of position-sensitive RoI Align; mirrors the CPU
// declaration. Returns a pair; presumably (output, channel_mapping), where
// channel_mapping is consumed by the backward pass below — verify against
// the kernel implementation.
VISION_API std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cuda(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t sampling_ratio);
// CUDA backward pass. batch_size/channels/height/width specify the shape of
// the returned gradient w.r.t. the forward input.
VISION_API at::Tensor ps_roi_align_backward_cuda(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t sampling_ratio,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
} // namespace ops
} // namespace vision
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h> #include <ATen/cuda/CUDAContext.h>
#include <c10/cuda/CUDAGuard.h> #include <c10/cuda/CUDAGuard.h>
#include <torch/library.h>
#include <THC/THCAtomics.cuh> #include <THC/THCAtomics.cuh>
#include "cuda_helpers.h" #include "cuda_helpers.h"
#include "ps_roi_pool_kernel.h"
namespace vision { namespace vision {
namespace ops { namespace ops {
...@@ -136,9 +137,7 @@ __global__ void ps_roi_pool_backward_kernel_impl( ...@@ -136,9 +137,7 @@ __global__ void ps_roi_pool_backward_kernel_impl(
} }
} }
} // namespace std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_kernel(
std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cuda(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -152,7 +151,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cuda( ...@@ -152,7 +151,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cuda(
at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2}; at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "ps_roi_pool_forward_cuda"; at::CheckedFrom c = "ps_roi_pool_forward_kernel";
at::checkAllSameGPU(c, {input_t, rois_t}); at::checkAllSameGPU(c, {input_t, rois_t});
at::checkAllSameType(c, {input_t, rois_t}); at::checkAllSameType(c, {input_t, rois_t});
...@@ -188,7 +187,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cuda( ...@@ -188,7 +187,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cuda(
auto input_ = input.contiguous(), rois_ = rois.contiguous(); auto input_ = input.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF( AT_DISPATCH_FLOATING_TYPES_AND_HALF(
input.scalar_type(), "ps_roi_pool_forward_cuda", [&] { input.scalar_type(), "ps_roi_pool_forward_kernel", [&] {
ps_roi_pool_forward_kernel_impl<scalar_t><<<grid, block, 0, stream>>>( ps_roi_pool_forward_kernel_impl<scalar_t><<<grid, block, 0, stream>>>(
output_size, output_size,
input_.data_ptr<scalar_t>(), input_.data_ptr<scalar_t>(),
...@@ -207,7 +206,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cuda( ...@@ -207,7 +206,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cuda(
return std::make_tuple(output, channel_mapping); return std::make_tuple(output, channel_mapping);
} }
at::Tensor ps_roi_pool_backward_cuda( at::Tensor ps_roi_pool_backward_kernel(
const at::Tensor& grad, const at::Tensor& grad,
const at::Tensor& rois, const at::Tensor& rois,
const at::Tensor& channel_mapping, const at::Tensor& channel_mapping,
...@@ -227,7 +226,7 @@ at::Tensor ps_roi_pool_backward_cuda( ...@@ -227,7 +226,7 @@ at::Tensor ps_roi_pool_backward_cuda(
at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2}, at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2},
channel_mapping_t{channel_mapping, "channel_mapping", 3}; channel_mapping_t{channel_mapping, "channel_mapping", 3};
at::CheckedFrom c = "ps_roi_pool_backward_cuda"; at::CheckedFrom c = "ps_roi_pool_backward_kernel";
at::checkAllSameGPU(c, {grad_t, rois_t, channel_mapping_t}); at::checkAllSameGPU(c, {grad_t, rois_t, channel_mapping_t});
at::checkAllSameType(c, {grad_t, rois_t}); at::checkAllSameType(c, {grad_t, rois_t});
...@@ -254,7 +253,7 @@ at::Tensor ps_roi_pool_backward_cuda( ...@@ -254,7 +253,7 @@ at::Tensor ps_roi_pool_backward_cuda(
auto grad_ = grad.contiguous(), rois_ = rois.contiguous(); auto grad_ = grad.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF( AT_DISPATCH_FLOATING_TYPES_AND_HALF(
grad.scalar_type(), "ps_roi_pool_backward_cuda", [&] { grad.scalar_type(), "ps_roi_pool_backward_kernel", [&] {
ps_roi_pool_backward_kernel_impl<scalar_t><<<grid, block, 0, stream>>>( ps_roi_pool_backward_kernel_impl<scalar_t><<<grid, block, 0, stream>>>(
grad.numel(), grad.numel(),
grad_.data_ptr<scalar_t>(), grad_.data_ptr<scalar_t>(),
...@@ -274,5 +273,12 @@ at::Tensor ps_roi_pool_backward_cuda( ...@@ -274,5 +273,12 @@ at::Tensor ps_roi_pool_backward_cuda(
return grad_input; return grad_input;
} }
} // namespace
// Register the CUDA kernels for the ps_roi_pool operators with the
// torchvision dispatch key table (per-file operator registration).
TORCH_LIBRARY_IMPL(torchvision, CUDA, m) {
// Forward kernel for the "ps_roi_pool" op.
m.impl("ps_roi_pool", ps_roi_pool_forward_kernel);
// Backward kernel; leading underscore presumably marks it as an
// internal autograd helper rather than a public op — verify against the
// schema registration in the corresponding .cpp file.
m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_kernel);
}
} // namespace ops } // namespace ops
} // namespace vision } // namespace vision
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment