Unverified Commit 0ebbb0ab authored by Vasilis Vryniotis's avatar Vasilis Vryniotis Committed by GitHub
Browse files

Encapsulate and Standardise C++ Ops (#3097)

* Encapsulate and standardize deform_conv2d (#3074)

* Rename files.

* Standardizing method names.

* Adding anonymous namespaces.

* Applying C++ naming rules and alinging variable names across headers and cpp files.

* Syncing names across implementations.

* Rename deform_conv2d.h to deform_conv2d.cpp

* Use header files:
- Create header files for kernel implementation and remove definitions from vision_*.h files.
- Eliminate unnecessary headers and ensure all cpp include their headers.

* Change the naming convention for kernel implementations.

* Remove the _param postfix from the variables and standardizing names.

* Exposing public forward/backward methods to the C++ API and moving methods around to minimize git blame changes.

* Encapsulate and standardize nms (#3081)

* Syncing, where possible, the names of functions across devices.

* Adding all internal functions in anonymous namespaces.

* Renaming C++/CUDA kernel files and moving operator code from header to cpp file.

* Create foreach cpp file a separate header file with "public" functions.

* Removing unnecessary repeated includes.

* Update CMakeLists.txt to include all headers.

* Encapsulate and standardize ps_roi_align (#3082)

* Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API.
Syncing, where possible, the names of functions across devices.

* Adding all internal functions in anonymous namespaces.

* Renaming C++/CUDA kernel files and moving operator code from header to cpp file.

* Create foreach cpp file a separate header file with "public" functions.

* Removing unnecessary repeated includes.

* Encapsulate and standardize ps_roi_pool (#3084)

* Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API.

* Adding all internal functions in anonymous namespaces.

* Renaming C++/CUDA kernel files and moving operator code from header to cpp file.

* Create foreach cpp file a separate header file with "public" functions.

* Removing unnecessary repeated includes.

* Encapsulate and standardize roi_align (#3085)

* Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API.

* Adding all internal functions in anonymous namespaces.

* Renaming C++/CUDA kernel files and moving operator code from header to cpp file.

* Create foreach cpp file a separate header file with "public" functions.

* Removing unnecessary repeated includes.

* Encapsulate and standardize roi_pool  (#3088)

* Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API.

* Adding all internal functions in anonymous namespaces.

* Syncing variable names between the cpp files and their header files.

* Renaming C++/CUDA kernel files and moving operator code from header to cpp file.

* Create foreach cpp file a separate header file with "public" functions.

* Removing unnecessary repeated includes.

* Encapsulate and standardize new_empty_tensor_op (#3089)

* Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API.

* Create foreach cpp file a separate header file with "public" functions.

* Adding all internal functions in anonymous namespaces.

* Convert to const ref all possible parameters.

* Removing unnecessary repeated includes.

* Encapsulate and standardize C++ Ops - Clean up (#3094)

* Removing unnecessary repeated includes.

* Remove unnecessary vision_cpu.h, vision_cuda.h, autocast.h.

* Fixing naming convention and correcting method names on macros.

* Turn on clang formatter for cu files and fixing broken styles.

* Replace "#ifndef ... #define ... #endif" with "#pragma once" on header files.

* Adding operator methods in vision::ops namespace. (#3096)

* Adding operator methods in vision::ops namespace.

* Replace general.h with macros.h

* Adding vision.h to the necessary cpp files.
parent 8520f0be
#ifndef MOBILENET_H #pragma once
#define MOBILENET_H
#include <torch/torch.h> #include <torch/torch.h>
#include "general.h" #include "../macros.h"
namespace vision { namespace vision {
namespace models { namespace models {
...@@ -22,5 +21,3 @@ struct VISION_API MobileNetV2Impl : torch::nn::Module { ...@@ -22,5 +21,3 @@ struct VISION_API MobileNetV2Impl : torch::nn::Module {
TORCH_MODULE(MobileNetV2); TORCH_MODULE(MobileNetV2);
} // namespace models } // namespace models
} // namespace vision } // namespace vision
#endif // MOBILENET_H
#ifndef MODELS_H #pragma once
#define MODELS_H
#include "alexnet.h" #include "alexnet.h"
#include "densenet.h" #include "densenet.h"
...@@ -11,5 +10,3 @@ ...@@ -11,5 +10,3 @@
#include "shufflenetv2.h" #include "shufflenetv2.h"
#include "squeezenet.h" #include "squeezenet.h"
#include "vgg.h" #include "vgg.h"
#endif // MODELS_H
#ifndef MODELSIMPL_H #pragma once
#define MODELSIMPL_H
#include <torch/torch.h> #include <torch/torch.h>
...@@ -42,5 +41,3 @@ inline bool double_compare(double a, double b) { ...@@ -42,5 +41,3 @@ inline bool double_compare(double a, double b) {
} // namespace modelsimpl } // namespace modelsimpl
} // namespace models } // namespace models
} // namespace vision } // namespace vision
#endif // MODELSIMPL_H
#ifndef RESNET_H #pragma once
#define RESNET_H
#include <torch/torch.h> #include <torch/torch.h>
#include "general.h" #include "../macros.h"
namespace vision { namespace vision {
namespace models { namespace models {
...@@ -256,5 +255,3 @@ TORCH_MODULE(WideResNet101_2); ...@@ -256,5 +255,3 @@ TORCH_MODULE(WideResNet101_2);
} // namespace models } // namespace models
} // namespace vision } // namespace vision
#endif // RESNET_H
#ifndef SHUFFLENETV2_H #pragma once
#define SHUFFLENETV2_H
#include <torch/torch.h> #include <torch/torch.h>
#include "general.h" #include "../macros.h"
namespace vision { namespace vision {
namespace models { namespace models {
...@@ -44,5 +43,3 @@ TORCH_MODULE(ShuffleNetV2_x2_0); ...@@ -44,5 +43,3 @@ TORCH_MODULE(ShuffleNetV2_x2_0);
} // namespace models } // namespace models
} // namespace vision } // namespace vision
#endif // SHUFFLENETV2_H
#ifndef SQUEEZENET_H #pragma once
#define SQUEEZENET_H
#include <torch/torch.h> #include <torch/torch.h>
#include "general.h" #include "../macros.h"
namespace vision { namespace vision {
namespace models { namespace models {
...@@ -36,5 +35,3 @@ TORCH_MODULE(SqueezeNet1_1); ...@@ -36,5 +35,3 @@ TORCH_MODULE(SqueezeNet1_1);
} // namespace models } // namespace models
} // namespace vision } // namespace vision
#endif // SQUEEZENET_H
#ifndef VGG_H #pragma once
#define VGG_H
#include <torch/torch.h> #include <torch/torch.h>
#include "general.h" #include "../macros.h"
namespace vision { namespace vision {
namespace models { namespace models {
...@@ -89,5 +88,3 @@ TORCH_MODULE(VGG19BN); ...@@ -89,5 +88,3 @@ TORCH_MODULE(VGG19BN);
} // namespace models } // namespace models
} // namespace vision } // namespace vision
#endif // VGG_H
#pragma once #pragma once
// All pure C++ headers for the C++ frontend. #include "new_empty_tensor_op.h"
#include <torch/all.h> #include <torch/extension.h>
namespace vision {
namespace ops {
namespace {
class NewEmptyTensorOp : public torch::autograd::Function<NewEmptyTensorOp> { class NewEmptyTensorOp : public torch::autograd::Function<NewEmptyTensorOp> {
public: public:
static torch::autograd::variable_list forward( static torch::autograd::variable_list forward(
torch::autograd::AutogradContext* ctx, torch::autograd::AutogradContext* ctx,
torch::autograd::Variable input, const torch::autograd::Variable& input,
c10::List<int64_t> new_shape) { const c10::List<int64_t>& new_shape) {
ctx->saved_data["shape"] = input.sizes(); ctx->saved_data["shape"] = input.sizes();
std::vector<int64_t> shape(new_shape.begin(), new_shape.end()); std::vector<int64_t> shape(new_shape.begin(), new_shape.end());
return {input.new_empty(shape, at::TensorOptions())}; return {input.new_empty(shape, at::TensorOptions())};
...@@ -16,7 +21,7 @@ class NewEmptyTensorOp : public torch::autograd::Function<NewEmptyTensorOp> { ...@@ -16,7 +21,7 @@ class NewEmptyTensorOp : public torch::autograd::Function<NewEmptyTensorOp> {
static torch::autograd::variable_list backward( static torch::autograd::variable_list backward(
torch::autograd::AutogradContext* ctx, torch::autograd::AutogradContext* ctx,
torch::autograd::variable_list grad_output) { const torch::autograd::variable_list& grad_output) {
// Use data saved in forward // Use data saved in forward
auto shape = ctx->saved_data["shape"].toIntList(); auto shape = ctx->saved_data["shape"].toIntList();
auto out = forward(ctx, grad_output[0], shape); auto out = forward(ctx, grad_output[0], shape);
...@@ -24,6 +29,13 @@ class NewEmptyTensorOp : public torch::autograd::Function<NewEmptyTensorOp> { ...@@ -24,6 +29,13 @@ class NewEmptyTensorOp : public torch::autograd::Function<NewEmptyTensorOp> {
} }
}; };
at::Tensor new_empty_tensor(const at::Tensor& input, c10::List<int64_t> shape) { } // namespace
at::Tensor new_empty_tensor(
const at::Tensor& input,
const c10::List<int64_t>& shape) {
return NewEmptyTensorOp::apply(input, shape)[0]; return NewEmptyTensorOp::apply(input, shape)[0];
} }
} // namespace ops
} // namespace vision
#pragma once
#include <ATen/ATen.h>
namespace vision {
namespace ops {
at::Tensor new_empty_tensor(
const at::Tensor& input,
const c10::List<int64_t>& shape);
} // namespace ops
} // namespace vision
#include "nms.h"
#include <torch/extension.h>
#if defined(WITH_CUDA) || defined(WITH_HIP)
#include <ATen/autocast_mode.h>
#endif
namespace vision {
namespace ops {
at::Tensor nms(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold) {
static auto op = c10::Dispatcher::singleton()
.findSchemaOrThrow("torchvision::nms", "")
.typed<decltype(nms)>();
return op.call(dets, scores, iou_threshold);
}
#if defined(WITH_CUDA) || defined(WITH_HIP)
at::Tensor nms_autocast(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold) {
c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast);
return nms(
at::autocast::cached_cast(at::kFloat, dets),
at::autocast::cached_cast(at::kFloat, scores),
iou_threshold);
}
#endif
} // namespace ops
} // namespace vision
#pragma once #pragma once
#include "cpu/vision_cpu.h" #include "cpu/nms_kernel.h"
#ifdef WITH_CUDA #ifdef WITH_CUDA
#include "autocast.h" #include "cuda/nms_kernel.h"
#include "cuda/vision_cuda.h"
#endif #endif
#ifdef WITH_HIP #ifdef WITH_HIP
#include "autocast.h" #include "hip/nms_kernel.h"
#include "hip/vision_cuda.h"
#endif #endif
// nms dispatch nexus namespace vision {
namespace ops {
// C++ Forward
at::Tensor nms( at::Tensor nms(
const at::Tensor& dets, const at::Tensor& dets,
const at::Tensor& scores, const at::Tensor& scores,
double iou_threshold) { double iou_threshold);
static auto op = c10::Dispatcher::singleton()
.findSchemaOrThrow("torchvision::nms", "")
.typed<decltype(nms)>();
return op.call(dets, scores, iou_threshold);
}
// Autocast Forward
#if defined(WITH_CUDA) || defined(WITH_HIP) #if defined(WITH_CUDA) || defined(WITH_HIP)
at::Tensor nms_autocast( at::Tensor nms_autocast(
const at::Tensor& dets, const at::Tensor& dets,
const at::Tensor& scores, const at::Tensor& scores,
double iou_threshold) { double iou_threshold);
c10::impl::ExcludeDispatchKeyGuard no_autocast(c10::DispatchKey::Autocast);
return nms(
at::autocast::cached_cast(at::kFloat, dets),
at::autocast::cached_cast(at::kFloat, scores),
iou_threshold);
}
#endif #endif
} // namespace ops
} // namespace vision
#pragma once #include "ps_roi_align.h"
#include <torch/extension.h>
#include "cpu/vision_cpu.h" #if defined(WITH_CUDA) || defined(WITH_HIP)
#include <ATen/autocast_mode.h>
#ifdef WITH_CUDA
#include "autocast.h"
#include "cuda/vision_cuda.h"
#endif
#ifdef WITH_HIP
#include "autocast.h"
#include "hip/vision_cuda.h"
#endif #endif
#include <iostream> namespace vision {
namespace ops {
// TODO: put this stuff in torchvision namespace
std::tuple<at::Tensor, at::Tensor> ps_roi_align( std::tuple<at::Tensor, at::Tensor> ps_roi_align(
const at::Tensor& input, const at::Tensor& input,
...@@ -30,7 +23,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align( ...@@ -30,7 +23,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_align(
} }
#if defined(WITH_CUDA) || defined(WITH_HIP) #if defined(WITH_CUDA) || defined(WITH_HIP)
std::tuple<at::Tensor, at::Tensor> PSROIAlign_autocast( std::tuple<at::Tensor, at::Tensor> ps_roi_align_autocast(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -82,6 +75,8 @@ at::Tensor _ps_roi_align_backward( ...@@ -82,6 +75,8 @@ at::Tensor _ps_roi_align_backward(
width); width);
} }
namespace {
class PSROIAlignFunction class PSROIAlignFunction
: public torch::autograd::Function<PSROIAlignFunction> { : public torch::autograd::Function<PSROIAlignFunction> {
public: public:
...@@ -186,7 +181,9 @@ class PSROIAlignBackwardFunction ...@@ -186,7 +181,9 @@ class PSROIAlignBackwardFunction
} }
}; };
std::tuple<at::Tensor, at::Tensor> PSROIAlign_autograd( } // namespace
std::tuple<at::Tensor, at::Tensor> ps_roi_align_autograd(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -199,7 +196,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIAlign_autograd( ...@@ -199,7 +196,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIAlign_autograd(
return std::make_tuple(result[0], result[1]); return std::make_tuple(result[0], result[1]);
} }
at::Tensor PSROIAlign_backward_autograd( at::Tensor ps_roi_align_backward_autograd(
const at::Tensor& grad, const at::Tensor& grad,
const at::Tensor& rois, const at::Tensor& rois,
const at::Tensor& channel_mapping, const at::Tensor& channel_mapping,
...@@ -224,3 +221,6 @@ at::Tensor PSROIAlign_backward_autograd( ...@@ -224,3 +221,6 @@ at::Tensor PSROIAlign_backward_autograd(
height, height,
width)[0]; width)[0];
} }
} // namespace ops
} // namespace vision
#pragma once #pragma once
#include <torch/extension.h>
#include "../macros.h"
VISION_API at::Tensor DeformConv2d_forward_cpu( #include "cpu/ps_roi_align_kernel.h"
const at::Tensor& input,
const at::Tensor& weight,
const at::Tensor& offset,
const at::Tensor& mask,
const at::Tensor& bias,
int64_t stride_h,
int64_t stride_w,
int64_t pad_h,
int64_t pad_w,
int64_t dilation_h,
int64_t dilation_w,
int64_t groups,
int64_t deformable_groups,
bool use_mask);
VISION_API std:: #ifdef WITH_CUDA
tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tensor> #include "cuda/ps_roi_align_kernel.h"
DeformConv2d_backward_cpu( #endif
const at::Tensor& grad_out, #ifdef WITH_HIP
const at::Tensor& input, #include "hip/ps_roi_align_kernel.h"
const at::Tensor& weight, #endif
const at::Tensor& offset,
const at::Tensor& mask,
const at::Tensor& bias,
int64_t stride_h,
int64_t stride_w,
int64_t pad_h,
int64_t pad_w,
int64_t dilation_h,
int64_t dilation_w,
int64_t groups,
int64_t deformable_groups,
bool use_mask);
VISION_API at::Tensor nms_cpu( namespace vision {
const at::Tensor& dets, namespace ops {
const at::Tensor& scores,
double iou_threshold);
VISION_API std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward_cpu( // C++ Forward
std::tuple<at::Tensor, at::Tensor> ps_roi_align(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -50,75 +21,52 @@ VISION_API std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward_cpu( ...@@ -50,75 +21,52 @@ VISION_API std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward_cpu(
int64_t pooled_width, int64_t pooled_width,
int64_t sampling_ratio); int64_t sampling_ratio);
VISION_API at::Tensor PSROIAlign_backward_cpu( // Autocast Forward
const at::Tensor& grad, #if defined(WITH_CUDA) || defined(WITH_HIP)
const at::Tensor& rois, std::tuple<at::Tensor, at::Tensor> ps_roi_align_autocast(
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t sampling_ratio,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
VISION_API std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cpu(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
int64_t pooled_height, int64_t pooled_height,
int64_t pooled_width); int64_t pooled_width,
int64_t sampling_ratio);
#endif
VISION_API at::Tensor PSROIPool_backward_cpu( // C++ Backward
at::Tensor _ps_roi_align_backward(
const at::Tensor& grad, const at::Tensor& grad,
const at::Tensor& rois, const at::Tensor& rois,
const at::Tensor& channel_mapping, const at::Tensor& channel_mapping,
double spatial_scale, double spatial_scale,
int64_t pooled_height, int64_t pooled_height,
int64_t pooled_width, int64_t pooled_width,
int64_t sampling_ratio,
int64_t batch_size, int64_t batch_size,
int64_t channels, int64_t channels,
int64_t height, int64_t height,
int64_t width); int64_t width);
VISION_API at::Tensor ROIAlign_forward_cpu( // Autograd Forward and Backward
std::tuple<at::Tensor, at::Tensor> ps_roi_align_autograd(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
int64_t pooled_height, int64_t pooled_height,
int64_t pooled_width, int64_t pooled_width,
int64_t sampling_ratio, int64_t sampling_ratio);
bool aligned);
VISION_API at::Tensor ROIAlign_backward_cpu( at::Tensor ps_roi_align_backward_autograd(
const at::Tensor& grad, const at::Tensor& grad,
const at::Tensor& rois, const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale, double spatial_scale,
int64_t pooled_height, int64_t pooled_height,
int64_t pooled_width, int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width,
int64_t sampling_ratio, int64_t sampling_ratio,
bool aligned);
VISION_API std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);
VISION_API at::Tensor ROIPool_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size, int64_t batch_size,
int64_t channels, int64_t channels,
int64_t height, int64_t height,
int64_t width); int64_t width);
} // namespace ops
} // namespace vision
#pragma once #include "ps_roi_pool.h"
#include <torch/extension.h>
#include "cpu/vision_cpu.h" #if defined(WITH_CUDA) || defined(WITH_HIP)
#include <ATen/autocast_mode.h>
#ifdef WITH_CUDA
#include "autocast.h"
#include "cuda/vision_cuda.h"
#endif
#ifdef WITH_HIP
#include "autocast.h"
#include "hip/vision_cuda.h"
#endif #endif
// TODO: put this stuff in torchvision namespace namespace vision {
namespace ops {
std::tuple<at::Tensor, at::Tensor> ps_roi_pool( std::tuple<at::Tensor, at::Tensor> ps_roi_pool(
const at::Tensor& input, const at::Tensor& input,
...@@ -26,7 +21,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool( ...@@ -26,7 +21,7 @@ std::tuple<at::Tensor, at::Tensor> ps_roi_pool(
} }
#if defined(WITH_CUDA) || defined(WITH_HIP) #if defined(WITH_CUDA) || defined(WITH_HIP)
std::tuple<at::Tensor, at::Tensor> PSROIPool_autocast( std::tuple<at::Tensor, at::Tensor> ps_roi_pool_autocast(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -74,6 +69,8 @@ at::Tensor _ps_roi_pool_backward( ...@@ -74,6 +69,8 @@ at::Tensor _ps_roi_pool_backward(
width); width);
} }
namespace {
class PSROIPoolFunction : public torch::autograd::Function<PSROIPoolFunction> { class PSROIPoolFunction : public torch::autograd::Function<PSROIPoolFunction> {
public: public:
static torch::autograd::variable_list forward( static torch::autograd::variable_list forward(
...@@ -166,7 +163,9 @@ class PSROIPoolBackwardFunction ...@@ -166,7 +163,9 @@ class PSROIPoolBackwardFunction
} }
}; };
std::tuple<at::Tensor, at::Tensor> PSROIPool_autograd( } // namespace
std::tuple<at::Tensor, at::Tensor> ps_roi_pool_autograd(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -178,7 +177,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_autograd( ...@@ -178,7 +177,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_autograd(
return std::make_tuple(result[0], result[1]); return std::make_tuple(result[0], result[1]);
} }
at::Tensor PSROIPool_backward_autograd( at::Tensor ps_roi_pool_backward_autograd(
const at::Tensor& grad, const at::Tensor& grad,
const at::Tensor& rois, const at::Tensor& rois,
const at::Tensor& channel_mapping, const at::Tensor& channel_mapping,
...@@ -201,3 +200,6 @@ at::Tensor PSROIPool_backward_autograd( ...@@ -201,3 +200,6 @@ at::Tensor PSROIPool_backward_autograd(
height, height,
width)[0]; width)[0];
} }
} // namespace ops
} // namespace vision
#pragma once
#include "cpu/ps_roi_pool_kernel.h"
#ifdef WITH_CUDA
#include "cuda/ps_roi_pool_kernel.h"
#endif
#ifdef WITH_HIP
#include "hip/ps_roi_pool_kernel.h"
#endif
namespace vision {
namespace ops {
// C++ Forward
std::tuple<at::Tensor, at::Tensor> ps_roi_pool(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);
// Autocast Forward
#if defined(WITH_CUDA) || defined(WITH_HIP)
std::tuple<at::Tensor, at::Tensor> ps_roi_pool_autocast(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);
#endif
// C++ Backward
at::Tensor _ps_roi_pool_backward(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
// Autograd Forward and Backward
std::tuple<at::Tensor, at::Tensor> ps_roi_pool_autograd(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);
at::Tensor ps_roi_pool_backward_autograd(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
} // namespace ops
} // namespace vision
#pragma once #include "roi_align.h"
#include <torch/extension.h>
#include "cpu/vision_cpu.h" #if defined(WITH_CUDA) || defined(WITH_HIP)
#include <ATen/autocast_mode.h>
#ifdef WITH_CUDA
#include "autocast.h"
#include "cuda/vision_cuda.h"
#endif
#ifdef WITH_HIP
#include "autocast.h"
#include "hip/vision_cuda.h"
#endif #endif
// TODO: put this stuff in torchvision namespace namespace vision {
namespace ops {
// roi_align dispatch nexus
at::Tensor roi_align( at::Tensor roi_align(
const at::Tensor& input, // Input feature map. const at::Tensor& input, // Input feature map.
const at::Tensor& rois, // List of ROIs to pool over. const at::Tensor& rois, // List of ROIs to pool over.
...@@ -39,7 +33,7 @@ at::Tensor roi_align( ...@@ -39,7 +33,7 @@ at::Tensor roi_align(
} }
#if defined(WITH_CUDA) || defined(WITH_HIP) #if defined(WITH_CUDA) || defined(WITH_HIP)
at::Tensor ROIAlign_autocast( at::Tensor roi_align_autocast(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -90,6 +84,8 @@ at::Tensor _roi_align_backward( ...@@ -90,6 +84,8 @@ at::Tensor _roi_align_backward(
aligned); aligned);
} }
namespace {
class ROIAlignFunction : public torch::autograd::Function<ROIAlignFunction> { class ROIAlignFunction : public torch::autograd::Function<ROIAlignFunction> {
public: public:
static torch::autograd::variable_list forward( static torch::autograd::variable_list forward(
...@@ -189,7 +185,9 @@ class ROIAlignBackwardFunction ...@@ -189,7 +185,9 @@ class ROIAlignBackwardFunction
} }
}; };
at::Tensor ROIAlign_autograd( } // namespace
at::Tensor roi_align_autograd(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -207,7 +205,7 @@ at::Tensor ROIAlign_autograd( ...@@ -207,7 +205,7 @@ at::Tensor ROIAlign_autograd(
aligned)[0]; aligned)[0];
} }
at::Tensor ROIAlign_backward_autograd( at::Tensor roi_align_backward_autograd(
const at::Tensor& grad, const at::Tensor& grad,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -232,3 +230,6 @@ at::Tensor ROIAlign_backward_autograd( ...@@ -232,3 +230,6 @@ at::Tensor ROIAlign_backward_autograd(
sampling_ratio, sampling_ratio,
aligned)[0]; aligned)[0];
} }
} // namespace ops
} // namespace vision
#pragma once
#include "cpu/roi_align_kernel.h"
#ifdef WITH_CUDA
#include "cuda/roi_align_kernel.h"
#endif
#ifdef WITH_HIP
#include "hip/roi_align_kernel.h"
#endif
namespace vision {
namespace ops {
// C++ Forward
at::Tensor roi_align(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t sampling_ratio,
bool aligned);
// Autocast Forward
#if defined(WITH_CUDA) || defined(WITH_HIP)
at::Tensor roi_align_autocast(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t sampling_ratio,
bool aligned);
#endif
// C++ Backward
at::Tensor _roi_align_backward(
const at::Tensor& grad,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width,
int64_t sampling_ratio,
bool aligned);
// Autograd Forward and Backward
at::Tensor roi_align_autograd(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t sampling_ratio,
bool aligned);
at::Tensor roi_align_backward_autograd(
const at::Tensor& grad,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width,
int64_t sampling_ratio,
bool aligned);
} // namespace ops
} // namespace vision
#pragma once #include "roi_pool.h"
#include <torch/extension.h>
#include "cpu/vision_cpu.h" #if defined(WITH_CUDA) || defined(WITH_HIP)
#include <ATen/autocast_mode.h>
#ifdef WITH_CUDA
#include "autocast.h"
#include "cuda/vision_cuda.h"
#endif
#ifdef WITH_HIP
#include "autocast.h"
#include "hip/vision_cuda.h"
#endif #endif
// TODO: put this stuff in torchvision namespace namespace vision {
namespace ops {
std::tuple<at::Tensor, at::Tensor> roi_pool( std::tuple<at::Tensor, at::Tensor> roi_pool(
const at::Tensor& input, const at::Tensor& input,
...@@ -26,7 +21,7 @@ std::tuple<at::Tensor, at::Tensor> roi_pool( ...@@ -26,7 +21,7 @@ std::tuple<at::Tensor, at::Tensor> roi_pool(
} }
#if defined(WITH_CUDA) || defined(WITH_HIP) #if defined(WITH_CUDA) || defined(WITH_HIP)
std::tuple<at::Tensor, at::Tensor> ROIPool_autocast( std::tuple<at::Tensor, at::Tensor> roi_pool_autocast(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -73,6 +68,8 @@ at::Tensor _roi_pool_backward( ...@@ -73,6 +68,8 @@ at::Tensor _roi_pool_backward(
width); width);
} }
namespace {
class ROIPoolFunction : public torch::autograd::Function<ROIPoolFunction> { class ROIPoolFunction : public torch::autograd::Function<ROIPoolFunction> {
public: public:
static torch::autograd::variable_list forward( static torch::autograd::variable_list forward(
...@@ -165,7 +162,9 @@ class ROIPoolBackwardFunction ...@@ -165,7 +162,9 @@ class ROIPoolBackwardFunction
} }
}; };
std::tuple<at::Tensor, at::Tensor> ROIPool_autograd( } // namespace
std::tuple<at::Tensor, at::Tensor> roi_pool_autograd(
const at::Tensor& input, const at::Tensor& input,
const at::Tensor& rois, const at::Tensor& rois,
double spatial_scale, double spatial_scale,
...@@ -177,7 +176,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_autograd( ...@@ -177,7 +176,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_autograd(
return std::make_tuple(result[0], result[1]); return std::make_tuple(result[0], result[1]);
} }
at::Tensor ROIPool_backward_autograd( at::Tensor roi_pool_backward_autograd(
const at::Tensor& grad, const at::Tensor& grad,
const at::Tensor& rois, const at::Tensor& rois,
const at::Tensor& argmax, const at::Tensor& argmax,
...@@ -200,3 +199,6 @@ at::Tensor ROIPool_backward_autograd( ...@@ -200,3 +199,6 @@ at::Tensor ROIPool_backward_autograd(
height, height,
width)[0]; width)[0];
} }
} // namespace ops
} // namespace vision
#pragma once
#include "cpu/roi_pool_kernel.h"
#ifdef WITH_CUDA
#include "cuda/roi_pool_kernel.h"
#endif
#ifdef WITH_HIP
#include "hip/roi_pool_kernel.h"
#endif
namespace vision {
namespace ops {
// C++ Forward
std::tuple<at::Tensor, at::Tensor> roi_pool(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);
// Autocast Forward
#if defined(WITH_CUDA) || defined(WITH_HIP)
std::tuple<at::Tensor, at::Tensor> roi_pool_autocast(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);
#endif
// C++ Backward
at::Tensor _roi_pool_backward(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
// Autograd Forward and Backward
std::tuple<at::Tensor, at::Tensor> roi_pool_autograd(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);
at::Tensor roi_pool_backward_autograd(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
} // namespace ops
} // namespace vision
#include "vision.h"
#include <Python.h> #include <Python.h>
#include <torch/script.h> #include <torch/script.h>
...@@ -8,13 +10,13 @@ ...@@ -8,13 +10,13 @@
#include <hip/hip_runtime.h> #include <hip/hip_runtime.h>
#endif #endif
#include "DeformConv.h" #include "deform_conv2d.h"
#include "PSROIAlign.h" #include "new_empty_tensor_op.h"
#include "PSROIPool.h"
#include "ROIAlign.h"
#include "ROIPool.h"
#include "empty_tensor_op.h"
#include "nms.h" #include "nms.h"
#include "ps_roi_align.h"
#include "ps_roi_pool.h"
#include "roi_align.h"
#include "roi_pool.h"
// If we are in a Windows environment, we need to define // If we are in a Windows environment, we need to define
// initialization functions for the _custom_ops extension // initialization functions for the _custom_ops extension
...@@ -35,6 +37,8 @@ int64_t cuda_version() { ...@@ -35,6 +37,8 @@ int64_t cuda_version() {
} }
} // namespace vision } // namespace vision
using namespace vision::ops;
TORCH_LIBRARY(torchvision, m) { TORCH_LIBRARY(torchvision, m) {
m.def( m.def(
"deform_conv2d(Tensor input, Tensor weight, Tensor offset, Tensor mask, Tensor bias, int stride_h, int stride_w, int pad_h, int pad_w, int dilation_h, int dilation_w, int groups, int offset_groups, bool use_mask) -> Tensor"); "deform_conv2d(Tensor input, Tensor weight, Tensor offset, Tensor mask, Tensor bias, int stride_h, int stride_w, int pad_h, int pad_w, int dilation_h, int dilation_w, int groups, int offset_groups, bool use_mask) -> Tensor");
...@@ -62,57 +66,57 @@ TORCH_LIBRARY(torchvision, m) { ...@@ -62,57 +66,57 @@ TORCH_LIBRARY(torchvision, m) {
} }
TORCH_LIBRARY_IMPL(torchvision, CPU, m) { TORCH_LIBRARY_IMPL(torchvision, CPU, m) {
m.impl("deform_conv2d", DeformConv2d_forward_cpu); m.impl("deform_conv2d", deform_conv2d_forward_cpu);
m.impl("_deform_conv2d_backward", DeformConv2d_backward_cpu); m.impl("_deform_conv2d_backward", deform_conv2d_backward_cpu);
m.impl("nms", nms_cpu); m.impl("nms", nms_cpu);
m.impl("ps_roi_align", PSROIAlign_forward_cpu); m.impl("ps_roi_align", ps_roi_align_forward_cpu);
m.impl("_ps_roi_align_backward", PSROIAlign_backward_cpu); m.impl("_ps_roi_align_backward", ps_roi_align_backward_cpu);
m.impl("ps_roi_pool", PSROIPool_forward_cpu); m.impl("ps_roi_pool", ps_roi_pool_forward_cpu);
m.impl("_ps_roi_pool_backward", PSROIPool_backward_cpu); m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_cpu);
m.impl("roi_align", ROIAlign_forward_cpu); m.impl("roi_align", roi_align_forward_cpu);
m.impl("_roi_align_backward", ROIAlign_backward_cpu); m.impl("_roi_align_backward", roi_align_backward_cpu);
m.impl("roi_pool", ROIPool_forward_cpu); m.impl("roi_pool", roi_pool_forward_cpu);
m.impl("_roi_pool_backward", ROIPool_backward_cpu); m.impl("_roi_pool_backward", roi_pool_backward_cpu);
} }
// TODO: Place this in a hypothetical separate torchvision_cuda library // TODO: Place this in a hypothetical separate torchvision_cuda library
#if defined(WITH_CUDA) || defined(WITH_HIP) #if defined(WITH_CUDA) || defined(WITH_HIP)
TORCH_LIBRARY_IMPL(torchvision, CUDA, m) { TORCH_LIBRARY_IMPL(torchvision, CUDA, m) {
m.impl("deform_conv2d", DeformConv2d_forward_cuda); m.impl("deform_conv2d", deform_conv2d_forward_cuda);
m.impl("_deform_conv2d_backward", DeformConv2d_backward_cuda); m.impl("_deform_conv2d_backward", deform_conv2d_backward_cuda);
m.impl("nms", nms_cuda); m.impl("nms", nms_cuda);
m.impl("ps_roi_align", PSROIAlign_forward_cuda); m.impl("ps_roi_align", ps_roi_align_forward_cuda);
m.impl("_ps_roi_align_backward", PSROIAlign_backward_cuda); m.impl("_ps_roi_align_backward", ps_roi_align_backward_cuda);
m.impl("ps_roi_pool", PSROIPool_forward_cuda); m.impl("ps_roi_pool", ps_roi_pool_forward_cuda);
m.impl("_ps_roi_pool_backward", PSROIPool_backward_cuda); m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_cuda);
m.impl("roi_align", ROIAlign_forward_cuda); m.impl("roi_align", roi_align_forward_cuda);
m.impl("_roi_align_backward", ROIAlign_backward_cuda); m.impl("_roi_align_backward", roi_align_backward_cuda);
m.impl("roi_pool", ROIPool_forward_cuda); m.impl("roi_pool", roi_pool_forward_cuda);
m.impl("_roi_pool_backward", ROIPool_backward_cuda); m.impl("_roi_pool_backward", roi_pool_backward_cuda);
} }
#endif #endif
// Autocast only needs to wrap forward pass ops. // Autocast only needs to wrap forward pass ops.
#if defined(WITH_CUDA) || defined(WITH_HIP) #if defined(WITH_CUDA) || defined(WITH_HIP)
TORCH_LIBRARY_IMPL(torchvision, Autocast, m) { TORCH_LIBRARY_IMPL(torchvision, Autocast, m) {
m.impl("deform_conv2d", DeformConv2d_autocast); m.impl("deform_conv2d", deform_conv2d_autocast);
m.impl("nms", nms_autocast); m.impl("nms", nms_autocast);
m.impl("ps_roi_align", PSROIAlign_autocast); m.impl("ps_roi_align", ps_roi_align_autocast);
m.impl("ps_roi_pool", PSROIPool_autocast); m.impl("ps_roi_pool", ps_roi_pool_autocast);
m.impl("roi_align", ROIAlign_autocast); m.impl("roi_align", roi_align_autocast);
m.impl("roi_pool", ROIPool_autocast); m.impl("roi_pool", roi_pool_autocast);
} }
#endif #endif
TORCH_LIBRARY_IMPL(torchvision, Autograd, m) { TORCH_LIBRARY_IMPL(torchvision, Autograd, m) {
m.impl("deform_conv2d", DeformConv2d_autograd); m.impl("deform_conv2d", deform_conv2d_autograd);
m.impl("_deform_conv2d_backward", DeformConv2d_backward_autograd); m.impl("_deform_conv2d_backward", deform_conv2d_backward_autograd);
m.impl("ps_roi_align", PSROIAlign_autograd); m.impl("ps_roi_align", ps_roi_align_autograd);
m.impl("_ps_roi_align_backward", PSROIAlign_backward_autograd); m.impl("_ps_roi_align_backward", ps_roi_align_backward_autograd);
m.impl("ps_roi_pool", PSROIPool_autograd); m.impl("ps_roi_pool", ps_roi_pool_autograd);
m.impl("_ps_roi_pool_backward", PSROIPool_backward_autograd); m.impl("_ps_roi_pool_backward", ps_roi_pool_backward_autograd);
m.impl("roi_align", ROIAlign_autograd); m.impl("roi_align", roi_align_autograd);
m.impl("_roi_align_backward", ROIAlign_backward_autograd); m.impl("_roi_align_backward", roi_align_backward_autograd);
m.impl("roi_pool", ROIPool_autograd); m.impl("roi_pool", roi_pool_autograd);
m.impl("_roi_pool_backward", ROIPool_backward_autograd); m.impl("_roi_pool_backward", roi_pool_backward_autograd);
} }
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment