Unverified Commit 0ebbb0ab authored by Vasilis Vryniotis's avatar Vasilis Vryniotis Committed by GitHub
Browse files

Encapsulate and Standardise C++ Ops (#3097)

* Encapsulate and standardize deform_conv2d (#3074)

* Rename files.

* Standardizing method names.

* Adding anonymous namespaces.

* Applying C++ naming rules and aligning variable names across headers and cpp files.

* Syncing names across implementations.

* Rename deform_conv2d.h to deform_conv2d.cpp

* Use header files:
- Create header files for kernel implementation and remove definitions from vision_*.h files.
- Eliminate unnecessary headers and ensure all cpp include their headers.

* Change the naming convention for kernel implementations.

* Remove the _param postfix from the variables and standardizing names.

* Exposing public forward/backward methods to the C++ API and moving methods around to minimize git blame changes.

* Encapsulate and standardize nms (#3081)

* Syncing, where possible, the names of functions across devices.

* Adding all internal functions in anonymous namespaces.

* Renaming C++/CUDA kernel files and moving operator code from header to cpp file.

* Create a separate header file with "public" functions for each cpp file.

* Removing unnecessary repeated includes.

* Update CMakeLists.txt to include all headers.

* Encapsulate and standardize ps_roi_align (#3082)

* Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API.
Syncing, where possible, the names of functions across devices.

* Adding all internal functions in anonymous namespaces.

* Renaming C++/CUDA kernel files and moving operator code from header to cpp file.

* Create a separate header file with "public" functions for each cpp file.

* Removing unnecessary repeated includes.

* Encapsulate and standardize ps_roi_pool (#3084)

* Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API.

* Adding all internal functions in anonymous namespaces.

* Renaming C++/CUDA kernel files and moving operator code from header to cpp file.

* Create a separate header file with "public" functions for each cpp file.

* Removing unnecessary repeated includes.

* Encapsulate and standardize roi_align (#3085)

* Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API.

* Adding all internal functions in anonymous namespaces.

* Renaming C++/CUDA kernel files and moving operator code from header to cpp file.

* Create a separate header file with "public" functions for each cpp file.

* Removing unnecessary repeated includes.

* Encapsulate and standardize roi_pool  (#3088)

* Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API.

* Adding all internal functions in anonymous namespaces.

* Syncing variable names between the cpp files and their header files.

* Renaming C++/CUDA kernel files and moving operator code from header to cpp file.

* Create a separate header file with "public" functions for each cpp file.

* Removing unnecessary repeated includes.

* Encapsulate and standardize new_empty_tensor_op (#3089)

* Renaming C++ files & methods according to recommended naming conventions and aligning them with Python's API.

* Create a separate header file with "public" functions for each cpp file.

* Adding all internal functions in anonymous namespaces.

* Convert to const ref all possible parameters.

* Removing unnecessary repeated includes.

* Encapsulate and standardize C++ Ops - Clean up (#3094)

* Removing unnecessary repeated includes.

* Remove unnecessary vision_cpu.h, vision_cuda.h, autocast.h.

* Fixing naming convention and correcting method names on macros.

* Turn on clang formatter for cu files and fixing broken styles.

* Replace "#ifndef ... #define ... #endif" with "#pragma once" on header files.

* Adding operator methods in vision::ops namespace. (#3096)

* Adding operator methods in vision::ops namespace.

* Replace general.h with macros.h

* Adding vision.h to the necessary cpp files.
parent 8520f0be
......@@ -32,9 +32,11 @@ file(GLOB HEADERS torchvision/csrc/*.h)
# Image extension
file(GLOB IMAGE_HEADERS torchvision/csrc/cpu/image/*.h)
file(GLOB IMAGE_SOURCES torchvision/csrc/cpu/image/*.cpp)
file(GLOB OPERATOR_SOURCES torchvision/csrc/cpu/*.h torchvision/csrc/cpu/*.cpp ${IMAGE_HEADERS} ${IMAGE_SOURCES} ${HEADERS} torchvision/csrc/*.cpp)
file(GLOB OPERATOR_HEADERS torchvision/csrc/cpu/*.h)
file(GLOB OPERATOR_SOURCES ${OPERATOR_HEADERS} torchvision/csrc/cpu/*.cpp ${IMAGE_HEADERS} ${IMAGE_SOURCES} ${HEADERS} torchvision/csrc/*.cpp)
if(WITH_CUDA)
file(GLOB OPERATOR_SOURCES ${OPERATOR_SOURCES} torchvision/csrc/cuda/*.h torchvision/csrc/cuda/*.cu)
file(GLOB OPERATOR_HEADERS ${OPERATOR_HEADERS} torchvision/csrc/cuda/*.h)
file(GLOB OPERATOR_SOURCES ${OPERATOR_SOURCES} ${OPERATOR_HEADERS} torchvision/csrc/cuda/*.cu)
endif()
file(GLOB MODELS_HEADERS torchvision/csrc/models/*.h)
file(GLOB MODELS_SOURCES torchvision/csrc/models/*.h torchvision/csrc/models/*.cpp)
......@@ -95,11 +97,11 @@ install(EXPORT TorchVisionTargets
install(FILES ${HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME})
install(FILES
torchvision/csrc/cpu/vision_cpu.h
${OPERATOR_HEADERS}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cpu)
if(WITH_CUDA)
install(FILES
torchvision/csrc/cuda/vision_cuda.h
${OPERATOR_HEADERS}
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cuda)
endif()
install(FILES ${MODELS_HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/models)
......@@ -152,8 +152,8 @@ def get_extensions():
)
source_cuda = glob.glob(os.path.join(extensions_dir, 'hip', '*.hip'))
# Copy over additional files
shutil.copy("torchvision/csrc/cuda/cuda_helpers.h", "torchvision/csrc/hip/cuda_helpers.h")
shutil.copy("torchvision/csrc/cuda/vision_cuda.h", "torchvision/csrc/hip/vision_cuda.h")
for file in glob.glob(r"torchvision/csrc/cuda/*.h"):
shutil.copy(file, "torchvision/csrc/hip")
else:
source_cuda = glob.glob(os.path.join(extensions_dir, 'cuda', '*.cu'))
......
#include <ATen/ATen.h>
#include <torch/script.h>
#include <torch/torch.h>
#include <torchvision/ROIAlign.h>
#include <torchvision/cpu/vision_cpu.h>
#include <torchvision/roi_align.h>
#include <torchvision/nms.h>
#ifdef _WIN32
// Windows only
// This is necessary until operators are automatically registered on include
static auto _nms = &nms_cpu;
static auto _nms = &vision::ops::nms_cpu;
#endif
int main() {
......
#pragma once
#if defined(WITH_CUDA) || defined(WITH_HIP)
#include <ATen/autocast_mode.h>
#endif
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
// Public declarations of the deform_conv2d operator entry points whose names
// carry the `_cpu` suffix. Only the declarations live here; the definitions
// are expected in the matching .cpp file. VISION_API comes from "../macros.h".
namespace vision {
namespace ops {
// Forward pass: takes five tensors (input, weight, offset, mask, bias) plus
// integer stride/pad/dilation values, two group counts and a `use_mask` flag,
// and returns a single tensor.
// NOTE(review): the `_h` / `_w` suffixes presumably denote the height and
// width components of each setting -- confirm against the definition.
VISION_API at::Tensor deform_conv2d_forward_cpu(
const at::Tensor& input,
const at::Tensor& weight,
const at::Tensor& offset,
const at::Tensor& mask,
const at::Tensor& bias,
int64_t stride_h,
int64_t stride_w,
int64_t pad_h,
int64_t pad_w,
int64_t dilation_h,
int64_t dilation_w,
int64_t n_weight_grps,
int64_t n_offset_grps,
bool use_mask);
// Backward pass: takes the same arguments as the forward pass preceded by
// `grad_out`, and returns a tuple of five tensors.
// NOTE(review): the five results are presumably the gradients w.r.t. input,
// weight, offset, mask and bias (in that order) -- the definition is not
// visible here, so confirm before relying on the ordering.
VISION_API std::
tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tensor>
deform_conv2d_backward_cpu(
const at::Tensor& grad_out,
const at::Tensor& input,
const at::Tensor& weight,
const at::Tensor& offset,
const at::Tensor& mask,
const at::Tensor& bias,
int64_t stride_h,
int64_t stride_w,
int64_t pad_h,
int64_t pad_w,
int64_t dilation_h,
int64_t dilation_w,
int64_t n_weight_grps,
int64_t n_offset_grps,
bool use_mask);
} // namespace ops
} // namespace vision
#include "vision_cpu.h"
#include "nms_kernel.h"
namespace vision {
namespace ops {
namespace {
template <typename scalar_t>
at::Tensor nms_cpu_kernel(
at::Tensor nms_kernel_impl(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold) {
......@@ -69,6 +74,8 @@ at::Tensor nms_cpu_kernel(
return keep_t.narrow(/*dim=*/0, /*start=*/0, /*length=*/num_to_keep);
}
} // namespace
at::Tensor nms_cpu(
const at::Tensor& dets,
const at::Tensor& scores,
......@@ -94,8 +101,11 @@ at::Tensor nms_cpu(
auto result = at::empty({0}, dets.options());
AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms", [&] {
result = nms_cpu_kernel<scalar_t>(dets, scores, iou_threshold);
AT_DISPATCH_FLOATING_TYPES(dets.scalar_type(), "nms_cpu", [&] {
result = nms_kernel_impl<scalar_t>(dets, scores, iou_threshold);
});
return result;
}
} // namespace ops
} // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
// Public declaration of the `nms_cpu` operator entry point. The definition is
// expected in the matching .cpp file. VISION_API comes from "../macros.h".
namespace vision {
namespace ops {
// Takes two tensors (`dets`, `scores`) and a double `iou_threshold`, and
// returns a single tensor.
// NOTE(review): presumably `dets` holds boxes, `scores` their confidences and
// the result the indices of the boxes that are kept -- confirm against the
// definition.
VISION_API at::Tensor nms_cpu(
const at::Tensor& dets,
const at::Tensor& scores,
double iou_threshold);
} // namespace ops
} // namespace vision
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>
#include <TH/TH.h>
#include "ps_roi_align_kernel.h"
namespace vision {
namespace ops {
namespace {
template <typename T>
T bilinear_interpolate(
......@@ -57,7 +60,7 @@ T bilinear_interpolate(
}
template <typename T>
void PSROIAlignForwardCPU(
void ps_roi_align_forward_kernel_impl(
int nthreads,
const T* input,
const T spatial_scale,
......@@ -202,7 +205,7 @@ inline void add(T* address, const T& val) {
}
template <typename T>
void PSROIAlignBackwardCPU(
void ps_roi_align_backward_kernel_impl(
int nthreads,
const T* grad_output,
const int* channel_mapping,
......@@ -298,7 +301,9 @@ void PSROIAlignBackwardCPU(
}
}
std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward_cpu(
} // namespace
std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
......@@ -313,7 +318,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward_cpu(
at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "PSROIAlign_forward_cpu";
at::CheckedFrom c = "ps_roi_align_forward_cpu";
at::checkAllSameType(c, {input_t, rois_t});
int num_rois = rois.size(0);
......@@ -338,8 +343,8 @@ std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward_cpu(
auto input_ = input.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
input.scalar_type(), "PSROIAlign_forward", [&] {
PSROIAlignForwardCPU<scalar_t>(
input.scalar_type(), "ps_roi_align_forward_cpu", [&] {
ps_roi_align_forward_kernel_impl<scalar_t>(
output_size,
input_.data_ptr<scalar_t>(),
spatial_scale,
......@@ -357,7 +362,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIAlign_forward_cpu(
return std::make_tuple(output, channel_mapping);
}
at::Tensor PSROIAlign_backward_cpu(
at::Tensor ps_roi_align_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
......@@ -379,7 +384,7 @@ at::Tensor PSROIAlign_backward_cpu(
at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2},
channel_mapping_t{channel_mapping, "channel_mapping", 3};
at::CheckedFrom c = "PSROIAlign_backward_cpu";
at::CheckedFrom c = "ps_roi_align_backward_cpu";
at::checkAllSameType(c, {grad_t, rois_t});
auto num_rois = rois.size(0);
......@@ -395,8 +400,8 @@ at::Tensor PSROIAlign_backward_cpu(
auto grad_ = grad.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
grad.scalar_type(), "PSROIAlign_backward", [&] {
PSROIAlignBackwardCPU<scalar_t>(
grad.scalar_type(), "ps_roi_align_backward_cpu", [&] {
ps_roi_align_backward_kernel_impl<scalar_t>(
grad.numel(),
grad_.data_ptr<scalar_t>(),
channel_mapping.data_ptr<int>(),
......@@ -414,3 +419,6 @@ at::Tensor PSROIAlign_backward_cpu(
});
return grad_input;
}
} // namespace ops
} // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
// Public declarations of the ps_roi_align operator entry points whose names
// carry the `_cpu` suffix. The definitions are expected in the matching .cpp
// file. VISION_API comes from "../macros.h".
namespace vision {
namespace ops {
// Forward pass: returns a pair of tensors.
// NOTE(review): presumably (output, channel_mapping), with the second element
// being what the backward pass takes as `channel_mapping` -- confirm against
// the definition.
VISION_API std::tuple<at::Tensor, at::Tensor> ps_roi_align_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t sampling_ratio);
// Backward pass: takes the incoming gradient `grad`, the `rois`, a
// `channel_mapping` tensor and the pooling settings, and returns one tensor.
// NOTE(review): batch_size/channels/height/width presumably describe the shape
// of the forward input (and hence of the returned gradient) -- confirm.
VISION_API at::Tensor ps_roi_align_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t sampling_ratio,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
} // namespace ops
} // namespace vision
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>
#include <TH/TH.h>
#include <algorithm>
#include "ps_roi_pool_kernel.h"
namespace vision {
namespace ops {
namespace {
template <class T>
inline void add(T* address, const T& val) {
......@@ -9,7 +11,7 @@ inline void add(T* address, const T& val) {
}
template <typename T>
void PSROIPoolForward(
void ps_roi_pool_forward_kernel_impl(
const T* input,
const T spatial_scale,
int channels,
......@@ -79,7 +81,7 @@ void PSROIPoolForward(
}
template <typename T>
void PSROIPoolBackward(
void ps_roi_pool_backward_kernel_impl(
const T* grad_output,
const int* channel_mapping,
int num_rois,
......@@ -143,7 +145,9 @@ void PSROIPoolBackward(
}
}
std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cpu(
} // namespace
std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
......@@ -157,7 +161,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cpu(
at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "PSROIPool_forward_cpu";
at::CheckedFrom c = "ps_roi_pool_forward_cpu";
at::checkAllSameType(c, {input_t, rois_t});
int num_rois = rois.size(0);
......@@ -182,8 +186,8 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cpu(
auto input_ = input.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
input.scalar_type(), "PSROIPool_forward", [&] {
PSROIPoolForward<scalar_t>(
input.scalar_type(), "ps_roi_pool_forward_cpu", [&] {
ps_roi_pool_forward_kernel_impl<scalar_t>(
input_.data_ptr<scalar_t>(),
spatial_scale,
channels,
......@@ -200,7 +204,7 @@ std::tuple<at::Tensor, at::Tensor> PSROIPool_forward_cpu(
return std::make_tuple(output, channel_mapping);
}
at::Tensor PSROIPool_backward_cpu(
at::Tensor ps_roi_pool_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
......@@ -221,7 +225,7 @@ at::Tensor PSROIPool_backward_cpu(
at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2},
channel_mapping_t{channel_mapping, "channel_mapping", 3};
at::CheckedFrom c = "PSROIPool_backward_cpu";
at::CheckedFrom c = "ps_roi_pool_backward_cpu";
at::checkAllSameType(c, {grad_t, rois_t});
auto num_rois = rois.size(0);
......@@ -237,8 +241,8 @@ at::Tensor PSROIPool_backward_cpu(
auto grad_ = grad.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
grad.scalar_type(), "PSROIPool_backward", [&] {
PSROIPoolBackward<scalar_t>(
grad.scalar_type(), "ps_roi_pool_backward_cpu", [&] {
ps_roi_pool_backward_kernel_impl<scalar_t>(
grad_.data_ptr<scalar_t>(),
channel_mapping.data_ptr<int>(),
num_rois,
......@@ -254,3 +258,6 @@ at::Tensor PSROIPool_backward_cpu(
});
return grad_input;
}
} // namespace ops
} // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
// Public declarations of the ps_roi_pool operator entry points whose names
// carry the `_cpu` suffix. The definitions are expected in the matching .cpp
// file. VISION_API comes from "../macros.h".
namespace vision {
namespace ops {
// Forward pass: returns a pair of tensors.
// NOTE(review): presumably (output, channel_mapping), with the second element
// being what the backward pass takes as `channel_mapping` -- confirm against
// the definition.
VISION_API std::tuple<at::Tensor, at::Tensor> ps_roi_pool_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);
// Backward pass: takes the incoming gradient `grad`, the `rois`, a
// `channel_mapping` tensor and the pooling settings, and returns one tensor.
// NOTE(review): batch_size/channels/height/width presumably describe the shape
// of the forward input (and hence of the returned gradient) -- confirm.
VISION_API at::Tensor ps_roi_pool_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& channel_mapping,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
} // namespace ops
} // namespace vision
#include <ATen/TensorUtils.h>
#include "vision_cpu.h"
#include "roi_align_kernel.h"
namespace vision {
namespace ops {
namespace {
// implementation taken from Caffe2
template <typename T>
......@@ -111,7 +115,7 @@ void pre_calc_for_bilinear_interpolate(
}
template <typename T>
void ROIAlignForward(
void roi_align_forward_kernel_impl(
int nthreads,
const T* input,
const T& spatial_scale,
......@@ -277,7 +281,7 @@ inline void add(T* address, const T& val) {
}
template <typename T>
void ROIAlignBackward(
void roi_align_backward_kernel_impl(
int nthreads,
const T* grad_output,
const T& spatial_scale,
......@@ -382,9 +386,11 @@ void ROIAlignBackward(
} // ix
} // iy
} // for
} // ROIAlignBackward
}
at::Tensor ROIAlign_forward_cpu(
} // namespace
at::Tensor roi_align_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
......@@ -398,7 +404,7 @@ at::Tensor ROIAlign_forward_cpu(
at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "ROIAlign_forward_cpu";
at::CheckedFrom c = "roi_align_forward_cpu";
at::checkAllSameType(c, {input_t, rois_t});
auto num_rois = rois.size(0);
......@@ -416,8 +422,8 @@ at::Tensor ROIAlign_forward_cpu(
auto input_ = input.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
input.scalar_type(), "ROIAlign_forward", [&] {
ROIAlignForward<scalar_t>(
input.scalar_type(), "roi_align_forward_cpu", [&] {
roi_align_forward_kernel_impl<scalar_t>(
output_size,
input_.data_ptr<scalar_t>(),
spatial_scale,
......@@ -434,7 +440,7 @@ at::Tensor ROIAlign_forward_cpu(
return output;
}
at::Tensor ROIAlign_backward_cpu(
at::Tensor roi_align_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
double spatial_scale,
......@@ -451,7 +457,7 @@ at::Tensor ROIAlign_backward_cpu(
at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "ROIAlign_backward_cpu";
at::CheckedFrom c = "roi_align_backward_cpu";
at::checkAllSameType(c, {grad_t, rois_t});
at::Tensor grad_input =
......@@ -470,8 +476,8 @@ at::Tensor ROIAlign_backward_cpu(
auto rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
grad.scalar_type(), "ROIAlign_forward", [&] {
ROIAlignBackward<scalar_t>(
grad.scalar_type(), "roi_align_backward_cpu", [&] {
roi_align_backward_kernel_impl<scalar_t>(
grad.numel(),
grad.data_ptr<scalar_t>(),
spatial_scale,
......@@ -491,3 +497,6 @@ at::Tensor ROIAlign_backward_cpu(
});
return grad_input;
}
} // namespace ops
} // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
// Public declarations of the roi_align operator entry points whose names
// carry the `_cpu` suffix. The definitions are expected in the matching .cpp
// file. VISION_API comes from "../macros.h".
namespace vision {
namespace ops {
// Forward pass: takes `input`, `rois`, the pooling settings and an `aligned`
// flag, and returns a single tensor.
// NOTE(review): the exact meaning of `sampling_ratio` and `aligned` is not
// visible from this declaration -- see the definition.
VISION_API at::Tensor roi_align_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t sampling_ratio,
bool aligned);
// Backward pass: takes the incoming gradient `grad`, the `rois` and the same
// pooling settings, and returns one tensor. Note that the parameter order
// differs from the forward pass: `sampling_ratio` and `aligned` come last,
// after the four shape integers.
// NOTE(review): batch_size/channels/height/width presumably describe the shape
// of the forward input (and hence of the returned gradient) -- confirm.
VISION_API at::Tensor roi_align_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width,
int64_t sampling_ratio,
bool aligned);
} // namespace ops
} // namespace vision
#include <ATen/ATen.h>
#include <ATen/TensorUtils.h>
#include <TH/TH.h>
#include <algorithm>
#include <float.h>
#include "roi_pool_kernel.h"
namespace vision {
namespace ops {
namespace {
template <class T>
inline void add(T* address, const T& val) {
......@@ -9,7 +13,7 @@ inline void add(T* address, const T& val) {
}
template <typename T>
void RoIPoolForward(
void roi_pool_forward_kernel_impl(
const T* input,
const T spatial_scale,
int channels,
......@@ -78,7 +82,7 @@ void RoIPoolForward(
}
template <typename T>
void RoIPoolBackward(
void roi_pool_backward_kernel_impl(
const T* grad_output,
const int* argmax_data,
int num_rois,
......@@ -120,7 +124,9 @@ void RoIPoolBackward(
} // num_rois
}
std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cpu(
} // namespace
std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
......@@ -131,7 +137,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cpu(
at::TensorArg input_t{input, "input", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "ROIPool_forward_cpu";
at::CheckedFrom c = "roi_pool_forward_cpu";
at::checkAllSameType(c, {input_t, rois_t});
int num_rois = rois.size(0);
......@@ -151,8 +157,8 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cpu(
auto input_ = input.contiguous(), rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
input.scalar_type(), "ROIPool_forward", [&] {
RoIPoolForward<scalar_t>(
input.scalar_type(), "roi_pool_forward_cpu", [&] {
roi_pool_forward_kernel_impl<scalar_t>(
input_.data_ptr<scalar_t>(),
spatial_scale,
channels,
......@@ -168,7 +174,7 @@ std::tuple<at::Tensor, at::Tensor> ROIPool_forward_cpu(
return std::make_tuple(output, argmax);
}
at::Tensor ROIPool_backward_cpu(
at::Tensor roi_pool_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
......@@ -188,7 +194,7 @@ at::Tensor ROIPool_backward_cpu(
at::TensorArg grad_t{grad, "grad", 1}, rois_t{rois, "rois", 2};
at::CheckedFrom c = "ROIPool_backward_cpu";
at::CheckedFrom c = "roi_pool_backward_cpu";
at::checkAllSameType(c, {grad_t, rois_t});
auto num_rois = rois.size(0);
......@@ -209,8 +215,8 @@ at::Tensor ROIPool_backward_cpu(
auto rois_ = rois.contiguous();
AT_DISPATCH_FLOATING_TYPES_AND_HALF(
grad.scalar_type(), "ROIPool_backward", [&] {
RoIPoolBackward<scalar_t>(
grad.scalar_type(), "roi_pool_backward_cpu", [&] {
roi_pool_backward_kernel_impl<scalar_t>(
grad.data_ptr<scalar_t>(),
argmax.data_ptr<int>(),
num_rois,
......@@ -228,3 +234,6 @@ at::Tensor ROIPool_backward_cpu(
});
return grad_input;
}
} // namespace ops
} // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
// Public declarations of the roi_pool operator entry points whose names carry
// the `_cpu` suffix. The definitions are expected in the matching .cpp file.
// VISION_API comes from "../macros.h".
namespace vision {
namespace ops {
// Forward pass: returns a pair of tensors.
// NOTE(review): presumably (output, argmax), with the second element being
// what the backward pass takes as `argmax` -- confirm against the definition.
VISION_API std::tuple<at::Tensor, at::Tensor> roi_pool_forward_cpu(
const at::Tensor& input,
const at::Tensor& rois,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width);
// Backward pass: takes the incoming gradient `grad`, the `rois`, an `argmax`
// tensor and the pooling settings, and returns one tensor.
// NOTE(review): batch_size/channels/height/width presumably describe the shape
// of the forward input (and hence of the returned gradient) -- confirm.
VISION_API at::Tensor roi_pool_backward_cpu(
const at::Tensor& grad,
const at::Tensor& rois,
const at::Tensor& argmax,
double spatial_scale,
int64_t pooled_height,
int64_t pooled_width,
int64_t batch_size,
int64_t channels,
int64_t height,
int64_t width);
} // namespace ops
} // namespace vision
#ifndef REGISTER_H
#define REGISTER_H
#pragma once
#include "Video.h"
......@@ -15,4 +14,3 @@ static auto registerVideo =
.def("next", &Video::Next);
} // namespace
#endif
#pragma once
namespace vision {
namespace ops {
#define CUDA_1D_KERNEL_LOOP(i, n) \
for (int i = (blockIdx.x * blockDim.x) + threadIdx.x; i < (n); \
i += (blockDim.x * gridDim.x))
......@@ -8,3 +11,6 @@ template <typename integer>
constexpr __host__ __device__ inline integer ceil_div(integer n, integer m) {
return (n + m - 1) / m;
}
} // namespace ops
} // namespace vision
#pragma once
#include <ATen/ATen.h>
#include "../macros.h"
// Public declarations of the deform_conv2d operator entry points whose names
// carry the `_cuda` suffix. Only the declarations live here; the definitions
// are expected in the matching CUDA source file. VISION_API comes from
// "../macros.h".
namespace vision {
namespace ops {
// Forward pass: takes five tensors (input, weight, offset, mask, bias) plus
// integer stride/pad/dilation values, two group counts and a `use_mask` flag,
// and returns a single tensor. The parameter list matches the `_cpu` variant.
// NOTE(review): the `_h` / `_w` suffixes presumably denote the height and
// width components of each setting -- confirm against the definition.
VISION_API at::Tensor deform_conv2d_forward_cuda(
const at::Tensor& input,
const at::Tensor& weight,
const at::Tensor& offset,
const at::Tensor& mask,
const at::Tensor& bias,
int64_t stride_h,
int64_t stride_w,
int64_t pad_h,
int64_t pad_w,
int64_t dilation_h,
int64_t dilation_w,
int64_t n_weight_grps,
int64_t n_offset_grps,
bool use_mask);
// Backward pass: takes the same arguments as the forward pass preceded by
// `grad_out`, and returns a tuple of five tensors.
// NOTE(review): the five results are presumably the gradients w.r.t. input,
// weight, offset, mask and bias (in that order) -- the definition is not
// visible here, so confirm before relying on the ordering.
VISION_API std::
tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tensor>
deform_conv2d_backward_cuda(
const at::Tensor& grad_out,
const at::Tensor& input,
const at::Tensor& weight,
const at::Tensor& offset,
const at::Tensor& mask,
const at::Tensor& bias,
int64_t stride_h,
int64_t stride_w,
int64_t pad_h,
int64_t pad_w,
int64_t dilation_h,
int64_t dilation_w,
int64_t n_weight_grps,
int64_t n_offset_grps,
bool use_mask);
} // namespace ops
} // namespace vision
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment