"vscode:/vscode.git/clone" did not exist on "a1b9cb2a3469e4e682e741af9a0f91e16923205d"
Commit 6f3c5f1c authored by limm

support v1.4.0

parent 6f674c7e
@@ -2,19 +2,36 @@
 // https://github.com/sshaoshuai/Pointnet2.PyTorch/tree/master/pointnet2/src/ball_query.cpp
 #include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
 
-void ball_query_forward_impl(int b, int n, int m, float min_radius,
-                             float max_radius, int nsample,
-                             const Tensor new_xyz, const Tensor xyz,
-                             Tensor idx) {
-  DISPATCH_DEVICE_IMPL(ball_query_forward_impl, b, n, m, min_radius, max_radius,
-                       nsample, new_xyz, xyz, idx);
-}
+#ifdef MMCV_WITH_CUDA
+void BallQueryForwardCUDAKernelLauncher(int b, int n, int m, float min_radius,
+                                        float max_radius, int nsample,
+                                        const Tensor new_xyz, const Tensor xyz,
+                                        Tensor idx);
+
+void ball_query_forward_cuda(int b, int n, int m, float min_radius,
+                             float max_radius, int nsample,
+                             const Tensor new_xyz, const Tensor xyz,
+                             Tensor idx) {
+  BallQueryForwardCUDAKernelLauncher(b, n, m, min_radius, max_radius, nsample,
+                                     new_xyz, xyz, idx);
+};
+#endif
 
 void ball_query_forward(Tensor new_xyz_tensor, Tensor xyz_tensor,
                         Tensor idx_tensor, int b, int n, int m,
                         float min_radius, float max_radius, int nsample) {
-  ball_query_forward_impl(b, n, m, min_radius, max_radius, nsample,
-                          new_xyz_tensor, xyz_tensor, idx_tensor);
+  if (new_xyz_tensor.device().is_cuda()) {
+#ifdef MMCV_WITH_CUDA
+    CHECK_CUDA_INPUT(new_xyz_tensor);
+    CHECK_CUDA_INPUT(xyz_tensor);
+    ball_query_forward_cuda(b, n, m, min_radius, max_radius, nsample,
+                            new_xyz_tensor, xyz_tensor, idx_tensor);
+#else
+    AT_ERROR("ball_query is not compiled with GPU support");
+#endif
+  } else {
+    AT_ERROR("ball_query is not implemented on CPU");
+  }
 }
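Every wrapper converted in this commit follows the same shape: branch on the input tensor's device at run time, and guard the CUDA-only path at compile time with MMCV_WITH_CUDA. A minimal, self-contained sketch of that shape is given below; my_op_forward and my_op_forward_cuda are placeholder names rather than mmcv functions, and the TORCH_CHECK calls only stand in for what the CHECK_CUDA_INPUT helper from pytorch_cpp_helper.hpp roughly performs.

// Hedged sketch of the wrapper pattern used throughout this commit.
// my_op_forward / my_op_forward_cuda are hypothetical names.
#include <torch/extension.h>

#ifdef MMCV_WITH_CUDA
// Implemented in a .cu file and only compiled when CUDA is available.
void my_op_forward_cuda(const at::Tensor &input, at::Tensor &output);
#endif

void my_op_forward(const at::Tensor &input, at::Tensor &output) {
  if (input.device().is_cuda()) {
#ifdef MMCV_WITH_CUDA
    // Stand-ins for CHECK_CUDA_INPUT: device and contiguity checks.
    TORCH_CHECK(input.is_cuda(), "input must be a CUDA tensor");
    TORCH_CHECK(input.is_contiguous(), "input must be contiguous");
    my_op_forward_cuda(input, output);
#else
    AT_ERROR("my_op is not compiled with GPU support");
#endif
  } else {
    AT_ERROR("my_op is not implemented on CPU");
  }
}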
 // Copyright (c) OpenMMLab. All rights reserved
 #include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
 
-void bbox_overlaps_impl(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
-                        const int mode, const bool aligned, const int offset) {
-  DISPATCH_DEVICE_IMPL(bbox_overlaps_impl, bboxes1, bboxes2, ious, mode,
-                       aligned, offset);
-}
+#ifdef MMCV_WITH_CUDA
+void BBoxOverlapsCUDAKernelLauncher(const Tensor bboxes1, const Tensor bboxes2,
+                                    Tensor ious, const int mode,
+                                    const bool aligned, const int offset);
+
+void bbox_overlaps_cuda(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
+                        const int mode, const bool aligned, const int offset) {
+  BBoxOverlapsCUDAKernelLauncher(bboxes1, bboxes2, ious, mode, aligned, offset);
+}
+#endif
 
 void bbox_overlaps(const Tensor bboxes1, const Tensor bboxes2, Tensor ious,
                    const int mode, const bool aligned, const int offset) {
-  bbox_overlaps_impl(bboxes1, bboxes2, ious, mode, aligned, offset);
+  if (bboxes1.device().is_cuda()) {
+#ifdef MMCV_WITH_CUDA
+    CHECK_CUDA_INPUT(bboxes1);
+    CHECK_CUDA_INPUT(bboxes2);
+    CHECK_CUDA_INPUT(ious);
+    bbox_overlaps_cuda(bboxes1, bboxes2, ious, mode, aligned, offset);
+#else
+    AT_ERROR("bbox_overlaps is not compiled with GPU support");
+#endif
+  } else {
+    AT_ERROR("bbox_overlaps is not implemented on CPU");
+  }
 }
 // Copyright (c) OpenMMLab. All rights reserved
 #include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
 
-void border_align_forward_impl(const Tensor &input, const Tensor &boxes,
-                               Tensor output, Tensor argmax_idx,
-                               const int pool_size) {
-  DISPATCH_DEVICE_IMPL(border_align_forward_impl, input, boxes, output,
-                       argmax_idx, pool_size);
-}
-
-void border_align_backward_impl(const Tensor &grad_output, const Tensor &boxes,
-                                const Tensor &argmax_idx, Tensor grad_input,
-                                const int pool_size) {
-  DISPATCH_DEVICE_IMPL(border_align_backward_impl, grad_output, boxes,
-                       argmax_idx, grad_input, pool_size);
-}
+#ifdef MMCV_WITH_CUDA
+void BorderAlignForwardCUDAKernelLauncher(const Tensor &input,
+                                          const Tensor &boxes, Tensor output,
+                                          Tensor argmax_idx,
+                                          const int pool_size);
+
+void BorderAlignBackwardCUDAKernelLauncher(const Tensor &grad_output,
+                                           const Tensor &boxes,
+                                           const Tensor &argmax_idx,
+                                           Tensor grad_input,
+                                           const int pool_size);
+
+void border_align_forward_cuda(const Tensor &input, const Tensor &boxes,
+                               Tensor output, Tensor argmax_idx,
+                               const int pool_size) {
+  BorderAlignForwardCUDAKernelLauncher(input, boxes, output, argmax_idx,
+                                       pool_size);
+}
+
+void border_align_backward_cuda(const Tensor &grad_output, const Tensor &boxes,
+                                const Tensor &argmax_idx, Tensor grad_input,
+                                const int pool_size) {
+  BorderAlignBackwardCUDAKernelLauncher(grad_output, boxes, argmax_idx,
+                                        grad_input, pool_size);
+}
+#endif
 
 void border_align_forward(const Tensor &input, const Tensor &boxes,
                           Tensor output, Tensor argmax_idx,
                           const int pool_size) {
-  border_align_forward_impl(input, boxes, output, argmax_idx, pool_size);
+  if (input.device().is_cuda()) {
+#ifdef MMCV_WITH_CUDA
+    CHECK_CUDA_INPUT(input);
+    CHECK_CUDA_INPUT(boxes);
+    CHECK_CUDA_INPUT(output);
+    CHECK_CUDA_INPUT(argmax_idx);
+    border_align_forward_cuda(input, boxes, output, argmax_idx, pool_size);
+#else
+    AT_ERROR("BorderAlign is not compiled with GPU support");
+#endif
+  } else {
+    AT_ERROR("BorderAlign is not implemented on CPU");
+  }
 }
 
 void border_align_backward(const Tensor &grad_output, const Tensor &boxes,
                            const Tensor &argmax_idx, Tensor grad_input,
                            const int pool_size) {
-  border_align_backward_impl(grad_output, boxes, argmax_idx, grad_input,
-                             pool_size);
+  if (grad_output.device().is_cuda()) {
+#ifdef MMCV_WITH_CUDA
+    CHECK_CUDA_INPUT(grad_output);
+    CHECK_CUDA_INPUT(boxes);
+    CHECK_CUDA_INPUT(argmax_idx);
+    CHECK_CUDA_INPUT(grad_input);
+    border_align_backward_cuda(grad_output, boxes, argmax_idx, grad_input,
+                               pool_size);
+#else
+    AT_ERROR("BorderAlign is not compiled with GPU support");
+#endif
+  } else {
+    AT_ERROR("BorderAlign is not implemented on CPU");
+  }
 }
@@ -7,7 +7,6 @@
 using namespace parrots;
 
-#ifdef MMCV_WITH_CUDA
 void border_align_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
                                        const OperatorBase::in_list_t& ins,
                                        OperatorBase::out_list_t& outs) {
@@ -50,4 +49,3 @@ PARROTS_EXTENSION_REGISTER(border_align_backward)
     .output(1)
     .apply(border_align_backward_cuda_parrots)
     .done();
-#endif
@@ -2,18 +2,28 @@
 // modified from
 // https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated.h
 #include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
 
-void box_iou_rotated_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
-                          const int mode_flag, const bool aligned) {
-  DISPATCH_DEVICE_IMPL(box_iou_rotated_impl, boxes1, boxes2, ious, mode_flag,
-                       aligned);
-}
+void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2, Tensor ious,
+                         const int mode_flag, const bool aligned);
+
+#ifdef MMCV_WITH_CUDA
+void box_iou_rotated_cuda(const Tensor boxes1, const Tensor boxes2, Tensor ious,
+                          const int mode_flag, const bool aligned);
+#endif
 
 // Interface for Python
 // inline is needed to prevent multiple function definitions when this header is
 // included by different cpps
 void box_iou_rotated(const Tensor boxes1, const Tensor boxes2, Tensor ious,
                      const int mode_flag, const bool aligned) {
-  box_iou_rotated_impl(boxes1, boxes2, ious, mode_flag, aligned);
+  assert(boxes1.device().is_cuda() == boxes2.device().is_cuda());
+  if (boxes1.device().is_cuda()) {
+#ifdef MMCV_WITH_CUDA
+    box_iou_rotated_cuda(boxes1, boxes2, ious, mode_flag, aligned);
+#else
+    AT_ERROR("Not compiled with GPU support");
+#endif
+  } else {
+    box_iou_rotated_cpu(boxes1, boxes2, ious, mode_flag, aligned);
+  }
 }
 // Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
+// modified from
+// https://github.com/facebookresearch/detectron2/blob/master/detectron2/layers/csrc/box_iou_rotated/box_iou_rotated_cpu.cpp
 #include "box_iou_rotated_utils.hpp"
 #include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
 
 template <typename T>
-void box_iou_quadri_cpu_kernel(const Tensor boxes1, const Tensor boxes2,
-                               Tensor ious, const int mode_flag,
-                               const bool aligned) {
+void box_iou_rotated_cpu_kernel(const Tensor boxes1, const Tensor boxes2,
+                                Tensor ious, const int mode_flag,
+                                const bool aligned) {
   int output_size = ious.numel();
   auto num_boxes1 = boxes1.size(0);
   auto num_boxes2 = boxes2.size(0);
 
   if (aligned) {
     for (int i = 0; i < output_size; i++) {
-      ious[i] = single_box_iou_quadri<T>(boxes1[i].data_ptr<T>(),
-                                         boxes2[i].data_ptr<T>(), mode_flag);
+      ious[i] = single_box_iou_rotated<T>(boxes1[i].data_ptr<T>(),
+                                          boxes2[i].data_ptr<T>(), mode_flag);
     }
   } else {
     for (int i = 0; i < num_boxes1; i++) {
       for (int j = 0; j < num_boxes2; j++) {
-        ious[i * num_boxes2 + j] = single_box_iou_quadri<T>(
+        ious[i * num_boxes2 + j] = single_box_iou_rotated<T>(
             boxes1[i].data_ptr<T>(), boxes2[j].data_ptr<T>(), mode_flag);
       }
     }
   }
 }
 
-void box_iou_quadri_cpu(const Tensor boxes1, const Tensor boxes2, Tensor ious,
-                        const int mode_flag, const bool aligned) {
-  box_iou_quadri_cpu_kernel<float>(boxes1, boxes2, ious, mode_flag, aligned);
-}
-
-void box_iou_quadri_impl(const Tensor boxes1, const Tensor boxes2, Tensor ious,
-                         const int mode_flag, const bool aligned);
-REGISTER_DEVICE_IMPL(box_iou_quadri_impl, CPU, box_iou_quadri_cpu);
+void box_iou_rotated_cpu(const Tensor boxes1, const Tensor boxes2, Tensor ious,
+                         const int mode_flag, const bool aligned) {
+  box_iou_rotated_cpu_kernel<float>(boxes1, boxes2, ious, mode_flag, aligned);
+}
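One detail worth noting in the CPU kernel above is the layout of ious: with aligned == true it expects one value per paired box (ious.numel() == N), while with aligned == false it expects a dense N x M buffer filled row-major as i * num_boxes2 + j. A hedged usage sketch follows; it assumes the (cx, cy, w, h, angle) box encoding of the detectron2 code this file is modified from, and that mode_flag 0 selects IoU.

#include <torch/torch.h>

#include <iostream>

// Declaration of the wrapper defined above (link against that translation unit).
void box_iou_rotated_cpu(const at::Tensor boxes1, const at::Tensor boxes2,
                         at::Tensor ious, const int mode_flag,
                         const bool aligned);

int main() {
  at::Tensor boxes1 = torch::rand({4, 5});  // 4 boxes, assumed (cx, cy, w, h, angle)
  at::Tensor boxes2 = torch::rand({6, 5});  // 6 boxes
  at::Tensor ious = torch::zeros({4 * 6});  // dense pairwise output, row-major
  box_iou_rotated_cpu(boxes1, boxes2, ious, /*mode_flag=*/0, /*aligned=*/false);
  // ious[i][j] pairs boxes1[i] with boxes2[j].
  std::cout << ious.view({4, 6}) << std::endl;
  return 0;
}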
 // Copyright (c) OpenMMLab. All rights reserved
 #include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
 
-void carafe_forward_impl(Tensor features, Tensor masks, Tensor rfeatures,
-                         Tensor routput, Tensor rmasks, Tensor output,
-                         int kernel_size, int group_size, int scale_factor) {
-  DISPATCH_DEVICE_IMPL(carafe_forward_impl, features, masks, rfeatures, routput,
-                       rmasks, output, kernel_size, group_size, scale_factor);
-}
-
-void carafe_backward_impl(Tensor top_grad, Tensor rfeatures, Tensor masks,
-                          Tensor rtop_grad, Tensor rbottom_grad_hs,
-                          Tensor rbottom_grad, Tensor rmask_grad,
-                          Tensor bottom_grad, Tensor mask_grad, int kernel_size,
-                          int group_size, int scale_factor) {
-  DISPATCH_DEVICE_IMPL(carafe_backward_impl, top_grad, rfeatures, masks,
-                       rtop_grad, rbottom_grad_hs, rbottom_grad, rmask_grad,
-                       bottom_grad, mask_grad, kernel_size, group_size,
-                       scale_factor);
-}
+#ifdef MMCV_WITH_CUDA
+void CARAFEForwardCUDAKernelLauncher(const Tensor features, const Tensor masks,
+                                     Tensor rfeatures, Tensor routput,
+                                     Tensor rmasks, Tensor output,
+                                     const int kernel_size,
+                                     const int group_size,
+                                     const int scale_factor);
+
+void CARAFEBackwardCUDAKernelLauncher(
+    const Tensor top_grad, const Tensor rfeatures, const Tensor masks,
+    Tensor rtop_grad, Tensor rbottom_grad_hs, Tensor rbottom_grad,
+    Tensor rmask_grad, Tensor bottom_grad, Tensor mask_grad,
+    const int kernel_size, const int group_size, const int scale_factor);
+
+void carafe_forward_cuda(Tensor features, Tensor masks, Tensor rfeatures,
+                         Tensor routput, Tensor rmasks, Tensor output,
+                         int kernel_size, int group_size, int scale_factor) {
+  CARAFEForwardCUDAKernelLauncher(features, masks, rfeatures, routput, rmasks,
+                                  output, kernel_size, group_size,
+                                  scale_factor);
+}
+
+void carafe_backward_cuda(Tensor top_grad, Tensor rfeatures, Tensor masks,
+                          Tensor rtop_grad, Tensor rbottom_grad_hs,
+                          Tensor rbottom_grad, Tensor rmask_grad,
+                          Tensor bottom_grad, Tensor mask_grad, int kernel_size,
+                          int group_size, int scale_factor) {
+  CARAFEBackwardCUDAKernelLauncher(top_grad, rfeatures, masks, rtop_grad,
+                                   rbottom_grad_hs, rbottom_grad, rmask_grad,
+                                   bottom_grad, mask_grad, kernel_size,
+                                   group_size, scale_factor);
+}
+#endif
 
 void carafe_forward(Tensor features, Tensor masks, Tensor rfeatures,
                     Tensor routput, Tensor rmasks, Tensor output,
                     int kernel_size, int group_size, int scale_factor) {
-  carafe_forward_impl(features, masks, rfeatures, routput, rmasks, output,
-                      kernel_size, group_size, scale_factor);
+  if (features.device().is_cuda()) {
+#ifdef MMCV_WITH_CUDA
+    CHECK_CUDA_INPUT(features);
+    CHECK_CUDA_INPUT(masks);
+    CHECK_CUDA_INPUT(rfeatures);
+    CHECK_CUDA_INPUT(routput);
+    CHECK_CUDA_INPUT(rmasks);
+    CHECK_CUDA_INPUT(output);
+    carafe_forward_cuda(features, masks, rfeatures, routput, rmasks, output,
+                        kernel_size, group_size, scale_factor);
+#else
+    AT_ERROR("Carafe is not compiled with GPU support");
+#endif
+  } else {
+    AT_ERROR("Carafe is not implemented on CPU");
+  }
 }
 
 void carafe_backward(Tensor top_grad, Tensor rfeatures, Tensor masks,
@@ -32,7 +61,24 @@ void carafe_backward(Tensor top_grad, Tensor rfeatures, Tensor masks,
                      Tensor rbottom_grad, Tensor rmask_grad, Tensor bottom_grad,
                      Tensor mask_grad, int kernel_size, int group_size,
                      int scale_factor) {
-  carafe_backward_impl(top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs,
-                       rbottom_grad, rmask_grad, bottom_grad, mask_grad,
-                       kernel_size, group_size, scale_factor);
+  if (top_grad.device().is_cuda()) {
+#ifdef MMCV_WITH_CUDA
+    CHECK_CUDA_INPUT(top_grad);
+    CHECK_CUDA_INPUT(rfeatures);
+    CHECK_CUDA_INPUT(masks);
+    CHECK_CUDA_INPUT(rtop_grad);
+    CHECK_CUDA_INPUT(rbottom_grad_hs);
+    CHECK_CUDA_INPUT(rbottom_grad);
+    CHECK_CUDA_INPUT(rmask_grad);
+    CHECK_CUDA_INPUT(bottom_grad);
+    CHECK_CUDA_INPUT(mask_grad);
+    carafe_backward_cuda(top_grad, rfeatures, masks, rtop_grad, rbottom_grad_hs,
+                         rbottom_grad, rmask_grad, bottom_grad, mask_grad,
+                         kernel_size, group_size, scale_factor);
+#else
+    AT_ERROR("Carafe is not compiled with GPU support");
+#endif
+  } else {
+    AT_ERROR("Carafe is not implemented on CPU");
+  }
 }
 // Copyright (c) OpenMMLab. All rights reserved
 #include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
 
-void carafe_naive_forward_impl(Tensor features, Tensor masks, Tensor output,
-                               int kernel_size, int group_size,
-                               int scale_factor) {
-  DISPATCH_DEVICE_IMPL(carafe_naive_forward_impl, features, masks, output,
-                       kernel_size, group_size, scale_factor);
-}
-
-void carafe_naive_backward_impl(Tensor top_grad, Tensor features, Tensor masks,
-                                Tensor bottom_grad, Tensor mask_grad,
-                                int kernel_size, int group_size,
-                                int scale_factor) {
-  DISPATCH_DEVICE_IMPL(carafe_naive_backward_impl, top_grad, features, masks,
-                       bottom_grad, mask_grad, kernel_size, group_size,
-                       scale_factor);
-}
+#ifdef MMCV_WITH_CUDA
+void CARAFENAIVEForwardCUDAKernelLauncher(const Tensor features,
+                                          const Tensor masks, Tensor output,
+                                          const int kernel_size,
+                                          const int group_size,
+                                          const int scale_factor);
+
+void CARAFENAIVEBackwardCUDAKernelLauncher(
+    const Tensor top_grad, const Tensor features, const Tensor masks,
+    Tensor bottom_grad, Tensor mask_grad, const int kernel_size,
+    const int group_size, const int scale_factor);
+
+void carafe_naive_forward_cuda(Tensor features, Tensor masks, Tensor output,
+                               int kernel_size, int group_size,
+                               int scale_factor) {
+  CARAFENAIVEForwardCUDAKernelLauncher(features, masks, output, kernel_size,
+                                       group_size, scale_factor);
+}
+
+void carafe_naive_backward_cuda(Tensor top_grad, Tensor features, Tensor masks,
+                                Tensor bottom_grad, Tensor mask_grad,
+                                int kernel_size, int group_size,
+                                int scale_factor) {
+  CARAFENAIVEBackwardCUDAKernelLauncher(top_grad, features, masks, bottom_grad,
+                                        mask_grad, kernel_size, group_size,
+                                        scale_factor);
+}
+#endif
 
 void carafe_naive_forward(Tensor features, Tensor masks, Tensor output,
                           int kernel_size, int group_size, int scale_factor) {
-  carafe_naive_forward_impl(features, masks, output, kernel_size, group_size,
-                            scale_factor);
+  if (features.device().is_cuda()) {
+#ifdef MMCV_WITH_CUDA
+    CHECK_CUDA_INPUT(features);
+    CHECK_CUDA_INPUT(masks);
+    CHECK_CUDA_INPUT(output);
+    carafe_naive_forward_cuda(features, masks, output, kernel_size, group_size,
+                              scale_factor);
+#else
+    AT_ERROR("CarafeNaive is not compiled with GPU support");
+#endif
+  } else {
+    AT_ERROR("CarafeNaive is not implemented on CPU");
+  }
 }
 
 void carafe_naive_backward(Tensor top_grad, Tensor features, Tensor masks,
                            Tensor bottom_grad, Tensor mask_grad,
                            int kernel_size, int group_size, int scale_factor) {
-  carafe_naive_backward_impl(top_grad, features, masks, bottom_grad, mask_grad,
-                             kernel_size, group_size, scale_factor);
+  if (top_grad.device().is_cuda()) {
+#ifdef MMCV_WITH_CUDA
+    CHECK_CUDA_INPUT(top_grad);
+    CHECK_CUDA_INPUT(features);
+    CHECK_CUDA_INPUT(masks);
+    CHECK_CUDA_INPUT(bottom_grad);
+    CHECK_CUDA_INPUT(mask_grad);
+    carafe_naive_backward_cuda(top_grad, features, masks, bottom_grad,
+                               mask_grad, kernel_size, group_size,
+                               scale_factor);
+#else
+    AT_ERROR("CarafeNaive is not compiled with GPU support");
+#endif
+  } else {
+    AT_ERROR("CarafeNaive is not implemented on CPU");
+  }
 }
// Copyright (c) OpenMMLab. All rights reserved.
// Modified from
// https://github.com/chrdiller/pyTorchChamferDistance/blob/master/chamfer_distance/chamfer_distance.cpp
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
void chamfer_distance_forward_impl(const Tensor xyz1, const Tensor xyz2,
const Tensor dist1, const Tensor dist2,
const Tensor idx1, const Tensor idx2) {
DISPATCH_DEVICE_IMPL(chamfer_distance_forward_impl, xyz1, xyz2, dist1, dist2,
idx1, idx2);
}
void chamfer_distance_backward_impl(const Tensor xyz1, const Tensor xyz2,
Tensor idx1, Tensor idx2, Tensor graddist1,
Tensor graddist2, Tensor gradxyz1,
Tensor gradxyz2) {
DISPATCH_DEVICE_IMPL(chamfer_distance_backward_impl, xyz1, xyz2, idx1, idx2,
graddist1, graddist2, gradxyz1, gradxyz2);
}
void chamfer_distance_forward(const Tensor xyz1, const Tensor xyz2,
const Tensor dist1, const Tensor dist2,
const Tensor idx1, const Tensor idx2) {
chamfer_distance_forward_impl(xyz1, xyz2, dist1, dist2, idx1, idx2);
}
void chamfer_distance_backward(const Tensor xyz1, const Tensor xyz2,
Tensor idx1, Tensor idx2, Tensor graddist1,
Tensor graddist2, Tensor gradxyz1,
Tensor gradxyz2) {
chamfer_distance_backward_impl(xyz1, xyz2, idx1, idx2, graddist1, graddist2,
gradxyz1, gradxyz2);
}
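The two *_impl functions above contain no computation themselves; they hand off to whichever backend registered itself for the arguments' device (backend sources attach via REGISTER_DEVICE_IMPL, and DISPATCH_DEVICE_IMPL routes on the tensors' device type). mmcv's real machinery lives in pytorch_device_registry.hpp; the snippet below is only a simplified, hypothetical stand-in with a reduced two-argument signature, meant to show the idea of a device-keyed function table (chamfer_forward_registry, register_chamfer_forward and dispatch_chamfer_forward are illustrative names only).

// Simplified, assumed model of a device registry; not mmcv's implementation.
#include <ATen/ATen.h>

#include <functional>
#include <map>
#include <stdexcept>

using ChamferForwardFn =
    std::function<void(const at::Tensor &, const at::Tensor &)>;

// One table per operator, keyed by device type (CPU, CUDA, ...).
std::map<at::DeviceType, ChamferForwardFn> &chamfer_forward_registry() {
  static std::map<at::DeviceType, ChamferForwardFn> registry;
  return registry;
}

// Analogue of REGISTER_DEVICE_IMPL: a backend installs its kernel here.
void register_chamfer_forward(at::DeviceType device, ChamferForwardFn fn) {
  chamfer_forward_registry()[device] = fn;
}

// Analogue of DISPATCH_DEVICE_IMPL: route on the device of the first tensor.
void dispatch_chamfer_forward(const at::Tensor &xyz1, const at::Tensor &xyz2) {
  auto it = chamfer_forward_registry().find(xyz1.device().type());
  if (it == chamfer_forward_registry().end()) {
    throw std::runtime_error(
        "chamfer_distance: no implementation registered for this device");
  }
  it->second(xyz1, xyz2);
}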
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "chamfer_distance_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
void chamfer_distance_forward_cuda_parrots(CudaContext& ctx,
const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
auto xyz1 = buildATensor(ctx, ins[0]);
auto xyz2 = buildATensor(ctx, ins[1]);
auto dist1 = buildATensor(ctx, outs[0]);
auto dist2 = buildATensor(ctx, outs[1]);
auto idx1 = buildATensor(ctx, outs[2]);
auto idx2 = buildATensor(ctx, outs[3]);
chamfer_distance_forward(xyz1, xyz2, dist1, dist2, idx1, idx2);
}
void chamfer_distance_backward_cuda_parrots(CudaContext& ctx,
const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
auto xyz1 = buildATensor(ctx, ins[0]);
auto xyz2 = buildATensor(ctx, ins[1]);
auto idx1 = buildATensor(ctx, ins[2]);
auto idx2 = buildATensor(ctx, ins[3]);
auto graddist1 = buildATensor(ctx, ins[4]);
auto graddist2 = buildATensor(ctx, ins[5]);
auto gradxyz1 = buildATensor(ctx, outs[0]);
auto gradxyz2 = buildATensor(ctx, outs[1]);
chamfer_distance_backward(xyz1, xyz2, idx1, idx2, graddist1, graddist2,
gradxyz1, gradxyz2);
}
PARROTS_EXTENSION_REGISTER(chamfer_distance_forward)
.input(2)
.output(4)
.apply(chamfer_distance_forward_cuda_parrots)
.done();
PARROTS_EXTENSION_REGISTER(chamfer_distance_backward)
.input(6)
.output(2)
.apply(chamfer_distance_backward_cuda_parrots)
.done();
#endif
// Copyright (c) OpenMMLab. All rights reserved
#ifndef ACTIVE_CHAMFER_DISTANCE_PYTORCH_H
#define ACTIVE_CHAMFER_DISTANCE_PYTORCH_H
#include <torch/extension.h>
using namespace at;
void chamfer_distance_forward(const Tensor xyz1, const Tensor xyz2,
const Tensor dist1, const Tensor dist2,
const Tensor idx1, const Tensor idx2);
void chamfer_distance_backward(const Tensor xyz1, const Tensor xyz2,
Tensor idx1, Tensor idx2, Tensor graddist1,
Tensor graddist2, Tensor gradxyz1,
Tensor gradxyz2);
#endif // ACTIVE_CHAMFER_DISTANCE_PYTORCH_H
@@ -102,6 +102,7 @@ std::vector<std::vector<int>> contour_expand(Tensor kernel_mask,
   IntArrayRef data_shape = kernel_mask.sizes();
   auto data_label_map = internal_kernel_label.data_ptr<int32_t>();
+  IntArrayRef label_map_shape = internal_kernel_label.sizes();
   vector<vector<int>> text_line;
   kernel_dilate(ptr_data, data_shape, data_label_map, kernel_num,
// Copyright (c) OpenMMLab. All rights reserved
// modified from
// https://github.com/SDL-GuoZonghao/BeyondBoundingBox/tree/main/mmdet/ops/iou/src
#include "pytorch_cpp_helper.hpp"
#include "pytorch_device_registry.hpp"
void convex_iou_impl(const Tensor pointsets, const Tensor polygons,
Tensor ious) {
DISPATCH_DEVICE_IMPL(convex_iou_impl, pointsets, polygons, ious);
}
void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious) {
convex_iou_impl(pointsets, polygons, ious);
}
void convex_giou_impl(const Tensor pointsets, const Tensor polygons,
Tensor output) {
DISPATCH_DEVICE_IMPL(convex_giou_impl, pointsets, polygons, output);
}
void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output) {
convex_giou_impl(pointsets, polygons, output);
}
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "convex_iou_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
void convex_iou_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
auto pointsets = buildATensor(ctx, ins[0]);
auto polygons = buildATensor(ctx, ins[1]);
auto ious = buildATensor(ctx, outs[0]);
convex_iou(pointsets, polygons, ious);
}
void convex_giou_forward_cuda_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
auto pointsets = buildATensor(ctx, ins[0]);
auto polygons = buildATensor(ctx, ins[1]);
auto output = buildATensor(ctx, outs[0]);
convex_giou(pointsets, polygons, output);
}
PARROTS_EXTENSION_REGISTER(convex_iou)
.input(2)
.output(1)
.apply(convex_iou_forward_cuda_parrots)
.done();
PARROTS_EXTENSION_REGISTER(convex_giou)
.input(2)
.output(1)
.apply(convex_giou_forward_cuda_parrots)
.done();
#endif
// Copyright (c) OpenMMLab. All rights reserved
#ifndef CONVEX_IOU_PYTORCH_H
#define CONVEX_IOU_PYTORCH_H
#include <torch/extension.h>
using namespace at;
void convex_iou(const Tensor pointsets, const Tensor polygons, Tensor ious);
void convex_giou(const Tensor pointsets, const Tensor polygons, Tensor output);
#endif  // CONVEX_IOU_PYTORCH_H
// Copyright (c) OpenMMLab. All rights reserved
// Modified from
// https://github.com/princeton-vl/CornerNet-Lite/tree/master/core/models/py_utils/_cpools/src
#include "pytorch_cpp_helper.hpp"
Tensor bottom_pool_forward(Tensor input) {
// Initialize output
Tensor output = at::zeros_like(input);
// Get height
int64_t height = input.size(2);
output.copy_(input);
for (int64_t ind = 1; ind < height; ind <<= 1) {
Tensor max_temp = at::slice(output, 2, ind, height);
Tensor cur_temp = at::slice(output, 2, ind, height).clone();
Tensor next_temp = at::slice(output, 2, 0, height - ind).clone();
at::max_out(max_temp, cur_temp, next_temp);
}
return output;
}
Tensor bottom_pool_backward(Tensor input, Tensor grad_output) {
auto output = at::zeros_like(input);
int32_t batch = input.size(0);
int32_t channel = input.size(1);
int32_t height = input.size(2);
int32_t width = input.size(3);
auto max_val = torch::zeros({batch, channel, width},
at::device(at::kCUDA).dtype(at::kFloat));
auto max_ind = torch::zeros({batch, channel, width},
at::device(at::kCUDA).dtype(at::kLong));
auto input_temp = input.select(2, 0);
max_val.copy_(input_temp);
max_ind.fill_(0);
auto output_temp = output.select(2, 0);
auto grad_output_temp = grad_output.select(2, 0);
output_temp.copy_(grad_output_temp);
auto un_max_ind = max_ind.unsqueeze(2);
auto gt_mask = torch::zeros({batch, channel, width},
at::device(at::kCUDA).dtype(at::kBool));
auto max_temp = torch::zeros({batch, channel, width},
at::device(at::kCUDA).dtype(at::kFloat));
for (int32_t ind = 0; ind < height - 1; ++ind) {
input_temp = input.select(2, ind + 1);
at::gt_out(gt_mask, input_temp, max_val);
at::masked_select_out(max_temp, input_temp, gt_mask);
max_val.masked_scatter_(gt_mask, max_temp);
max_ind.masked_fill_(gt_mask, ind + 1);
grad_output_temp = grad_output.select(2, ind + 1).unsqueeze(2);
output.scatter_add_(2, un_max_ind, grad_output_temp);
}
return output;
}
Tensor left_pool_forward(Tensor input) {
// Initialize output
Tensor output = at::zeros_like(input);
// Get width
int64_t width = input.size(3);
output.copy_(input);
for (int64_t ind = 1; ind < width; ind <<= 1) {
Tensor max_temp = at::slice(output, 3, 0, width - ind);
Tensor cur_temp = at::slice(output, 3, 0, width - ind).clone();
Tensor next_temp = at::slice(output, 3, ind, width).clone();
at::max_out(max_temp, cur_temp, next_temp);
}
return output;
}
Tensor left_pool_backward(Tensor input, Tensor grad_output) {
auto output = at::zeros_like(input);
int32_t batch = input.size(0);
int32_t channel = input.size(1);
int32_t height = input.size(2);
int32_t width = input.size(3);
auto max_val = torch::zeros({batch, channel, height},
at::device(at::kCUDA).dtype(at::kFloat));
auto max_ind = torch::zeros({batch, channel, height},
at::device(at::kCUDA).dtype(at::kLong));
auto input_temp = input.select(3, width - 1);
max_val.copy_(input_temp);
max_ind.fill_(width - 1);
auto output_temp = output.select(3, width - 1);
auto grad_output_temp = grad_output.select(3, width - 1);
output_temp.copy_(grad_output_temp);
auto un_max_ind = max_ind.unsqueeze(3);
auto gt_mask = torch::zeros({batch, channel, height},
at::device(at::kCUDA).dtype(at::kBool));
auto max_temp = torch::zeros({batch, channel, height},
at::device(at::kCUDA).dtype(at::kFloat));
for (int32_t ind = 1; ind < width; ++ind) {
input_temp = input.select(3, width - ind - 1);
at::gt_out(gt_mask, input_temp, max_val);
at::masked_select_out(max_temp, input_temp, gt_mask);
max_val.masked_scatter_(gt_mask, max_temp);
max_ind.masked_fill_(gt_mask, width - ind - 1);
grad_output_temp = grad_output.select(3, width - ind - 1).unsqueeze(3);
output.scatter_add_(3, un_max_ind, grad_output_temp);
}
return output;
}
Tensor right_pool_forward(Tensor input) {
// Initialize output
Tensor output = at::zeros_like(input);
// Get width
int64_t width = input.size(3);
output.copy_(input);
for (int64_t ind = 1; ind < width; ind <<= 1) {
Tensor max_temp = at::slice(output, 3, ind, width);
Tensor cur_temp = at::slice(output, 3, ind, width).clone();
Tensor next_temp = at::slice(output, 3, 0, width - ind).clone();
at::max_out(max_temp, cur_temp, next_temp);
}
return output;
}
Tensor right_pool_backward(Tensor input, Tensor grad_output) {
Tensor output = at::zeros_like(input);
int32_t batch = input.size(0);
int32_t channel = input.size(1);
int32_t height = input.size(2);
int32_t width = input.size(3);
auto max_val = torch::zeros({batch, channel, height},
at::device(at::kCUDA).dtype(at::kFloat));
auto max_ind = torch::zeros({batch, channel, height},
at::device(at::kCUDA).dtype(at::kLong));
auto input_temp = input.select(3, 0);
max_val.copy_(input_temp);
max_ind.fill_(0);
auto output_temp = output.select(3, 0);
auto grad_output_temp = grad_output.select(3, 0);
output_temp.copy_(grad_output_temp);
auto un_max_ind = max_ind.unsqueeze(3);
auto gt_mask = torch::zeros({batch, channel, height},
at::device(at::kCUDA).dtype(at::kBool));
auto max_temp = torch::zeros({batch, channel, height},
at::device(at::kCUDA).dtype(at::kFloat));
for (int32_t ind = 0; ind < width - 1; ++ind) {
input_temp = input.select(3, ind + 1);
at::gt_out(gt_mask, input_temp, max_val);
at::masked_select_out(max_temp, input_temp, gt_mask);
max_val.masked_scatter_(gt_mask, max_temp);
max_ind.masked_fill_(gt_mask, ind + 1);
grad_output_temp = grad_output.select(3, ind + 1).unsqueeze(3);
output.scatter_add_(3, un_max_ind, grad_output_temp);
}
return output;
}
Tensor top_pool_forward(Tensor input) {
// Initialize output
Tensor output = at::zeros_like(input);
// Get height
int64_t height = input.size(2);
output.copy_(input);
for (int64_t ind = 1; ind < height; ind <<= 1) {
Tensor max_temp = at::slice(output, 2, 0, height - ind);
Tensor cur_temp = at::slice(output, 2, 0, height - ind).clone();
Tensor next_temp = at::slice(output, 2, ind, height).clone();
at::max_out(max_temp, cur_temp, next_temp);
}
return output;
}
Tensor top_pool_backward(Tensor input, Tensor grad_output) {
auto output = at::zeros_like(input);
int32_t batch = input.size(0);
int32_t channel = input.size(1);
int32_t height = input.size(2);
int32_t width = input.size(3);
auto max_val = torch::zeros({batch, channel, width},
at::device(at::kCUDA).dtype(at::kFloat));
auto max_ind = torch::zeros({batch, channel, width},
at::device(at::kCUDA).dtype(at::kLong));
auto input_temp = input.select(2, height - 1);
max_val.copy_(input_temp);
max_ind.fill_(height - 1);
auto output_temp = output.select(2, height - 1);
auto grad_output_temp = grad_output.select(2, height - 1);
output_temp.copy_(grad_output_temp);
auto un_max_ind = max_ind.unsqueeze(2);
auto gt_mask = torch::zeros({batch, channel, width},
at::device(at::kCUDA).dtype(at::kBool));
auto max_temp = torch::zeros({batch, channel, width},
at::device(at::kCUDA).dtype(at::kFloat));
for (int32_t ind = 1; ind < height; ++ind) {
input_temp = input.select(2, height - ind - 1);
at::gt_out(gt_mask, input_temp, max_val);
at::masked_select_out(max_temp, input_temp, gt_mask);
max_val.masked_scatter_(gt_mask, max_temp);
max_ind.masked_fill_(gt_mask, height - ind - 1);
grad_output_temp = grad_output.select(2, height - ind - 1).unsqueeze(2);
output.scatter_add_(2, un_max_ind, grad_output_temp);
}
return output;
}
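Each *_pool_forward above computes a directional running maximum with a logarithmic number of passes: the ind <<= 1 loop doubles the propagation distance each round, so for bottom_pool_forward the result is output[n, c, h, w] = max over h' <= h of input[n, c, h', w] after roughly log2(height) iterations. For reference, a naive O(height) equivalent of the bottom direction (my own sketch, not part of this commit) is:

#include <torch/torch.h>

// Row-by-row running maximum along dim 2; matches what the doubling loop in
// bottom_pool_forward computes, one row at a time instead of log2(H) passes.
torch::Tensor bottom_pool_forward_naive(const torch::Tensor &input) {
  torch::Tensor output = input.clone();
  const int64_t height = input.size(2);
  for (int64_t h = 1; h < height; ++h) {
    // Elementwise max of the current row and the running max just above it.
    output.select(2, h).copy_(
        torch::max(output.select(2, h), output.select(2, h - 1)));
  }
  return output;
}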
// Copyright (c) OpenMMLab. All rights reserved
#include <parrots/compute/aten.hpp>
#include <parrots/extension.hpp>
#include <parrots/foundation/ssattrs.hpp>
#include "corner_pool_pytorch.h"
using namespace parrots;
#ifdef MMCV_WITH_CUDA
void bottom_pool_forward_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input;
input = buildATensor(ctx, ins[0]);
auto out = bottom_pool_forward(input);
updateDArray(ctx, out, outs[0]);
}
void bottom_pool_backward_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input, grad_output;
input = buildATensor(ctx, ins[0]);
grad_output = buildATensor(ctx, ins[1]);
auto out = bottom_pool_backward(input, grad_output);
updateDArray(ctx, out, outs[0]);
}
void left_pool_forward_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input;
input = buildATensor(ctx, ins[0]);
auto out = left_pool_forward(input);
updateDArray(ctx, out, outs[0]);
}
void left_pool_backward_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input, grad_output;
input = buildATensor(ctx, ins[0]);
grad_output = buildATensor(ctx, ins[1]);
auto out = left_pool_backward(input, grad_output);
updateDArray(ctx, out, outs[0]);
}
void right_pool_forward_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input;
input = buildATensor(ctx, ins[0]);
auto out = right_pool_forward(input);
updateDArray(ctx, out, outs[0]);
}
void right_pool_backward_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input, grad_output;
input = buildATensor(ctx, ins[0]);
grad_output = buildATensor(ctx, ins[1]);
auto out = right_pool_backward(input, grad_output);
updateDArray(ctx, out, outs[0]);
}
void top_pool_forward_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input;
input = buildATensor(ctx, ins[0]);
auto out = top_pool_forward(input);
updateDArray(ctx, out, outs[0]);
}
void top_pool_backward_parrots(CudaContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input, grad_output;
input = buildATensor(ctx, ins[0]);
grad_output = buildATensor(ctx, ins[1]);
auto out = top_pool_backward(input, grad_output);
updateDArray(ctx, out, outs[0]);
}
#endif
void bottom_pool_forward_parrots_cpu(HostContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input;
input = buildATensor(ctx, ins[0]);
auto out = bottom_pool_forward(input);
updateDArray(ctx, out, outs[0]);
}
void bottom_pool_backward_parrots_cpu(HostContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input, grad_output;
input = buildATensor(ctx, ins[0]);
grad_output = buildATensor(ctx, ins[1]);
auto out = bottom_pool_backward(input, grad_output);
updateDArray(ctx, out, outs[0]);
}
void left_pool_forward_parrots_cpu(HostContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input;
input = buildATensor(ctx, ins[0]);
auto out = left_pool_forward(input);
updateDArray(ctx, out, outs[0]);
}
void left_pool_backward_parrots_cpu(HostContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input, grad_output;
input = buildATensor(ctx, ins[0]);
grad_output = buildATensor(ctx, ins[1]);
auto out = left_pool_backward(input, grad_output);
updateDArray(ctx, out, outs[0]);
}
void right_pool_forward_parrots_cpu(HostContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input;
input = buildATensor(ctx, ins[0]);
auto out = right_pool_forward(input);
updateDArray(ctx, out, outs[0]);
}
void right_pool_backward_parrots_cpu(HostContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input, grad_output;
input = buildATensor(ctx, ins[0]);
grad_output = buildATensor(ctx, ins[1]);
auto out = right_pool_backward(input, grad_output);
updateDArray(ctx, out, outs[0]);
}
void top_pool_forward_parrots_cpu(HostContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input;
input = buildATensor(ctx, ins[0]);
auto out = top_pool_forward(input);
updateDArray(ctx, out, outs[0]);
}
void top_pool_backward_parrots_cpu(HostContext& ctx, const SSElement& attr,
const OperatorBase::in_list_t& ins,
OperatorBase::out_list_t& outs) {
at::Tensor input, grad_output;
input = buildATensor(ctx, ins[0]);
grad_output = buildATensor(ctx, ins[1]);
auto out = top_pool_backward(input, grad_output);
updateDArray(ctx, out, outs[0]);
}
PARROTS_EXTENSION_REGISTER(bottom_pool_forward)
.input(1)
.output(1)
#ifdef MMCV_WITH_CUDA
.apply(bottom_pool_forward_parrots)
#endif
.apply(bottom_pool_forward_parrots_cpu)
.done();
PARROTS_EXTENSION_REGISTER(bottom_pool_backward)
.input(2)
.output(1)
#ifdef MMCV_WITH_CUDA
.apply(bottom_pool_backward_parrots)
#endif
.apply(bottom_pool_backward_parrots_cpu)
.done();
PARROTS_EXTENSION_REGISTER(top_pool_forward)
.input(1)
.output(1)
#ifdef MMCV_WITH_CUDA
.apply(top_pool_forward_parrots)
#endif
.apply(top_pool_forward_parrots_cpu)
.done();
PARROTS_EXTENSION_REGISTER(top_pool_backward)
.input(2)
.output(1)
#ifdef MMCV_WITH_CUDA
.apply(top_pool_backward_parrots)
#endif
.apply(top_pool_backward_parrots_cpu)
.done();
PARROTS_EXTENSION_REGISTER(left_pool_forward)
.input(1)
.output(1)
#ifdef MMCV_WITH_CUDA
.apply(left_pool_forward_parrots)
#endif
.apply(left_pool_forward_parrots_cpu)
.done();
PARROTS_EXTENSION_REGISTER(left_pool_backward)
.input(2)
.output(1)
#ifdef MMCV_WITH_CUDA
.apply(left_pool_backward_parrots)
#endif
.apply(left_pool_backward_parrots_cpu)
.done();
PARROTS_EXTENSION_REGISTER(right_pool_forward)
.input(1)
.output(1)
#ifdef MMCV_WITH_CUDA
.apply(right_pool_forward_parrots)
#endif
.apply(right_pool_forward_parrots_cpu)
.done();
PARROTS_EXTENSION_REGISTER(right_pool_backward)
.input(2)
.output(1)
#ifdef MMCV_WITH_CUDA
.apply(right_pool_backward_parrots)
#endif
.apply(right_pool_backward_parrots_cpu)
.done();
// Copyright (c) OpenMMLab. All rights reserved
#ifndef CORNER_POOL_PYTORCH_H
#define CORNER_POOL_PYTORCH_H
#include <torch/extension.h>
at::Tensor bottom_pool_forward(at::Tensor input);
at::Tensor bottom_pool_backward(at::Tensor input, at::Tensor grad_output);
at::Tensor left_pool_forward(at::Tensor input);
at::Tensor left_pool_backward(at::Tensor input, at::Tensor grad_output);
at::Tensor right_pool_forward(at::Tensor input);
at::Tensor right_pool_backward(at::Tensor input, at::Tensor grad_output);
at::Tensor top_pool_forward(at::Tensor input);
at::Tensor top_pool_backward(at::Tensor input, at::Tensor grad_output);
#endif // CORNER_POOL_PYTORCH_H
@@ -2,37 +2,65 @@
 #include <iostream>
 
 #include "pytorch_cpp_helper.hpp"
-#include "pytorch_device_registry.hpp"
 
-void correlation_forward_impl(Tensor input1, Tensor input2, Tensor output,
-                              int kH, int kW, int patchH, int patchW, int padH,
-                              int padW, int dilationH, int dilationW,
-                              int dilation_patchH, int dilation_patchW, int dH,
-                              int dW) {
-  DISPATCH_DEVICE_IMPL(correlation_forward_impl, input1, input2, output, kH, kW,
-                       patchH, patchW, padH, padW, dilationH, dilationW,
-                       dilation_patchH, dilation_patchW, dH, dW);
-}
-
-void correlation_backward_impl(Tensor grad_output, Tensor input1, Tensor input2,
-                               Tensor grad_input1, Tensor grad_input2, int kH,
-                               int kW, int patchH, int patchW, int padH,
-                               int padW, int dilationH, int dilationW,
-                               int dilation_patchH, int dilation_patchW, int dH,
-                               int dW) {
-  DISPATCH_DEVICE_IMPL(correlation_backward_impl, grad_output, input1, input2,
-                       grad_input1, grad_input2, kH, kW, patchH, patchW, padH,
-                       padW, dilationH, dilationW, dilation_patchH,
-                       dilation_patchW, dH, dW);
-}
+#ifdef MMCV_WITH_CUDA
+void CorrelationForwardCUDAKernelLauncher(Tensor input1, Tensor input2,
+                                          Tensor output, int kH, int kW,
+                                          int patchH, int patchW, int padH,
+                                          int padW, int dilationH,
+                                          int dilationW, int dilation_patchH,
+                                          int dilation_patchW, int dH, int dW);
+
+void CorrelationBackwardCUDAKernelLauncher(Tensor grad_output, Tensor input1,
+                                           Tensor input2, Tensor grad_input1,
+                                           Tensor grad_input2, int kH, int kW,
+                                           int patchH, int patchW, int padH,
+                                           int padW, int dilationH,
+                                           int dilationW, int dilation_patchH,
+                                           int dilation_patchW, int dH, int dW);
+
+void correlation_cuda_forward(Tensor input1, Tensor input2, Tensor output,
+                              int kH, int kW, int patchH, int patchW, int padH,
+                              int padW, int dilationH, int dilationW,
+                              int dilation_patchH, int dilation_patchW, int dH,
+                              int dW) {
+  CorrelationForwardCUDAKernelLauncher(
+      input1, input2, output, kH, kW, patchH, patchW, padH, padW, dilationH,
+      dilationW, dilation_patchH, dilation_patchW, dH, dW);
+}
+
+void correlation_cuda_backward(Tensor grad_output, Tensor input1, Tensor input2,
+                               Tensor grad_input1, Tensor grad_input2, int kH,
+                               int kW, int patchH, int patchW, int padH,
+                               int padW, int dilationH, int dilationW,
+                               int dilation_patchH, int dilation_patchW, int dH,
+                               int dW) {
+  CorrelationBackwardCUDAKernelLauncher(
+      grad_output, input1, input2, grad_input1, grad_input2, kH, kW, patchH,
+      patchW, padH, padW, dilationH, dilationW, dilation_patchH,
+      dilation_patchW, dH, dW);
+}
+#endif
 
 void correlation_forward(Tensor input1, Tensor input2, Tensor output, int kH,
                          int kW, int patchH, int patchW, int padH, int padW,
                          int dilationH, int dilationW, int dilation_patchH,
                          int dilation_patchW, int dH, int dW) {
-  correlation_forward_impl(input1, input2, output, kH, kW, patchH, patchW, padH,
-                           padW, dilationH, dilationW, dilation_patchH,
-                           dilation_patchW, dH, dW);
+  if (input1.device().is_cuda() && input2.device().is_cuda()) {
+#ifdef MMCV_WITH_CUDA
+    CHECK_CUDA_INPUT(input1);
+    CHECK_CUDA_INPUT(input2);
+    correlation_cuda_forward(input1, input2, output, kH, kW, patchH, patchW,
+                             padH, padW, dilationH, dilationW, dilation_patchH,
+                             dilation_patchW, dH, dW);
+#else
+    AT_ERROR("Correlation is not compiled with GPU support");
+#endif
+  } else {
+    AT_ERROR("Correlation is not implemented on CPU");
+  }
 }
 
 void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2,
@@ -40,8 +68,20 @@ void correlation_backward(Tensor grad_output, Tensor input1, Tensor input2,
                           int kW, int patchH, int patchW, int padH, int padW,
                           int dilationH, int dilationW, int dilation_patchH,
                           int dilation_patchW, int dH, int dW) {
-  correlation_backward_impl(grad_output, input1, input2, grad_input1,
-                            grad_input2, kH, kW, patchH, patchW, padH, padW,
-                            dilationH, dilationW, dilation_patchH,
-                            dilation_patchW, dH, dW);
+  if (input1.device().is_cuda() && input2.device().is_cuda()) {
+#ifdef MMCV_WITH_CUDA
+    CHECK_CUDA_INPUT(grad_output);
+    CHECK_CUDA_INPUT(input1);
+    CHECK_CUDA_INPUT(input2);
+    correlation_cuda_backward(grad_output, input1, input2, grad_input1,
+                              grad_input2, kH, kW, patchH, patchW, padH, padW,
+                              dilationH, dilationW, dilation_patchH,
+                              dilation_patchW, dH, dW);
+#else
+    AT_ERROR("Correlation is not compiled with GPU support");
+#endif
+  } else {
+    AT_ERROR("Correlation is not implemented on CPU");
+  }
 }