Unverified commit 847ac3de, authored by Vasilis Vryniotis, committed by GitHub
Browse files

[FBcode->GH] Fix missing kernel guards (#4620) (#4743)

Summary:
Pull Request resolved: https://github.com/pytorch/vision/pull/4620

Pull Request resolved: https://github.com/pytorch/nestedtensor/pull/455



Fixes missing kernel guards as identified by D30072495

Reviewed By: jingsh, xush6528

Differential Revision: D31553158

fbshipit-source-id: 80de017ba2ddc52e2a684df9b3eae5de84ed49f4
Co-authored-by: Richard Barnes <rbarnes@fb.com>
parent d6102173
...@@ -228,7 +228,9 @@ void deformable_im2col( ...@@ -228,7 +228,9 @@ void deformable_im2col(
int deformable_group, int deformable_group,
bool use_mask, bool use_mask,
at::Tensor data_col) { at::Tensor data_col) {
int64_t num_kernels = (int64_t)n_in_channels * out_h * out_w * parallel_imgs; at::cuda::CUDAGuard device_guard(input.get_device());
const int64_t num_kernels = (int64_t)n_in_channels * out_h * out_w * parallel_imgs;
const unsigned int threads = GET_THREADS(); const unsigned int threads = GET_THREADS();
const unsigned int blocks = GET_BLOCKS(threads, num_kernels); const unsigned int blocks = GET_BLOCKS(threads, num_kernels);
...@@ -408,12 +410,14 @@ void compute_grad_input( ...@@ -408,12 +410,14 @@ void compute_grad_input(
int n_offset_grps, int n_offset_grps,
bool use_mask, bool use_mask,
at::Tensor grad_im) { at::Tensor grad_im) {
int out_h = at::cuda::CUDAGuard device_guard(columns.get_device());
const int out_h =
(height + 2 * pad_h - (dilation_h * (weight_h - 1) + 1)) / stride_h + 1; (height + 2 * pad_h - (dilation_h * (weight_h - 1) + 1)) / stride_h + 1;
int out_w = const int out_w =
(width + 2 * pad_w - (dilation_w * (weight_w - 1) + 1)) / stride_w + 1; (width + 2 * pad_w - (dilation_w * (weight_w - 1) + 1)) / stride_w + 1;
int64_t num_kernels = const int64_t num_kernels =
(int64_t)channels * weight_h * weight_w * out_h * out_w * parallel_imgs; (int64_t)channels * weight_h * weight_w * out_h * out_w * parallel_imgs;
const unsigned int threads = GET_THREADS(); const unsigned int threads = GET_THREADS();
...@@ -650,11 +654,13 @@ void compute_grad_offset_and_mask( ...@@ -650,11 +654,13 @@ void compute_grad_offset_and_mask(
bool use_mask, bool use_mask,
at::Tensor grad_offset, at::Tensor grad_offset,
at::Tensor grad_mask) { at::Tensor grad_mask) {
int out_h = at::cuda::CUDAGuard device_guard(columns.get_device());
const int out_h =
(height + 2 * pad_h - (dilation_h * (weight_h - 1) + 1)) / stride_h + 1; (height + 2 * pad_h - (dilation_h * (weight_h - 1) + 1)) / stride_h + 1;
int out_w = const int out_w =
(width + 2 * pad_w - (dilation_w * (weight_w - 1) + 1)) / stride_w + 1; (width + 2 * pad_w - (dilation_w * (weight_w - 1) + 1)) / stride_w + 1;
int64_t num_kernels = (int64_t)out_h * out_w * 2 * weight_h * weight_w * const int64_t num_kernels = (int64_t)out_h * out_w * 2 * weight_h * weight_w *
n_offset_grps * parallel_imgs; n_offset_grps * parallel_imgs;
const unsigned int threads = GET_THREADS(); const unsigned int threads = GET_THREADS();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment