Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
SOLOv2-pytorch
Commits
14cb538f
Commit
14cb538f
authored
Jan 16, 2019
by
yhcao6
Browse files
clean unnecessary comment
parent
9acb38be
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
87 additions
and
114 deletions
+87
-114
mmdet/ops/dcn/src/deform_conv_cuda.cpp
mmdet/ops/dcn/src/deform_conv_cuda.cpp
+5
-30
mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
+2
-0
mmdet/ops/dcn/src/deform_psroi_pooling_cuda.cu
mmdet/ops/dcn/src/deform_psroi_pooling_cuda.cu
+2
-0
mmdet/ops/dcn/src/modulated_dcn_cuda.cpp
mmdet/ops/dcn/src/modulated_dcn_cuda.cpp
+39
-47
mmdet/ops/dcn/src/modulated_deform_im2col_cuda.cu
mmdet/ops/dcn/src/modulated_deform_im2col_cuda.cu
+39
-37
No files found.
mmdet/ops/dcn/src/deform_conv_cuda.cpp
View file @
14cb538f
// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda.c
#include <torch/torch.h>
#include <torch/torch.h>
#include <cmath>
#include <cmath>
...
@@ -37,10 +39,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
...
@@ -37,10 +39,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
int
dilationW
,
int
deformable_group
)
int
dilationW
,
int
deformable_group
)
{
{
// AT_CHECK(weight->nDimension == 4, 5,
// "4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
// "but got: %s",
// weight->nDimension);
AT_CHECK
(
weight
.
ndimension
()
==
4
,
AT_CHECK
(
weight
.
ndimension
()
==
4
,
"4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
"4D weight tensor (nOutputPlane,nInputPlane,kH,kW) expected, "
"but got: %s"
,
"but got: %s"
,
...
@@ -53,10 +51,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
...
@@ -53,10 +51,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
"kernel size should be greater than zero, but got kH: %d kW: %d"
,
"kernel size should be greater than zero, but got kH: %d kW: %d"
,
kH
,
kW
);
kH
,
kW
);
// AT_CHECK((weight->size[2] == kH && weight->size[3] == kW), 9,
// "kernel size should be consistent with weight, ",
// "but got kH: %d kW: %d weight.size(2): %d, weight.size(3): %d", kH,
// kW, weight->size[2], weight->size[3]);
AT_CHECK
((
weight
.
size
(
2
)
==
kH
&&
AT_CHECK
((
weight
.
size
(
2
)
==
kH
&&
weight
.
size
(
3
)
==
kW
),
weight
.
size
(
3
)
==
kW
),
"kernel size should be consistent with weight, "
,
"kernel size should be consistent with weight, "
,
...
@@ -70,7 +64,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
...
@@ -70,7 +64,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
"dilation should be greater than 0, but got dilationH: %d dilationW: %d"
,
"dilation should be greater than 0, but got dilationH: %d dilationW: %d"
,
dilationH
,
dilationW
);
dilationH
,
dilationW
);
// int ndim = input->nDimension;
int
ndim
=
input
.
ndimension
();
int
ndim
=
input
.
ndimension
();
int
dimf
=
0
;
int
dimf
=
0
;
int
dimh
=
1
;
int
dimh
=
1
;
...
@@ -86,10 +79,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
...
@@ -86,10 +79,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
AT_CHECK
(
ndim
==
3
||
ndim
==
4
,
AT_CHECK
(
ndim
==
3
||
ndim
==
4
,
"3D or 4D input tensor expected but got: %s"
,
ndim
);
"3D or 4D input tensor expected but got: %s"
,
ndim
);
// long nInputPlane = weight->size[1];
// long inputHeight = input->size[dimh];
// long inputWidth = input->size[dimw];
// long nOutputPlane = weight->size[0];
long
nInputPlane
=
weight
.
size
(
1
);
long
nInputPlane
=
weight
.
size
(
1
);
long
inputHeight
=
input
.
size
(
dimh
);
long
inputHeight
=
input
.
size
(
dimh
);
long
inputWidth
=
input
.
size
(
dimw
);
long
inputWidth
=
input
.
size
(
dimw
);
...
@@ -114,10 +103,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
...
@@ -114,10 +103,6 @@ void shape_check(at::Tensor input, at::Tensor offset,
AT_CHECK
((
inputHeight
>=
kH
&&
inputWidth
>=
kW
),
AT_CHECK
((
inputHeight
>=
kH
&&
inputWidth
>=
kW
),
"input image is smaller than kernel"
);
"input image is smaller than kernel"
);
// AT_CHECK(
// (offset->size[2] == outputHeight && offset->size[3] == outputWidth), 3,
// "invalid spatial size of offset, expected height: %d width: %d, but got height: %d width: %d", outputHeight, outputWidth,
// offset->size[2], offset->size[3]);
AT_CHECK
(
AT_CHECK
(
(
offset
.
size
(
2
)
==
outputHeight
&&
offset
.
size
(
3
)
==
outputWidth
),
(
offset
.
size
(
2
)
==
outputHeight
&&
offset
.
size
(
3
)
==
outputWidth
),
"invalid spatial size of offset, expected height: %d width: %d, but got height: %d width: %d"
,
"invalid spatial size of offset, expected height: %d width: %d, but got height: %d width: %d"
,
...
@@ -152,9 +137,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
...
@@ -152,9 +137,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
// todo: add new output buffer and transpose it to output (or directly transpose output)
// todo: add new output buffer and transpose it to output (or directly transpose output)
// todo: possibly change data indexing because of parallel_imgs
// todo: possibly change data indexing because of parallel_imgs
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 6, input, weight, offset,
// output, columns, ones));
shape_check
(
input
,
offset
,
NULL
,
weight
,
kH
,
kW
,
dH
,
dW
,
padH
,
padW
,
shape_check
(
input
,
offset
,
NULL
,
weight
,
kH
,
kW
,
dH
,
dW
,
padH
,
padW
,
dilationH
,
dilationW
,
deformable_group
);
dilationH
,
dilationW
,
deformable_group
);
...
@@ -185,8 +167,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
...
@@ -185,8 +167,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
AT_CHECK
((
offset
.
size
(
0
)
==
batchSize
),
"invalid batch size of offset"
);
AT_CHECK
((
offset
.
size
(
0
)
==
batchSize
),
"invalid batch size of offset"
);
// bias = bias ? THCudaTensor_newContiguous(state, bias) : bias;
output
=
output
.
view
({
batchSize
/
im2col_step
,
im2col_step
,
nOutputPlane
,
outputHeight
,
outputWidth
});
output
=
output
.
view
({
batchSize
/
im2col_step
,
im2col_step
,
nOutputPlane
,
outputHeight
,
outputWidth
});
columns
=
at
::
zeros
({
nInputPlane
*
kW
*
kH
,
im2col_step
*
outputHeight
*
outputWidth
},
input
.
type
());
columns
=
at
::
zeros
({
nInputPlane
*
kW
*
kH
,
im2col_step
*
outputHeight
*
outputWidth
},
input
.
type
());
...
@@ -212,7 +192,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
...
@@ -212,7 +192,6 @@ int deform_conv_forward_cuda(at::Tensor input, at::Tensor weight,
output_buffer
[
elt
].
flatten
(
1
).
addmm_
(
weight
.
flatten
(
1
),
columns
).
view_as
(
output_buffer
[
elt
]);
output_buffer
[
elt
].
flatten
(
1
).
addmm_
(
weight
.
flatten
(
1
),
columns
).
view_as
(
output_buffer
[
elt
]);
}
}
// the reason I use seemingly redundant output_buffer is that THCudaTensor API handles successive transpose and resize poorly
output_buffer
=
output_buffer
.
view
(
output_buffer
=
output_buffer
.
view
(
{
batchSize
/
im2col_step
,
nOutputPlane
,
im2col_step
,
outputHeight
,
outputWidth
});
{
batchSize
/
im2col_step
,
nOutputPlane
,
im2col_step
,
outputHeight
,
outputWidth
});
output_buffer
.
transpose_
(
1
,
2
);
output_buffer
.
transpose_
(
1
,
2
);
...
@@ -239,9 +218,6 @@ int deform_conv_backward_input_cuda(
...
@@ -239,9 +218,6 @@ int deform_conv_backward_input_cuda(
int
dilationW
,
int
dilationH
,
int
deformable_group
,
int
im2col_step
)
int
dilationW
,
int
dilationH
,
int
deformable_group
,
int
im2col_step
)
{
{
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 6, input, gradOutput, weight,
// offset, columns, gradInput));
shape_check
(
input
,
offset
,
&
gradOutput
,
weight
,
kH
,
kW
,
dH
,
dW
,
padH
,
shape_check
(
input
,
offset
,
&
gradOutput
,
weight
,
kH
,
kW
,
dH
,
dW
,
padH
,
padW
,
dilationH
,
dilationW
,
deformable_group
);
padW
,
dilationH
,
dilationW
,
deformable_group
);
...
@@ -341,8 +317,6 @@ int deform_conv_backward_parameters_cuda(
...
@@ -341,8 +317,6 @@ int deform_conv_backward_parameters_cuda(
// todo: transpose and reshape outGrad
// todo: transpose and reshape outGrad
// todo: reshape columns
// todo: reshape columns
// todo: add im2col_step as input
// todo: add im2col_step as input
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, offset, gradOutput,
// gradWeight, columns));
shape_check
(
input
,
offset
,
&
gradOutput
,
gradWeight
,
kH
,
kW
,
dH
,
dW
,
shape_check
(
input
,
offset
,
&
gradOutput
,
gradWeight
,
kH
,
kW
,
dH
,
dW
,
padH
,
padW
,
dilationH
,
dilationW
,
deformable_group
);
padH
,
padW
,
dilationH
,
dilationW
,
deformable_group
);
...
@@ -402,8 +376,9 @@ int deform_conv_backward_parameters_cuda(
...
@@ -402,8 +376,9 @@ int deform_conv_backward_parameters_cuda(
inputWidth
,
kH
,
kW
,
padH
,
padW
,
dH
,
dW
,
dilationH
,
dilationW
,
inputWidth
,
kH
,
kW
,
padH
,
padW
,
dH
,
dW
,
dilationH
,
dilationW
,
im2col_step
,
deformable_group
,
columns
);
im2col_step
,
deformable_group
,
columns
);
gradWeight
.
copy_
(
gradWeight
.
flatten
(
1
).
addmm_
(
gradWeight
=
gradWeight
.
flatten
(
1
).
addmm_
(
gradOutputBuffer
[
elt
].
flatten
(
1
),
columns
.
transpose
(
1
,
0
),
1.0
,
scale
).
view_as
(
gradWeight
));
gradOutputBuffer
[
elt
].
flatten
(
1
),
columns
.
transpose
(
1
,
0
),
1.0
,
scale
)
.
view_as
(
gradWeight
);
}
}
input
=
input
.
view
({
batchSize
,
nInputPlane
,
inputHeight
,
inputWidth
});
input
=
input
.
view
({
batchSize
,
nInputPlane
,
inputHeight
,
inputWidth
});
...
...
mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
View file @
14cb538f
...
@@ -58,6 +58,8 @@
...
@@ -58,6 +58,8 @@
* \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
* \author Yuwen Xiong, Haozhi Qi, Jifeng Dai, Xizhou Zhu, Han Hu, Dazhi Cheng
*/
*/
// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/deform_conv_cuda_kernel.cu
#include <ATen/ATen.h>
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
#include <THC/THCAtomics.cuh>
#include <stdio.h>
#include <stdio.h>
...
...
mmdet/ops/dcn/src/deform_psroi_pooling_cuda.cu
View file @
14cb538f
...
@@ -6,6 +6,8 @@
...
@@ -6,6 +6,8 @@
* \author Yi Li, Guodong Zhang, Jifeng Dai
* \author Yi Li, Guodong Zhang, Jifeng Dai
*/
*/
/***************** Adapted by Charles Shang *********************/
/***************** Adapted by Charles Shang *********************/
// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/cuda/deform_psroi_pooling_cuda.cu
#include <ATen/ATen.h>
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
#include <THC/THCAtomics.cuh>
#include <stdio.h>
#include <stdio.h>
...
...
mmdet/ops/dcn/src/modulated_dcn_cuda.cpp
View file @
14cb538f
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob /mmdetection/mmdet/ops/dcn/src/modulated_dcn_cuda.c
#include <torch/torch.h>
#include <torch/torch.h>
#include <cmath>
#include <cmath>
#include <vector>
#include <vector>
// author: Charles Shang
// https://github.com/torch/cunn/blob/master/lib/THCUNN/generic/SpatialConvolutionMM.cu
void
modulated_deformable_im2col_cuda
(
const
at
::
Tensor
data_im
,
const
at
::
Tensor
data_offset
,
void
modulated_deformable_im2col_cuda
(
const
at
::
Tensor
data_im
,
const
at
::
Tensor
data_offset
,
const
at
::
Tensor
data_mask
,
const
int
batch_size
,
const
int
channels
,
const
at
::
Tensor
data_mask
,
const
int
batch_size
,
const
int
channels
,
const
int
height_im
,
const
int
width_im
,
const
int
height_col
,
const
int
height_im
,
const
int
width_im
,
const
int
height_col
,
...
@@ -33,7 +34,6 @@ void modulated_deformable_col2im_coord_cuda(const at::Tensor data_col, const at:
...
@@ -33,7 +34,6 @@ void modulated_deformable_col2im_coord_cuda(const at::Tensor data_col, const at:
const
int
deformable_group
,
at
::
Tensor
grad_offset
,
const
int
deformable_group
,
at
::
Tensor
grad_offset
,
at
::
Tensor
grad_mask
);
at
::
Tensor
grad_mask
);
void
DeformablePSROIPoolForward
(
const
at
::
Tensor
data
,
void
DeformablePSROIPoolForward
(
const
at
::
Tensor
data
,
const
at
::
Tensor
bbox
,
const
at
::
Tensor
bbox
,
const
at
::
Tensor
trans
,
const
at
::
Tensor
trans
,
...
@@ -76,7 +76,6 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,
...
@@ -76,7 +76,6 @@ void DeformablePSROIPoolBackwardAcc(const at::Tensor out_grad,
const
int
sample_per_part
,
const
int
sample_per_part
,
const
float
trans_std
);
const
float
trans_std
);
void
modulated_deform_conv_cuda_forward
(
at
::
Tensor
input
,
at
::
Tensor
weight
,
void
modulated_deform_conv_cuda_forward
(
at
::
Tensor
input
,
at
::
Tensor
weight
,
at
::
Tensor
bias
,
at
::
Tensor
ones
,
at
::
Tensor
bias
,
at
::
Tensor
ones
,
at
::
Tensor
offset
,
at
::
Tensor
mask
,
at
::
Tensor
offset
,
at
::
Tensor
mask
,
...
@@ -87,7 +86,6 @@ void modulated_deform_conv_cuda_forward(at::Tensor input, at::Tensor weight,
...
@@ -87,7 +86,6 @@ void modulated_deform_conv_cuda_forward(at::Tensor input, at::Tensor weight,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
deformable_group
)
const
int
deformable_group
)
{
{
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 8, input, weight, bias, ones, offset, mask, output, columns));
AT_CHECK
(
input
.
is_contiguous
(),
"input tensor has to be contiguous"
);
AT_CHECK
(
input
.
is_contiguous
(),
"input tensor has to be contiguous"
);
AT_CHECK
(
weight
.
is_contiguous
(),
"weight tensor has to be contiguous"
);
AT_CHECK
(
weight
.
is_contiguous
(),
"weight tensor has to be contiguous"
);
...
@@ -156,8 +154,6 @@ void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor weight,
...
@@ -156,8 +154,6 @@ void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor weight,
int
dilation_h
,
int
dilation_w
,
int
dilation_h
,
int
dilation_w
,
int
deformable_group
)
int
deformable_group
)
{
{
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 13, input, weight, bias, ones, offset, mask, columns,
// grad_input, grad_weight, grad_bias, grad_offset, grad_mask, grad_output));
AT_CHECK
(
input
.
is_contiguous
(),
"input tensor has to be contiguous"
);
AT_CHECK
(
input
.
is_contiguous
(),
"input tensor has to be contiguous"
);
AT_CHECK
(
weight
.
is_contiguous
(),
"weight tensor has to be contiguous"
);
AT_CHECK
(
weight
.
is_contiguous
(),
"weight tensor has to be contiguous"
);
...
@@ -220,7 +216,6 @@ void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor weight,
...
@@ -220,7 +216,6 @@ void modulated_deform_conv_cuda_backward(at::Tensor input, at::Tensor weight,
grad_bias
=
grad_bias
.
view
({
-
1
,
1
}).
addmm_
(
grad_output
[
b
].
flatten
(
1
),
ones
.
view
({
-
1
,
1
})).
view
(
-
1
);
grad_bias
=
grad_bias
.
view
({
-
1
,
1
}).
addmm_
(
grad_output
[
b
].
flatten
(
1
),
ones
.
view
({
-
1
,
1
})).
view
(
-
1
);
}
}
}
}
void
deform_psroi_pooling_cuda_forward
(
at
::
Tensor
input
,
at
::
Tensor
bbox
,
void
deform_psroi_pooling_cuda_forward
(
at
::
Tensor
input
,
at
::
Tensor
bbox
,
...
@@ -236,13 +231,12 @@ void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox,
...
@@ -236,13 +231,12 @@ void deform_psroi_pooling_cuda_forward(at::Tensor input, at::Tensor bbox,
const
float
trans_std
)
const
float
trans_std
)
{
{
AT_CHECK
(
input
.
is_contiguous
(),
"input tensor has to be contiguous"
);
AT_CHECK
(
input
.
is_contiguous
(),
"input tensor has to be contiguous"
);
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 5, input, bbox, trans, out, top_count));
const
int
batch
=
input
.
size
(
0
);
const
int
batch
=
input
.
size
(
0
);
const
int
channels
=
input
.
size
(
1
);
const
int
channels
=
input
.
size
(
1
);
const
int
height
=
input
.
size
(
2
);
const
int
height
=
input
.
size
(
2
);
const
int
width
=
input
.
size
(
3
);
const
int
width
=
input
.
size
(
3
);
const
int
channels_trans
=
no_trans
?
2
:
trans
.
size
(
1
);
const
int
channels_trans
=
no_trans
?
2
:
trans
.
size
(
1
);
const
int
num_bbox
=
bbox
.
size
(
0
);
const
int
num_bbox
=
bbox
.
size
(
0
);
if
(
num_bbox
!=
out
.
size
(
0
))
if
(
num_bbox
!=
out
.
size
(
0
))
...
@@ -278,14 +272,12 @@ void deform_psroi_pooling_cuda_backward(at::Tensor out_grad,
...
@@ -278,14 +272,12 @@ void deform_psroi_pooling_cuda_backward(at::Tensor out_grad,
{
{
AT_CHECK
(
out_grad
.
is_contiguous
(),
"out_grad tensor has to be contiguous"
);
AT_CHECK
(
out_grad
.
is_contiguous
(),
"out_grad tensor has to be contiguous"
);
AT_CHECK
(
input
.
is_contiguous
(),
"input tensor has to be contiguous"
);
AT_CHECK
(
input
.
is_contiguous
(),
"input tensor has to be contiguous"
);
// THCAssertSameGPU(THCudaTensor_checkGPU(state, 7, input, bbox, trans, out_grad, top_count,
// input_grad, trans_grad));
const
int
batch
=
input
.
size
(
0
);
const
int
batch
=
input
.
size
(
0
);
const
int
channels
=
input
.
size
(
1
);
const
int
channels
=
input
.
size
(
1
);
const
int
height
=
input
.
size
(
2
);
const
int
height
=
input
.
size
(
2
);
const
int
width
=
input
.
size
(
3
);
const
int
width
=
input
.
size
(
3
);
const
int
channels_trans
=
no_trans
?
2
:
trans
.
size
(
1
);
const
int
channels_trans
=
no_trans
?
2
:
trans
.
size
(
1
);
const
int
num_bbox
=
bbox
.
size
(
0
);
const
int
num_bbox
=
bbox
.
size
(
0
);
if
(
num_bbox
!=
out_grad
.
size
(
0
))
if
(
num_bbox
!=
out_grad
.
size
(
0
))
...
...
mmdet/ops/dcn/src/modulated_deform_im2col_cuda.cu
View file @
14cb538f
// modify from https://github.com/chengdazhi/Deformable-Convolution-V2-PyTorch/blob/mmdetection/mmdet/ops/dcn/src/cuda/modulated_deform_im2col_cuda.cu
#include <ATen/ATen.h>
#include <ATen/ATen.h>
#include <THC/THCAtomics.cuh>
#include <THC/THCAtomics.cuh>
#include <stdio.h>
#include <stdio.h>
...
@@ -17,7 +19,6 @@ inline int GET_BLOCKS(const int N)
...
@@ -17,7 +19,6 @@ inline int GET_BLOCKS(const int N)
return
(
N
+
CUDA_NUM_THREADS
-
1
)
/
CUDA_NUM_THREADS
;
return
(
N
+
CUDA_NUM_THREADS
-
1
)
/
CUDA_NUM_THREADS
;
}
}
template
<
typename
scalar_t
>
template
<
typename
scalar_t
>
__device__
scalar_t
dmcn_im2col_bilinear
(
const
scalar_t
*
bottom_data
,
const
int
data_width
,
__device__
scalar_t
dmcn_im2col_bilinear
(
const
scalar_t
*
bottom_data
,
const
int
data_width
,
const
int
height
,
const
int
width
,
scalar_t
h
,
scalar_t
w
)
const
int
height
,
const
int
width
,
scalar_t
h
,
scalar_t
w
)
...
@@ -326,7 +327,8 @@ void modulated_deformable_im2col_cuda(
...
@@ -326,7 +327,8 @@ void modulated_deformable_im2col_cuda(
const
int
height_col
,
const
int
width_col
,
const
int
kernel_h
,
const
int
kenerl_w
,
const
int
height_col
,
const
int
width_col
,
const
int
kernel_h
,
const
int
kenerl_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
deformable_group
,
at
::
Tensor
data_col
)
{
const
int
deformable_group
,
at
::
Tensor
data_col
)
{
// num_axes should be smaller than block size
// num_axes should be smaller than block size
const
int
channel_per_deformable_group
=
channels
/
deformable_group
;
const
int
channel_per_deformable_group
=
channels
/
deformable_group
;
const
int
num_kernels
=
channels
*
batch_size
*
height_col
*
width_col
;
const
int
num_kernels
=
channels
*
batch_size
*
height_col
*
width_col
;
...
@@ -338,7 +340,7 @@ void modulated_deformable_im2col_cuda(
...
@@ -338,7 +340,7 @@ void modulated_deformable_im2col_cuda(
const
scalar_t
*
data_mask_
=
data_mask
.
data
<
scalar_t
>
();
const
scalar_t
*
data_mask_
=
data_mask
.
data
<
scalar_t
>
();
scalar_t
*
data_col_
=
data_col
.
data
<
scalar_t
>
();
scalar_t
*
data_col_
=
data_col
.
data
<
scalar_t
>
();
modulated_deformable_im2col_gpu_kernel
<<<
GET_BLOCKS
(
num_kernels
),
CUDA_NUM_THREADS
>>>
(
modulated_deformable_im2col_gpu_kernel
<<<
GET_BLOCKS
(
num_kernels
),
CUDA_NUM_THREADS
>>>
(
num_kernels
,
data_im_
,
data_offset_
,
data_mask_
,
height_im
,
width_im
,
kernel_h
,
kenerl_w
,
num_kernels
,
data_im_
,
data_offset_
,
data_mask_
,
height_im
,
width_im
,
kernel_h
,
kenerl_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
,
channel_per_deformable_group
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
,
channel_per_deformable_group
,
batch_size
,
channels
,
deformable_group
,
height_col
,
width_col
,
data_col_
);
batch_size
,
channels
,
deformable_group
,
height_col
,
width_col
,
data_col_
);
...
@@ -349,7 +351,6 @@ void modulated_deformable_im2col_cuda(
...
@@ -349,7 +351,6 @@ void modulated_deformable_im2col_cuda(
{
{
printf
(
"error in modulated_deformable_im2col_cuda: %s
\n
"
,
cudaGetErrorString
(
err
));
printf
(
"error in modulated_deformable_im2col_cuda: %s
\n
"
,
cudaGetErrorString
(
err
));
}
}
}
}
void
modulated_deformable_col2im_cuda
(
void
modulated_deformable_col2im_cuda
(
...
@@ -358,7 +359,8 @@ void modulated_deformable_col2im_cuda(
...
@@ -358,7 +359,8 @@ void modulated_deformable_col2im_cuda(
const
int
height_col
,
const
int
width_col
,
const
int
kernel_h
,
const
int
kernel_w
,
const
int
height_col
,
const
int
width_col
,
const
int
kernel_h
,
const
int
kernel_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
deformable_group
,
at
::
Tensor
grad_im
){
const
int
deformable_group
,
at
::
Tensor
grad_im
)
{
const
int
channel_per_deformable_group
=
channels
/
deformable_group
;
const
int
channel_per_deformable_group
=
channels
/
deformable_group
;
const
int
num_kernels
=
channels
*
kernel_h
*
kernel_w
*
batch_size
*
height_col
*
width_col
;
const
int
num_kernels
=
channels
*
kernel_h
*
kernel_w
*
batch_size
*
height_col
*
width_col
;
...
@@ -370,7 +372,7 @@ void modulated_deformable_col2im_cuda(
...
@@ -370,7 +372,7 @@ void modulated_deformable_col2im_cuda(
const
scalar_t
*
data_mask_
=
data_mask
.
data
<
scalar_t
>
();
const
scalar_t
*
data_mask_
=
data_mask
.
data
<
scalar_t
>
();
scalar_t
*
grad_im_
=
grad_im
.
data
<
scalar_t
>
();
scalar_t
*
grad_im_
=
grad_im
.
data
<
scalar_t
>
();
modulated_deformable_col2im_gpu_kernel
<<<
GET_BLOCKS
(
num_kernels
),
CUDA_NUM_THREADS
>>>
(
modulated_deformable_col2im_gpu_kernel
<<<
GET_BLOCKS
(
num_kernels
),
CUDA_NUM_THREADS
>>>
(
num_kernels
,
data_col_
,
data_offset_
,
data_mask_
,
channels
,
height_im
,
width_im
,
num_kernels
,
data_col_
,
data_offset_
,
data_mask_
,
channels
,
height_im
,
width_im
,
kernel_h
,
kernel_w
,
pad_h
,
pad_h
,
stride_h
,
stride_w
,
kernel_h
,
kernel_w
,
pad_h
,
pad_h
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
,
channel_per_deformable_group
,
dilation_h
,
dilation_w
,
channel_per_deformable_group
,
...
@@ -382,7 +384,6 @@ void modulated_deformable_col2im_cuda(
...
@@ -382,7 +384,6 @@ void modulated_deformable_col2im_cuda(
{
{
printf
(
"error in modulated_deformable_col2im_cuda: %s
\n
"
,
cudaGetErrorString
(
err
));
printf
(
"error in modulated_deformable_col2im_cuda: %s
\n
"
,
cudaGetErrorString
(
err
));
}
}
}
}
void
modulated_deformable_col2im_coord_cuda
(
void
modulated_deformable_col2im_coord_cuda
(
...
@@ -392,7 +393,8 @@ void modulated_deformable_col2im_coord_cuda(
...
@@ -392,7 +393,8 @@ void modulated_deformable_col2im_coord_cuda(
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
pad_h
,
const
int
pad_w
,
const
int
stride_h
,
const
int
stride_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
dilation_h
,
const
int
dilation_w
,
const
int
deformable_group
,
const
int
deformable_group
,
at
::
Tensor
grad_offset
,
at
::
Tensor
grad_mask
)
{
at
::
Tensor
grad_offset
,
at
::
Tensor
grad_mask
)
{
const
int
num_kernels
=
batch_size
*
height_col
*
width_col
*
2
*
kernel_h
*
kernel_w
*
deformable_group
;
const
int
num_kernels
=
batch_size
*
height_col
*
width_col
*
2
*
kernel_h
*
kernel_w
*
deformable_group
;
const
int
channel_per_deformable_group
=
channels
*
kernel_h
*
kernel_w
/
deformable_group
;
const
int
channel_per_deformable_group
=
channels
*
kernel_h
*
kernel_w
/
deformable_group
;
...
@@ -405,7 +407,7 @@ void modulated_deformable_col2im_coord_cuda(
...
@@ -405,7 +407,7 @@ void modulated_deformable_col2im_coord_cuda(
scalar_t
*
grad_offset_
=
grad_offset
.
data
<
scalar_t
>
();
scalar_t
*
grad_offset_
=
grad_offset
.
data
<
scalar_t
>
();
scalar_t
*
grad_mask_
=
grad_mask
.
data
<
scalar_t
>
();
scalar_t
*
grad_mask_
=
grad_mask
.
data
<
scalar_t
>
();
modulated_deformable_col2im_coord_gpu_kernel
<<<
GET_BLOCKS
(
num_kernels
),
CUDA_NUM_THREADS
>>>
(
modulated_deformable_col2im_coord_gpu_kernel
<<<
GET_BLOCKS
(
num_kernels
),
CUDA_NUM_THREADS
>>>
(
num_kernels
,
data_col_
,
data_im_
,
data_offset_
,
data_mask_
,
channels
,
height_im
,
width_im
,
num_kernels
,
data_col_
,
data_im_
,
data_offset_
,
data_mask_
,
channels
,
height_im
,
width_im
,
kernel_h
,
kernel_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
kernel_h
,
kernel_w
,
pad_h
,
pad_w
,
stride_h
,
stride_w
,
dilation_h
,
dilation_w
,
channel_per_deformable_group
,
dilation_h
,
dilation_w
,
channel_per_deformable_group
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment