ModelZoo / SOLOv2-pytorch

Commit f92f64c2 (unverified)
use int64_t instead of long in cuda kernels (#1131)
Authored Aug 07, 2019 by Kai Chen, committed via GitHub on Aug 07, 2019
Parent: bc94b212
Showing 2 changed files with 19 additions and 18 deletions (+19 -18)
mmdet/ops/masked_conv/src/masked_conv2d_kernel.cu  (+11 -12)
mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu  (+8 -6)
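Why the change matters: PyTorch index tensors created as torch.long are always stored as 64-bit integers, but the C/C++ type long is 8 bytes only on LP64 platforms (Linux/macOS x86_64) and shrinks to 4 bytes under LLP64 compilers such as MSVC on Windows. Reading an int64 tensor through a long pointer is therefore not portable, while int64_t matches the tensor storage everywhere. A minimal illustration, not part of this commit:

// Minimal illustration only, not part of the commit: the width of `long`
// depends on the platform's data model, while int64_t is always 8 bytes and
// therefore matches PyTorch's int64 (torch.long) tensor storage everywhere.
#include <cstdint>

static_assert(sizeof(int64_t) == 8, "int64_t is 64-bit on every platform");
// sizeof(long) == 8 on LP64 (Linux/macOS x86_64) but == 4 on LLP64 (MSVC),
// which is why the kernels below take const int64_t* instead of const long*.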
mmdet/ops/masked_conv/src/masked_conv2d_kernel.cu
@@ -18,9 +18,9 @@ __global__ void MaskedIm2colForward(const int n, const scalar_t *data_im,
                                     const int height, const int width,
                                     const int kernel_h, const int kernel_w,
                                     const int pad_h, const int pad_w,
-                                    const long *mask_h_idx,
-                                    const long *mask_w_idx, const int mask_cnt,
-                                    scalar_t *data_col) {
+                                    const int64_t *mask_h_idx,
+                                    const int64_t *mask_w_idx,
+                                    const int mask_cnt, scalar_t *data_col) {
   // mask_cnt * channels
   CUDA_1D_KERNEL_LOOP(index, n) {
     const int m_index = index % mask_cnt;
@@ -59,8 +59,8 @@ int MaskedIm2colForwardLaucher(const at::Tensor bottom_data, const int height,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       bottom_data.scalar_type(), "MaskedIm2colLaucherForward", ([&] {
         const scalar_t *bottom_data_ = bottom_data.data<scalar_t>();
-        const long *mask_h_idx_ = mask_h_idx.data<long>();
-        const long *mask_w_idx_ = mask_w_idx.data<long>();
+        const int64_t *mask_h_idx_ = mask_h_idx.data<int64_t>();
+        const int64_t *mask_w_idx_ = mask_w_idx.data<int64_t>();
         scalar_t *top_data_ = top_data.data<scalar_t>();
         MaskedIm2colForward<scalar_t>
             <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
@@ -74,16 +74,15 @@ int MaskedIm2colForwardLaucher(const at::Tensor bottom_data, const int height,
 template <typename scalar_t>
 __global__ void MaskedCol2imForward(const int n, const scalar_t *data_col,
                                     const int height, const int width,
-                                    const int channels, const long *mask_h_idx,
-                                    const long *mask_w_idx, const int mask_cnt,
-                                    scalar_t *data_im) {
+                                    const int channels,
+                                    const int64_t *mask_h_idx,
+                                    const int64_t *mask_w_idx,
+                                    const int mask_cnt, scalar_t *data_im) {
   CUDA_1D_KERNEL_LOOP(index, n) {
     const int m_index = index % mask_cnt;
     const int h_im = mask_h_idx[m_index];
     const int w_im = mask_w_idx[m_index];
     const int c_im = index / mask_cnt;
     // int kernel_extent_w = (kernel_w - 1) + 1;
     // int kernel_extent_h = (kernel_h - 1) + 1;
     // compute the start and end of the output
     data_im[(c_im * height + h_im) * width + w_im] = data_col[index];
   }
@@ -99,8 +98,8 @@ int MaskedCol2imForwardLaucher(const at::Tensor bottom_data, const int height,
   AT_DISPATCH_FLOATING_TYPES_AND_HALF(
       bottom_data.scalar_type(), "MaskedCol2imLaucherForward", ([&] {
         const scalar_t *bottom_data_ = bottom_data.data<scalar_t>();
-        const long *mask_h_idx_ = mask_h_idx.data<long>();
-        const long *mask_w_idx_ = mask_w_idx.data<long>();
+        const int64_t *mask_h_idx_ = mask_h_idx.data<int64_t>();
+        const int64_t *mask_w_idx_ = mask_w_idx.data<int64_t>();
         scalar_t *top_data_ = top_data.data<scalar_t>();
         MaskedCol2imForward<scalar_t>
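The launchers above read the mask index tensors with data<int64_t>(), which only works if the tensors themselves hold 64-bit integers (ATen dtype at::kLong, i.e. torch.long). A hypothetical host-side guard, sketched here for illustration only; check_index_dtype is not a function in this repository:

// Illustrative sketch, not part of the commit: verify that an index tensor
// really is int64 before its storage is read through a const int64_t*.
// scalar_type() is the same accessor the launchers already call; at::kLong
// is ATen's 64-bit integer dtype.
#include <ATen/ATen.h>
#include <stdexcept>
#include <string>

inline void check_index_dtype(const at::Tensor &idx, const char *name) {
  if (idx.scalar_type() != at::kLong) {
    throw std::runtime_error(std::string(name) +
                             " must be a torch.long (int64) tensor");
  }
}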
mmdet/ops/sigmoid_focal_loss/src/sigmoid_focal_loss_cuda.cu
@@ -23,7 +23,7 @@
 template <typename scalar_t>
 __global__ void SigmoidFocalLossForward(const int nthreads,
                                         const scalar_t *logits,
-                                        const long *targets,
+                                        const int64_t *targets,
                                         const int num_classes,
                                         const float gamma, const float alpha,
                                         const int num, scalar_t *losses) {
@@ -60,7 +60,7 @@ __global__ void SigmoidFocalLossForward(const int nthreads,
 template <typename scalar_t>
 __global__ void SigmoidFocalLossBackward(
-    const int nthreads, const scalar_t *logits, const long *targets,
+    const int nthreads, const scalar_t *logits, const int64_t *targets,
     const scalar_t *d_losses, const int num_classes, const float gamma,
     const float alpha, const int num, scalar_t *d_logits) {
   CUDA_1D_KERNEL_LOOP(i, nthreads) {
@@ -109,7 +109,8 @@ at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
   auto losses = at::empty({num_samples, logits.size(1)}, logits.options());
   auto losses_size = num_samples * logits.size(1);
-  dim3 grid(std::min(THCCeilDiv((long)losses_size, 512L), 4096L));
+  dim3 grid(
+      std::min(THCCeilDiv((int64_t)losses_size, (int64_t)512), (int64_t)4096));
   dim3 block(512);

   if (losses.numel() == 0) {
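Note on the grid computation above: THCCeilDiv is a template whose two arguments must have the same type, so the old call paired a (long) cast with the long literals 512L and 4096L. After the change every operand is explicitly int64_t, which keeps the call well-formed and 64-bit even where long is a 32-bit type. For reference, a self-contained sketch of the same ceiling-division arithmetic; ceil_div and grid_blocks are illustrative names, not functions from this repository:

// Sketch only: the ceiling division used to size the launch grid, written
// with explicit int64_t operands so the result is identical on LP64 and
// LLP64 platforms.
#include <algorithm>
#include <cstdint>

inline int64_t ceil_div(int64_t numerator, int64_t denominator) {
  // smallest q such that q * denominator >= numerator (for positive inputs)
  return (numerator + denominator - 1) / denominator;
}

// e.g. the number of 512-thread blocks needed for `losses_size` elements,
// capped at 4096 blocks as in the code above:
inline int64_t grid_blocks(int64_t losses_size) {
  return std::min(ceil_div(losses_size, (int64_t)512), (int64_t)4096);
}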
@@ -121,7 +122,7 @@ at::Tensor SigmoidFocalLoss_forward_cuda(const at::Tensor &logits,
       logits.scalar_type(), "SigmoidFocalLoss_forward", [&] {
         SigmoidFocalLossForward<scalar_t><<<grid, block>>>(
             losses_size, logits.contiguous().data<scalar_t>(),
-            targets.contiguous().data<long>(), num_classes, gamma, alpha,
+            targets.contiguous().data<int64_t>(), num_classes, gamma, alpha,
             num_samples, losses.data<scalar_t>());
       });
   THCudaCheck(cudaGetLastError());
@@ -147,7 +148,8 @@ at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
   auto d_logits = at::zeros({num_samples, num_classes}, logits.options());
   auto d_logits_size = num_samples * logits.size(1);
-  dim3 grid(std::min(THCCeilDiv((long)d_logits_size, 512L), 4096L));
+  dim3 grid(std::min(THCCeilDiv((int64_t)d_logits_size, (int64_t)512),
+                     (int64_t)4096));
   dim3 block(512);

   if (d_logits.numel() == 0) {
@@ -159,7 +161,7 @@ at::Tensor SigmoidFocalLoss_backward_cuda(const at::Tensor &logits,
       logits.scalar_type(), "SigmoidFocalLoss_backward", [&] {
         SigmoidFocalLossBackward<scalar_t><<<grid, block>>>(
             d_logits_size, logits.contiguous().data<scalar_t>(),
-            targets.contiguous().data<long>(),
+            targets.contiguous().data<int64_t>(),
             d_losses.contiguous().data<scalar_t>(), num_classes, gamma, alpha,
             num_samples, d_logits.data<scalar_t>());
       });
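Nothing changes for callers: the targets tensor passed from Python is already created as torch.long, which ATen stores as 64-bit integers, so data<int64_t>() reads exactly the memory that data<long>() read on Linux while also working where long is 32-bit. A small host-side sketch of that correspondence, for illustration only; the function below is hypothetical, assumes a CPU tensor, and uses the same data<T>() accessor that appears in these files:

// Illustration only, not from the commit: a torch.long tensor exposes its
// elements as 64-bit integers, so int64_t is the portable element type.
#include <ATen/ATen.h>
#include <cstdint>

int64_t first_target(const at::Tensor &targets_cpu) {
  // contiguous() mirrors the launchers above; the returned pointer is host
  // memory because this sketch assumes a CPU tensor.
  auto t = targets_cpu.contiguous();
  return t.data<int64_t>()[0];
}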