Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
SOLOv2-pytorch
Commits
108fc9e1
Commit
108fc9e1
authored
Sep 02, 2018
by
Kai Chen
Browse files
set up the codebase skeleton (WIP)
parent
6985ef31
Changes
92
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
738 additions
and
0 deletions
+738
-0
mmdet/ops/roi_align/src/roi_align_kernel.cu
mmdet/ops/roi_align/src/roi_align_kernel.cu
+319
-0
mmdet/ops/roi_pool/__init__.py
mmdet/ops/roi_pool/__init__.py
+2
-0
mmdet/ops/roi_pool/functions/__init__.py
mmdet/ops/roi_pool/functions/__init__.py
+0
-0
mmdet/ops/roi_pool/functions/roi_pool.py
mmdet/ops/roi_pool/functions/roi_pool.py
+56
-0
mmdet/ops/roi_pool/gradcheck.py
mmdet/ops/roi_pool/gradcheck.py
+15
-0
mmdet/ops/roi_pool/modules/__init__.py
mmdet/ops/roi_pool/modules/__init__.py
+0
-0
mmdet/ops/roi_pool/modules/roi_pool.py
mmdet/ops/roi_pool/modules/roi_pool.py
+14
-0
mmdet/ops/roi_pool/setup.py
mmdet/ops/roi_pool/setup.py
+12
-0
mmdet/ops/roi_pool/src/roi_pool_cuda.cpp
mmdet/ops/roi_pool/src/roi_pool_cuda.cpp
+86
-0
mmdet/ops/roi_pool/src/roi_pool_kernel.cu
mmdet/ops/roi_pool/src/roi_pool_kernel.cu
+193
-0
mmdet/version.py
mmdet/version.py
+1
-0
setup.py
setup.py
+40
-0
No files found.
mmdet/ops/roi_align/src/roi_align_kernel.cu
0 → 100644
View file @
108fc9e1
#include <ATen/ATen.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <math.h>
#include <stdio.h>
#include <vector>
// Grid-stride loop over a flat 1-D index space of `n` elements.
#define CUDA_1D_KERNEL_LOOP(i, n)                            \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
       i += blockDim.x * gridDim.x)

#define THREADS_PER_BLOCK 1024

// Number of 1-D grid blocks needed to cover `N` elements with
// THREADS_PER_BLOCK threads each, clamped to a conservative 65000
// blocks (kernels use a grid-stride loop, so clamping stays correct).
inline int GET_BLOCKS(const int N) {
  const int wanted = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
  const int cap = 65000;
  return wanted < cap ? wanted : cap;
}
// Bilinearly sample `bottom_data` (a height x width map, row-major) at the
// continuous coordinate (y, x).  Samples more than one pixel outside the
// map return 0; coordinates in [-1, 0] are clamped to the border.
// FIX: the `1.` literals were double, forcing a float->double->float round
// trip in the float instantiation; use scalar_t(1) instead.
template <typename scalar_t>
__device__ scalar_t bilinear_interpolate(const scalar_t *bottom_data,
                                         const int height, const int width,
                                         scalar_t y, scalar_t x) {
  // deal with cases that inverse elements are out of feature map boundary
  if (y < -1.0 || y > height || x < -1.0 || x > width) {
    return 0;
  }

  if (y <= 0) y = 0;
  if (x <= 0) x = 0;

  int y_low = (int)y;
  int x_low = (int)x;
  int y_high;
  int x_high;

  // clamp the high corner to the last row/column
  if (y_low >= height - 1) {
    y_high = y_low = height - 1;
    y = (scalar_t)y_low;
  } else {
    y_high = y_low + 1;
  }

  if (x_low >= width - 1) {
    x_high = x_low = width - 1;
    x = (scalar_t)x_low;
  } else {
    x_high = x_low + 1;
  }

  // l* = fractional part, h* = complement
  scalar_t ly = y - y_low;
  scalar_t lx = x - x_low;
  scalar_t hy = scalar_t(1) - ly;
  scalar_t hx = scalar_t(1) - lx;

  // do bilinear interpolation over the four neighbouring values
  scalar_t lt = bottom_data[y_low * width + x_low];
  scalar_t rt = bottom_data[y_low * width + x_high];
  scalar_t lb = bottom_data[y_high * width + x_low];
  scalar_t rb = bottom_data[y_high * width + x_high];
  scalar_t w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

  scalar_t val = (w1 * lt + w2 * rt + w3 * lb + w4 * rb);
  return val;
}
// ROIAlign forward kernel.  One thread per output element (n, c, ph, pw):
// the output is the mean of sample_num_h * sample_num_w bilinear samples
// inside the bin.  `bottom_rois` rows are (batch_idx, x1, y1, x2, y2) in
// input-image coordinates, mapped to the feature map via spatial_scale.
// FIX: removed dead locals h, w, hstart, wstart — they were computed but
// never read anywhere in the kernel.
template <typename scalar_t>
__global__ void ROIAlignForward(const int nthreads, const scalar_t *bottom_data,
                                const scalar_t *bottom_rois,
                                const scalar_t spatial_scale,
                                const int sample_num, const int channels,
                                const int height, const int width,
                                const int pooled_height, const int pooled_width,
                                scalar_t *top_data) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the aligned output
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int c = (index / pooled_width / pooled_height) % channels;
    int n = index / pooled_width / pooled_height / channels;

    const scalar_t *offset_bottom_rois = bottom_rois + n * 5;
    int roi_batch_ind = offset_bottom_rois[0];
    scalar_t roi_start_w = offset_bottom_rois[1] * spatial_scale;
    scalar_t roi_start_h = offset_bottom_rois[2] * spatial_scale;
    scalar_t roi_end_w = (offset_bottom_rois[3] + 1) * spatial_scale;
    scalar_t roi_end_h = (offset_bottom_rois[4] + 1) * spatial_scale;

    // Force malformed ROIs to be 1x1
    scalar_t roi_width = fmaxf((scalar_t)roi_end_w - roi_start_w, 0.);
    scalar_t roi_height = fmaxf((scalar_t)roi_end_h - roi_start_h, 0.);

    scalar_t bin_size_h = roi_height / pooled_height;
    scalar_t bin_size_w = roi_width / pooled_width;

    const scalar_t *offset_bottom_data =
        bottom_data + (roi_batch_ind * channels + c) * height * width;

    // adaptive sampling density when sample_num <= 0 (e.g., = 2)
    int sample_num_h =
        (sample_num > 0) ? sample_num : ceil(roi_height / pooled_height);
    int sample_num_w =
        (sample_num > 0) ? sample_num : ceil(roi_width / pooled_width);

    scalar_t output_val = 0;
    for (int iy = 0; iy < sample_num_h; iy++) {
      const scalar_t y = roi_start_h + ph * bin_size_h +
                         (scalar_t)(iy + scalar_t(.5f)) * bin_size_h /
                             (scalar_t)(sample_num_h);
      for (int ix = 0; ix < sample_num_w; ix++) {
        const scalar_t x = roi_start_w + pw * bin_size_w +
                           (scalar_t)(ix + scalar_t(.5f)) * bin_size_w /
                               (scalar_t)(sample_num_w);
        scalar_t val = bilinear_interpolate<scalar_t>(offset_bottom_data,
                                                      height, width, y, x);
        output_val += val;
      }
    }
    output_val /= (sample_num_h * sample_num_w);
    top_data[index] = output_val;
  }
}
// Host-side launcher for ROIAlignForward.  Dispatches on the floating
// dtype of `features`, launches one thread per output element, and
// aborts the process on any CUDA error reported after the launch.
int ROIAlignForwardLaucher(const at::Tensor features, const at::Tensor rois,
                           const float spatial_scale, const int sample_num,
                           const int channels, const int height,
                           const int width, const int num_rois,
                           const int pooled_height, const int pooled_width,
                           at::Tensor output) {
  const int output_size = num_rois * pooled_height * pooled_width * channels;

  AT_DISPATCH_FLOATING_TYPES(
      features.type(), "ROIAlignLaucherForward", ([&] {
        const scalar_t *bottom_data = features.data<scalar_t>();
        const scalar_t *rois_data = rois.data<scalar_t>();
        scalar_t *top_data = output.data<scalar_t>();

        ROIAlignForward<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
                output_size, bottom_data, rois_data, scalar_t(spatial_scale),
                sample_num, channels, height, width, pooled_height,
                pooled_width, top_data);
      }));

  cudaError_t err = cudaGetLastError();
  if (cudaSuccess != err) {
    fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
    exit(-1);
  }
  return 1;
}
// Gradient counterpart of bilinear_interpolate: for a sample point (y, x)
// on a height x width map, write the four interpolation weights (w1..w4)
// and the corner indices (x_low/x_high/y_low/y_high) through the
// reference out-params.  Out-of-bounds samples get zero weights and
// indices of -1 so callers can skip them.
// FIX: the `1.` literals were double, forcing a float->double->float round
// trip in the float instantiation; use scalar_t(1) instead.
template <typename scalar_t>
__device__ void bilinear_interpolate_gradient(
    const int height, const int width, scalar_t y, scalar_t x, scalar_t &w1,
    scalar_t &w2, scalar_t &w3, scalar_t &w4, int &x_low, int &x_high,
    int &y_low, int &y_high) {
  // deal with cases that inverse elements are out of feature map boundary
  if (y < -1.0 || y > height || x < -1.0 || x > width) {
    w1 = w2 = w3 = w4 = 0.;
    x_low = x_high = y_low = y_high = -1;
    return;
  }

  if (y <= 0) y = 0;
  if (x <= 0) x = 0;

  y_low = (int)y;
  x_low = (int)x;

  // clamp the high corner to the last row/column
  if (y_low >= height - 1) {
    y_high = y_low = height - 1;
    y = (scalar_t)y_low;
  } else {
    y_high = y_low + 1;
  }

  if (x_low >= width - 1) {
    x_high = x_low = width - 1;
    x = (scalar_t)x_low;
  } else {
    x_high = x_low + 1;
  }

  // l* = fractional part, h* = complement
  scalar_t ly = y - y_low;
  scalar_t lx = x - x_low;
  scalar_t hy = scalar_t(1) - ly;
  scalar_t hx = scalar_t(1) - lx;

  w1 = hy * hx, w2 = hy * lx, w3 = ly * hx, w4 = ly * lx;

  return;
}
// ROIAlign backward kernel.  One thread per output-gradient element
// (n, c, ph, pw): the gradient is split over the four bilinear corners of
// every sample point in the bin and scattered into bottom_diff with
// atomicAdd (multiple rois may overlap the same input pixel).
// FIX: removed dead locals h, w, hstart, wstart — they were computed but
// never read anywhere in the kernel.
template <typename scalar_t>
__global__ void ROIAlignBackward(
    const int nthreads, const scalar_t *top_diff, const scalar_t *bottom_rois,
    const scalar_t spatial_scale, const int sample_num, const int channels,
    const int height, const int width, const int pooled_height,
    const int pooled_width, scalar_t *bottom_diff) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the aligned output
    int pw = index % pooled_width;
    int ph = (index / pooled_width) % pooled_height;
    int c = (index / pooled_width / pooled_height) % channels;
    int n = index / pooled_width / pooled_height / channels;

    const scalar_t *offset_bottom_rois = bottom_rois + n * 5;
    int roi_batch_ind = offset_bottom_rois[0];
    scalar_t roi_start_w = offset_bottom_rois[1] * spatial_scale;
    scalar_t roi_start_h = offset_bottom_rois[2] * spatial_scale;
    scalar_t roi_end_w = (offset_bottom_rois[3] + 1) * spatial_scale;
    scalar_t roi_end_h = (offset_bottom_rois[4] + 1) * spatial_scale;

    // Force malformed ROIs to be 1x1
    scalar_t roi_width = fmaxf((scalar_t)roi_end_w - roi_start_w, 0.);
    scalar_t roi_height = fmaxf((scalar_t)roi_end_h - roi_start_h, 0.);

    scalar_t bin_size_h = roi_height / pooled_height;
    scalar_t bin_size_w = roi_width / pooled_width;

    scalar_t *offset_bottom_diff =
        bottom_diff + (roi_batch_ind * channels + c) * height * width;
    int offset_top = (n * channels + c) * pooled_height * pooled_width +
                     ph * pooled_width + pw;
    scalar_t offset_top_diff = top_diff[offset_top];

    // adaptive sampling density when sample_num <= 0 (e.g., = 2)
    int sample_num_h =
        (sample_num > 0) ? sample_num : ceil(roi_height / pooled_height);
    int sample_num_w =
        (sample_num > 0) ? sample_num : ceil(roi_width / pooled_width);
    const scalar_t count = (scalar_t)(sample_num_h * sample_num_w);

    for (int iy = 0; iy < sample_num_h; iy++) {
      const scalar_t y =
          roi_start_h + ph * bin_size_h +
          (scalar_t)(iy + .5f) * bin_size_h / (scalar_t)(sample_num_h);
      for (int ix = 0; ix < sample_num_w; ix++) {
        const scalar_t x =
            roi_start_w + pw * bin_size_w +
            (scalar_t)(ix + .5f) * bin_size_w / (scalar_t)(sample_num_w);
        scalar_t w1, w2, w3, w4;
        int x_low, x_high, y_low, y_high;
        bilinear_interpolate_gradient<scalar_t>(height, width, y, x, w1, w2,
                                                w3, w4, x_low, x_high, y_low,
                                                y_high);
        scalar_t g1 = offset_top_diff * w1 / count;
        scalar_t g2 = offset_top_diff * w2 / count;
        scalar_t g3 = offset_top_diff * w3 / count;
        scalar_t g4 = offset_top_diff * w4 / count;
        // indices of -1 mark out-of-range samples; skip them
        if (x_low >= 0 && x_high >= 0 && y_low >= 0 && y_high >= 0) {
          atomicAdd(offset_bottom_diff + y_low * width + x_low, g1);
          atomicAdd(offset_bottom_diff + y_low * width + x_high, g2);
          atomicAdd(offset_bottom_diff + y_high * width + x_low, g3);
          atomicAdd(offset_bottom_diff + y_high * width + x_high, g4);
        }
      }
    }
  }
}
// Empty specialization for double: the generic backward kernel calls
// atomicAdd on scalar_t, and atomicAdd(double*) requires sm_60+.  The
// launcher rejects double inputs before launching, so this stub exists
// only to satisfy the instantiation generated by
// AT_DISPATCH_FLOATING_TYPES.
template <>
__global__ void ROIAlignBackward<double>(
    const int nthreads, const double *top_diff, const double *bottom_rois,
    const double spatial_scale, const int sample_num, const int channels,
    const int height, const int width, const int pooled_height,
    const int pooled_width, double *bottom_diff) {}
// Host-side launcher for ROIAlignBackward.  double inputs are rejected
// (the backward kernel specialization for double is an empty stub); the
// process aborts on any CUDA error reported after the launch.
int ROIAlignBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
                            const float spatial_scale, const int sample_num,
                            const int channels, const int height,
                            const int width, const int num_rois,
                            const int pooled_height, const int pooled_width,
                            at::Tensor bottom_grad) {
  const int output_size = num_rois * pooled_height * pooled_width * channels;

  AT_DISPATCH_FLOATING_TYPES(
      top_grad.type(), "ROIAlignLaucherBackward", ([&] {
        const scalar_t *top_diff = top_grad.data<scalar_t>();
        const scalar_t *rois_data = rois.data<scalar_t>();
        scalar_t *bottom_diff = bottom_grad.data<scalar_t>();

        if (sizeof(scalar_t) == sizeof(double)) {
          fprintf(stderr, "double is not supported\n");
          exit(-1);
        }

        ROIAlignBackward<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
                output_size, top_diff, rois_data, spatial_scale, sample_num,
                channels, height, width, pooled_height, pooled_width,
                bottom_diff);
      }));

  cudaError_t err = cudaGetLastError();
  if (cudaSuccess != err) {
    fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
    exit(-1);
  }
  return 1;
}
mmdet/ops/roi_pool/__init__.py
0 → 100644
View file @
108fc9e1
from
.functions.roi_pool
import
roi_pool
from
.modules.roi_pool
import
RoIPool
mmdet/ops/roi_pool/functions/__init__.py
0 → 100644
View file @
108fc9e1
mmdet/ops/roi_pool/functions/roi_pool.py
0 → 100644
View file @
108fc9e1
import
torch
from
torch.autograd
import
Function
from
..
import
roi_pool_cuda
class RoIPoolFunction(Function):
    """Autograd function wrapping the compiled ``roi_pool_cuda`` op.

    ``forward`` max-pools each RoI of ``features`` into a fixed
    ``(out_h, out_w)`` grid and records the argmax indices; ``backward``
    routes gradients through those indices.
    """

    @staticmethod
    def forward(ctx, features, rois, out_size, spatial_scale):
        # Normalize out_size into a pair (out_h, out_w).
        if isinstance(out_size, int):
            out_h = out_w = out_size
        elif isinstance(out_size, tuple):
            assert len(out_size) == 2
            assert isinstance(out_size[0], int)
            assert isinstance(out_size[1], int)
            out_h, out_w = out_size
        else:
            raise TypeError(
                '"out_size" must be an integer or tuple of integers')
        assert features.is_cuda
        ctx.save_for_backward(rois)

        num_channels = features.size(1)
        num_rois = rois.size(0)
        out_size = (num_rois, num_channels, out_h, out_w)
        output = features.new_zeros(*out_size)
        # int argmax buffer, filled by the CUDA kernel
        argmax = features.new_zeros(*out_size, dtype=torch.int)
        roi_pool_cuda.forward(features, rois, out_h, out_w, spatial_scale,
                              output, argmax)

        # Stash what backward needs beyond the saved rois tensor.
        ctx.spatial_scale = spatial_scale
        ctx.feature_size = features.size()
        ctx.argmax = argmax
        return output

    @staticmethod
    def backward(ctx, grad_output):
        assert grad_output.is_cuda
        spatial_scale = ctx.spatial_scale
        feature_size = ctx.feature_size
        argmax = ctx.argmax
        rois = ctx.saved_tensors[0]
        assert feature_size is not None

        grad_input = grad_rois = None
        if ctx.needs_input_grad[0]:
            grad_input = grad_output.new(feature_size).zero_()
            roi_pool_cuda.backward(grad_output, rois, argmax, spatial_scale,
                                   grad_input)
        # rois, out_size and spatial_scale receive no gradient
        return grad_input, grad_rois, None, None
# Functional alias: roi_pool(features, rois, out_size, spatial_scale)
roi_pool = RoIPoolFunction.apply
mmdet/ops/roi_pool/gradcheck.py
0 → 100644
View file @
108fc9e1
import torch
from torch.autograd import gradcheck

import os.path as osp
import sys
sys.path.append(osp.abspath(osp.join(__file__, '../../')))
# FIX: the package added in this commit is `roi_pool`; the old import of
# `roi_pooling` raised ImportError.
from roi_pool import RoIPool

# Small random feature map and three rois (batch_idx, x1, y1, x2, y2)
# for a numerical gradient check of the CUDA RoI pooling op.
feat = torch.randn(4, 16, 15, 15, requires_grad=True).cuda()
rois = torch.Tensor([[0, 0, 0, 50, 50], [0, 10, 30, 43, 55],
                     [1, 67, 40, 110, 120]]).cuda()
inputs = (feat, rois)
print('Gradcheck for roi pooling...')
test = gradcheck(RoIPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3)
print(test)
mmdet/ops/roi_pool/modules/__init__.py
0 → 100644
View file @
108fc9e1
mmdet/ops/roi_pool/modules/roi_pool.py
0 → 100644
View file @
108fc9e1
from
torch.nn.modules.module
import
Module
from
..functions.roi_pool
import
roi_pool
class RoIPool(Module):
    """``nn.Module`` wrapper around the functional ``roi_pool`` op."""

    def __init__(self, out_size, spatial_scale):
        super(RoIPool, self).__init__()
        # Output grid size: int or (h, w) tuple; validated by roi_pool.
        self.out_size = out_size
        # Factor mapping roi coordinates onto the feature map.
        self.spatial_scale = float(spatial_scale)

    def forward(self, features, rois):
        return roi_pool(features, rois, self.out_size, self.spatial_scale)
mmdet/ops/roi_pool/setup.py
0 → 100644
View file @
108fc9e1
from
setuptools
import
setup
from
torch.utils.cpp_extension
import
BuildExtension
,
CUDAExtension
# Build the roi_pool_cuda extension from its C++ binding and CUDA kernel.
setup(
    name='roi_pool',
    ext_modules=[
        CUDAExtension('roi_pool_cuda', [
            'src/roi_pool_cuda.cpp',
            'src/roi_pool_kernel.cu',
        ])
    ],
    cmdclass={'build_ext': BuildExtension})
mmdet/ops/roi_pool/src/roi_pool_cuda.cpp
0 → 100644
View file @
108fc9e1
#include <torch/torch.h>
#include <cmath>
#include <vector>
int
ROIPoolForwardLaucher
(
const
at
::
Tensor
features
,
const
at
::
Tensor
rois
,
const
float
spatial_scale
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
num_rois
,
const
int
pooled_h
,
const
int
pooled_w
,
at
::
Tensor
output
,
at
::
Tensor
argmax
);
int
ROIPoolBackwardLaucher
(
const
at
::
Tensor
top_grad
,
const
at
::
Tensor
rois
,
const
at
::
Tensor
argmax
,
const
float
spatial_scale
,
const
int
batch_size
,
const
int
channels
,
const
int
height
,
const
int
width
,
const
int
num_rois
,
const
int
pooled_h
,
const
int
pooled_w
,
at
::
Tensor
bottom_grad
);
#define CHECK_CUDA(x) AT_ASSERT(x.type().is_cuda(), #x " must be a CUDAtensor ")
#define CHECK_CONTIGUOUS(x) \
AT_ASSERT(x.is_contiguous(), #x " must be contiguous ")
#define CHECK_INPUT(x) \
CHECK_CUDA(x); \
CHECK_CONTIGUOUS(x)
// Python-facing forward entry point: validates that all tensors are
// contiguous CUDA tensors, infers shapes, and launches the forward
// kernel.  Returns 1 on success, 0 when rois does not have 5 columns.
int roi_pooling_forward_cuda(at::Tensor features, at::Tensor rois,
                             int pooled_height, int pooled_width,
                             float spatial_scale, at::Tensor output,
                             at::Tensor argmax) {
  CHECK_INPUT(features);
  CHECK_INPUT(rois);
  CHECK_INPUT(output);
  CHECK_INPUT(argmax);

  // Number of ROIs; each row must be (batch_idx, x1, y1, x2, y2).
  int num_rois = rois.size(0);
  int size_rois = rois.size(1);
  if (size_rois != 5) {
    printf("wrong roi size\n");
    return 0;
  }

  int channels = features.size(1);
  int height = features.size(2);
  int width = features.size(3);

  ROIPoolForwardLaucher(features, rois, spatial_scale, channels, height,
                        width, num_rois, pooled_height, pooled_width, output,
                        argmax);
  return 1;
}
// Python-facing backward entry point: validates inputs, infers shapes
// from the gradient tensors, and launches the backward kernel.  Returns
// 1 on success, 0 when rois does not have 5 columns.
int roi_pooling_backward_cuda(at::Tensor top_grad, at::Tensor rois,
                              at::Tensor argmax, float spatial_scale,
                              at::Tensor bottom_grad) {
  CHECK_INPUT(top_grad);
  CHECK_INPUT(rois);
  CHECK_INPUT(argmax);
  CHECK_INPUT(bottom_grad);

  int pooled_height = top_grad.size(2);
  int pooled_width = top_grad.size(3);
  int num_rois = rois.size(0);
  int size_rois = rois.size(1);
  if (size_rois != 5) {
    printf("wrong roi size\n");
    return 0;
  }

  int batch_size = bottom_grad.size(0);
  int channels = bottom_grad.size(1);
  int height = bottom_grad.size(2);
  int width = bottom_grad.size(3);

  ROIPoolBackwardLaucher(top_grad, rois, argmax, spatial_scale, batch_size,
                         channels, height, width, num_rois, pooled_height,
                         pooled_width, bottom_grad);
  return 1;
}
// Python bindings: exposed as roi_pool_cuda.forward / roi_pool_cuda.backward.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("forward", &roi_pooling_forward_cuda, "Roi_Pooling forward (CUDA)");
  m.def("backward", &roi_pooling_backward_cuda, "Roi_Pooling backward (CUDA)");
}
mmdet/ops/roi_pool/src/roi_pool_kernel.cu
0 → 100644
View file @
108fc9e1
#include <ATen/ATen.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include <math.h>
#include <stdio.h>
#include <vector>
// Grid-stride loop over a flat 1-D index space of `n` elements.
#define CUDA_1D_KERNEL_LOOP(i, n)                            \
  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < n; \
       i += blockDim.x * gridDim.x)

#define THREADS_PER_BLOCK 1024

// Number of 1-D grid blocks needed to cover `N` elements with
// THREADS_PER_BLOCK threads each, clamped to a conservative 65000
// blocks (kernels use a grid-stride loop, so clamping stays correct).
inline int GET_BLOCKS(const int N) {
  const int wanted = (N + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
  const int cap = 65000;
  return wanted < cap ? wanted : cap;
}
// RoI max-pooling forward kernel.  One thread per output element
// (n, c, ph, pw): scans its bin in the input map, writes the maximum to
// top_data and the flat in-channel offset of that maximum (or -1 for an
// empty bin) to argmax_data.
template <typename scalar_t>
__global__ void ROIPoolForward(const int nthreads, const scalar_t *bottom_data,
                               const scalar_t *rois,
                               const scalar_t spatial_scale,
                               const int channels, const int height,
                               const int width, const int pooled_h,
                               const int pooled_w, scalar_t *top_data,
                               int *argmax_data) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    // (n, c, ph, pw) is an element in the pooled output
    int pw = index % pooled_w;
    int ph = (index / pooled_w) % pooled_h;
    int c = (index / pooled_w / pooled_h) % channels;
    int n = index / pooled_w / pooled_h / channels;

    const scalar_t *offset_rois = rois + n * 5;
    int roi_batch_ind = offset_rois[0];

    // calculate the roi region on feature maps
    scalar_t roi_x1 = offset_rois[1] * spatial_scale;
    scalar_t roi_y1 = offset_rois[2] * spatial_scale;
    scalar_t roi_x2 = (offset_rois[3] + 1) * spatial_scale;
    scalar_t roi_y2 = (offset_rois[4] + 1) * spatial_scale;

    // degenerate roi: skip, leaving this output element untouched
    // (presumably the caller pre-zeroes output/argmax — confirm upstream)
    scalar_t roi_w = roi_x2 - roi_x1;
    scalar_t roi_h = roi_y2 - roi_y1;
    if (roi_w <= 0 || roi_h <= 0) continue;

    scalar_t bin_size_w = roi_w / static_cast<scalar_t>(pooled_w);
    scalar_t bin_size_h = roi_h / static_cast<scalar_t>(pooled_h);

    // the corresponding bin region
    int bin_x1 = floor(static_cast<scalar_t>(pw) * bin_size_w + roi_x1);
    int bin_y1 = floor(static_cast<scalar_t>(ph) * bin_size_h + roi_y1);
    int bin_x2 = ceil(static_cast<scalar_t>(pw + 1) * bin_size_w + roi_x1);
    int bin_y2 = ceil(static_cast<scalar_t>(ph + 1) * bin_size_h + roi_y1);

    // add roi offsets and clip to input boundaries
    bin_x1 = min(max(bin_x1, 0), width);
    bin_y1 = min(max(bin_y1, 0), height);
    bin_x2 = min(max(bin_x2, 0), width);
    bin_y2 = min(max(bin_y2, 0), height);
    bool is_empty = (bin_y2 <= bin_y1) || (bin_x2 <= bin_x1);

    // If nothing is pooled, argmax = -1 causes nothing to be backprop'd
    int max_idx = -1;
    bottom_data += (roi_batch_ind * channels + c) * height * width;

    // Define an empty pooling region to be zero; otherwise seed the max
    // just below the first candidate so the scan always records an index.
    scalar_t max_val = is_empty ? 0 : bottom_data[bin_y1 * width + bin_x1] - 1;

    for (int h = bin_y1; h < bin_y2; ++h) {
      for (int w = bin_x1; w < bin_x2; ++w) {
        int offset = h * width + w;
        if (bottom_data[offset] > max_val) {
          max_val = bottom_data[offset];
          max_idx = offset;
        }
      }
    }
    top_data[index] = max_val;
    if (argmax_data != NULL) argmax_data[index] = max_idx;
  }
}
// Host-side launcher for ROIPoolForward.  Dispatches on the floating
// dtype of `features`, launches one thread per output element, and
// aborts the process on any CUDA error reported after the launch.
int ROIPoolForwardLaucher(const at::Tensor features, const at::Tensor rois,
                          const float spatial_scale, const int channels,
                          const int height, const int width,
                          const int num_rois, const int pooled_h,
                          const int pooled_w, at::Tensor output,
                          at::Tensor argmax) {
  const int output_size = num_rois * channels * pooled_h * pooled_w;

  AT_DISPATCH_FLOATING_TYPES(
      features.type(), "ROIPoolLaucherForward", ([&] {
        const scalar_t *bottom_data = features.data<scalar_t>();
        const scalar_t *rois_data = rois.data<scalar_t>();
        scalar_t *top_data = output.data<scalar_t>();
        int *argmax_data = argmax.data<int>();

        ROIPoolForward<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
                output_size, bottom_data, rois_data, scalar_t(spatial_scale),
                channels, height, width, pooled_h, pooled_w, top_data,
                argmax_data);
      }));

  cudaError_t err = cudaGetLastError();
  if (cudaSuccess != err) {
    fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
    exit(-1);
  }
  return 1;
}
// RoI max-pooling backward kernel.  One thread per output-gradient
// element: routes top_diff[index] to the input position recorded in
// argmax_data via atomicAdd (rois may overlap, so writes can collide).
// FIX: empty bins store argmax == -1 (see ROIPoolForward); the old code
// unconditionally did atomicAdd at offset -1 for them — an out-of-bounds
// global write.  Such bins are now skipped.
template <typename scalar_t>
__global__ void ROIPoolBackward(const int nthreads, const scalar_t *top_diff,
                                const scalar_t *rois, const int *argmax_data,
                                const scalar_t spatial_scale,
                                const int channels, const int height,
                                const int width, const int pooled_h,
                                const int pooled_w, scalar_t *bottom_diff) {
  CUDA_1D_KERNEL_LOOP(index, nthreads) {
    int pw = index % pooled_w;
    int ph = (index / pooled_w) % pooled_h;
    int c = (index / pooled_w / pooled_h) % channels;
    int n = index / pooled_w / pooled_h / channels;

    int roi_batch_ind = rois[n * 5];
    int bottom_index = argmax_data[(n * channels + c) * pooled_h * pooled_w +
                                   ph * pooled_w + pw];

    if (bottom_index != -1) {
      atomicAdd(bottom_diff + (roi_batch_ind * channels + c) * height * width +
                    bottom_index,
                top_diff[index]);
    }
  }
}
// Empty specialization for double: the generic backward kernel calls
// atomicAdd on scalar_t, and atomicAdd(double*) requires sm_60+.  The
// launcher rejects double inputs before launching, so this stub exists
// only to satisfy the instantiation generated by
// AT_DISPATCH_FLOATING_TYPES.
template <>
__global__ void ROIPoolBackward<double>(
    const int nthreads, const double *top_diff, const double *rois,
    const int *argmax_data, const double spatial_scale, const int channels,
    const int height, const int width, const int pooled_h, const int pooled_w,
    double *bottom_diff) {
  // intentionally empty
}
// Host-side launcher for ROIPoolBackward.  double inputs are rejected
// (the backward kernel specialization for double is an empty stub); the
// process aborts on any CUDA error reported after the launch.
int ROIPoolBackwardLaucher(const at::Tensor top_grad, const at::Tensor rois,
                           const at::Tensor argmax, const float spatial_scale,
                           const int batch_size, const int channels,
                           const int height, const int width,
                           const int num_rois, const int pooled_h,
                           const int pooled_w, at::Tensor bottom_grad) {
  const int output_size = num_rois * pooled_h * pooled_w * channels;

  AT_DISPATCH_FLOATING_TYPES(
      top_grad.type(), "ROIPoolLaucherBackward", ([&] {
        const scalar_t *top_diff = top_grad.data<scalar_t>();
        const scalar_t *rois_data = rois.data<scalar_t>();
        const int *argmax_data = argmax.data<int>();
        scalar_t *bottom_diff = bottom_grad.data<scalar_t>();

        if (sizeof(scalar_t) == sizeof(double)) {
          fprintf(stderr, "double is not supported\n");
          exit(-1);
        }

        ROIPoolBackward<scalar_t>
            <<<GET_BLOCKS(output_size), THREADS_PER_BLOCK>>>(
                output_size, top_diff, rois_data, argmax_data,
                scalar_t(spatial_scale), channels, height, width, pooled_h,
                pooled_w, bottom_diff);
      }));

  cudaError_t err = cudaGetLastError();
  if (cudaSuccess != err) {
    fprintf(stderr, "cudaCheckError() failed : %s\n", cudaGetErrorString(err));
    exit(-1);
  }
  return 1;
}
mmdet/version.py
0 → 100644
View file @
108fc9e1
# Package version string.
__version__ = '0.5.0'
setup.py
0 → 100644
View file @
108fc9e1
from
setuptools
import
find_packages
,
setup
def readme():
    """Return the contents of README.md for use as the long description."""
    with open('README.md') as f:
        return f.read()
def get_version():
    """Execute the package version file and return its ``__version__``.

    FIX: the path previously pointed at ``mmcv/version.py`` (copied from
    the mmcv setup script); this package stores its version in
    ``mmdet/version.py``.
    """
    version_file = 'mmdet/version.py'
    with open(version_file, 'r') as f:
        exec(compile(f.read(), version_file, 'exec'))
    return locals()['__version__']
# Package metadata; version and long description are read from the
# repository at build time via get_version() and readme().
setup(
    name='mmdet',
    version=get_version(),
    description='Open MMLab Detection Toolbox',
    long_description=readme(),
    keywords='computer vision, object detection',
    packages=find_packages(),
    classifiers=[
        'Development Status :: 4 - Beta',
        'License :: OSI Approved :: GNU General Public License v3 (GPLv3)',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Topic :: Utilities',
    ],
    license='GPLv3',
    setup_requires=['pytest-runner'],
    tests_require=['pytest'],
    install_requires=['numpy', 'matplotlib', 'six', 'terminaltables'],
    zip_safe=False)
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment