Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
Pytorch-Encoding
Commits
fed540f2
Unverified
Commit
fed540f2
authored
Aug 09, 2020
by
Hang Zhang
Committed by
GitHub
Aug 09, 2020
Browse files
fix docker (#310)
* fix docker * broken api
parent
1235f3b0
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
58 additions
and
52 deletions
+58
-52
Dockerfile
Dockerfile
+5
-3
encoding/lib/cpu/roi_align_cpu.cpp
encoding/lib/cpu/roi_align_cpu.cpp
+7
-7
encoding/lib/gpu/activation_kernel.cu
encoding/lib/gpu/activation_kernel.cu
+5
-1
encoding/lib/gpu/device_tensor.h
encoding/lib/gpu/device_tensor.h
+1
-1
encoding/lib/gpu/lib_ssd.cu
encoding/lib/gpu/lib_ssd.cu
+25
-25
encoding/lib/gpu/nms_kernel.cu
encoding/lib/gpu/nms_kernel.cu
+9
-9
encoding/lib/gpu/roi_align_kernel.cu
encoding/lib/gpu/roi_align_kernel.cu
+6
-6
No files found.
Dockerfile
View file @
fed540f2
FROM
nvcr.io/nvidia/pytorch:
19
.0
5
-py3
FROM
nvcr.io/nvidia/pytorch:
20
.0
6
-py3
# Set working directory # Set working directory
WORKDIR
/workspace
...
...
@@ -8,14 +8,16 @@ WORKDIR /workspace
RUN
apt-get update
&&
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
python3-tk python-pip git tmux htop tree
RUN
python
-m
pip
install
--upgrade
pip
RUN
python
-m
pip
install
torch
==
1.4.0
RUN
python
-m
pip
install
torchvision
==
0.5.0
#
RUN python -m pip install torch==1.4.0
#
RUN python -m pip install torchvision==0.5.0
RUN
python
-m
pip
install
pycocotools
==
2.0.0
#RUN chmod a+rwx -R /opt/conda/
COPY
./setup.py .
COPY
./encoding ./encoding
ENV
FORCE_CUDA="1"
RUN
python setup.py develop
COPY
./experiments ./experiments
encoding/lib/cpu/roi_align_cpu.cpp
View file @
fed540f2
...
...
@@ -413,7 +413,7 @@ at::Tensor ROIAlign_Forward_CPU(
AT_DISPATCH_FLOATING_TYPES
(
input
.
scalar_type
(),
"ROIAlign_Forward_CPU"
,
([
&
]
{
ROIAlignForwardCompute
<
scalar_t
>
(
output
.
numel
(),
input
.
data
<
scalar_t
>
(),
input
.
data
_ptr
<
scalar_t
>
(),
static_cast
<
scalar_t
>
(
spatial_scale
),
channels
,
height
,
...
...
@@ -421,9 +421,9 @@ at::Tensor ROIAlign_Forward_CPU(
pooled_height
,
pooled_width
,
sampling_ratio
,
bottom_rois
.
data
<
scalar_t
>
(),
bottom_rois
.
data
_ptr
<
scalar_t
>
(),
roi_cols
,
output
.
data
<
scalar_t
>
());
output
.
data
_ptr
<
scalar_t
>
());
}));
return
output
;
...
...
@@ -456,10 +456,10 @@ at::Tensor ROIAlign_Backward_CPU(
AT_ASSERT
(
bottom_rois
.
is_contiguous
());
AT_DISPATCH_FLOATING_TYPES
(
bottom_rois
.
type
(),
"ROIAlign_Backward_CPU"
,
([
&
]
{
AT_DISPATCH_FLOATING_TYPES
(
bottom_rois
.
scalar_
type
(),
"ROIAlign_Backward_CPU"
,
([
&
]
{
ROIAlignBackwardCompute
<
scalar_t
>
(
grad_output
.
numel
(),
grad_output
.
data
<
scalar_t
>
(),
grad_output
.
data
_ptr
<
scalar_t
>
(),
num_rois
,
static_cast
<
scalar_t
>
(
spatial_scale
),
channels
,
...
...
@@ -468,8 +468,8 @@ at::Tensor ROIAlign_Backward_CPU(
pooled_height
,
pooled_width
,
sampling_ratio
,
grad_in
.
data
<
scalar_t
>
(),
bottom_rois
.
data
<
scalar_t
>
(),
grad_in
.
data
_ptr
<
scalar_t
>
(),
bottom_rois
.
data
_ptr
<
scalar_t
>
(),
roi_cols
);
}));
...
...
encoding/lib/gpu/activation_kernel.cu
View file @
fed540f2
#include <exception>
#include <torch/extension.h>
#include <ATen/ATen.h>
#include <vector>
...
...
@@ -7,6 +8,7 @@
#include <thrust/transform.h>
#include "common.h"
using
namespace
std
;
namespace
{
...
...
@@ -40,5 +42,7 @@ void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope) {
*/
// unstable after scaling
at
::
leaky_relu_
(
z
,
1.0
/
slope
);
at
::
leaky_relu_backward
(
dz
,
z
,
slope
);
// This API is changed on pytorch side, feature broken
throw
"PyTorch API break, Don't use InplaceABN for now."
;
// at::leaky_relu_backward(dz, z, slope, false);
}
encoding/lib/gpu/device_tensor.h
View file @
fed540f2
...
...
@@ -101,7 +101,7 @@ struct DeviceTensor<DType, 1> {
template
<
typename
DType
,
int
Dim
>
static
DeviceTensor
<
DType
,
Dim
>
devicetensor
(
const
at
::
Tensor
&
blob
)
{
DType
*
data
=
blob
.
data
<
DType
>
();
DType
*
data
=
blob
.
data
_ptr
<
DType
>
();
DeviceTensor
<
DType
,
Dim
>
tensor
(
data
,
nullptr
);
for
(
int
i
=
0
;
i
<
Dim
;
++
i
)
{
tensor
.
size_
[
i
]
=
blob
.
size
(
i
);
...
...
encoding/lib/gpu/lib_ssd.cu
View file @
fed540f2
...
...
@@ -348,10 +348,10 @@ std::vector<at::Tensor> box_encoder(const int N_img,
const
at
::
Tensor
&
dbox
,
float
criteria
)
{
// Check everything is on the device
AT_ASSERTM
(
bbox_input
.
type
().
is_cuda
(),
"bboxes must be a CUDA tensor"
);
AT_ASSERTM
(
bbox_offsets
.
type
().
is_cuda
(),
"bbox offsets must be a CUDA tensor"
);
AT_ASSERTM
(
labels_input
.
type
().
is_cuda
(),
"labels must be a CUDA tensor"
);
AT_ASSERTM
(
dbox
.
type
().
is_cuda
(),
"dboxes must be a CUDA tensor"
);
AT_ASSERTM
(
bbox_input
.
is_cuda
(),
"bboxes must be a CUDA tensor"
);
AT_ASSERTM
(
bbox_offsets
.
is_cuda
(),
"bbox offsets must be a CUDA tensor"
);
AT_ASSERTM
(
labels_input
.
is_cuda
(),
"labels must be a CUDA tensor"
);
AT_ASSERTM
(
dbox
.
is_cuda
(),
"dboxes must be a CUDA tensor"
);
// Check at least offsets, bboxes and labels are consistent
// Note: offsets is N+1 vs. N for labels
...
...
@@ -374,7 +374,7 @@ std::vector<at::Tensor> box_encoder(const int N_img,
// allocate final outputs (known size)
#ifdef DEBUG
printf
(
"%d x %d
\n
"
,
N_img
*
M
,
4
);
// at::Tensor bbox_out = dbox.type().tensor({N_img * M, 4});
// at::Tensor bbox_out = dbox.
scalar_
type().tensor({N_img * M, 4});
printf
(
"allocating %lu bytes for output labels
\n
"
,
N_img
*
M
*
sizeof
(
long
));
#endif
at
::
Tensor
labels_out
=
at
::
empty
({
N_img
*
M
},
labels_input
.
options
());
...
...
@@ -398,15 +398,15 @@ std::vector<at::Tensor> box_encoder(const int N_img,
// Encode the inputs
const
int
THREADS_PER_BLOCK
=
256
;
encode
<
THREADS_PER_BLOCK
,
256
><<<
N_img
,
THREADS_PER_BLOCK
,
0
,
stream
.
stream
()
>>>
(
N_img
,
(
float4
*
)
bbox_input
.
data
<
float
>
(),
labels_input
.
data
<
long
>
(),
bbox_offsets
.
data
<
int
>
(),
(
float4
*
)
bbox_input
.
data
_ptr
<
float
>
(),
labels_input
.
data
_ptr
<
long
>
(),
bbox_offsets
.
data
_ptr
<
int
>
(),
M
,
(
float4
*
)
dbox
.
data
<
float
>
(),
(
float4
*
)
dbox
.
data
_ptr
<
float
>
(),
criteria
,
workspace
.
data
<
uint8_t
>
(),
(
float4
*
)
bbox_out
.
data
<
float
>
(),
labels_out
.
data
<
long
>
());
workspace
.
data
_ptr
<
uint8_t
>
(),
(
float4
*
)
bbox_out
.
data
_ptr
<
float
>
(),
labels_out
.
data
_ptr
<
long
>
());
THCudaCheck
(
cudaGetLastError
());
return
{
bbox_out
,
labels_out
};
...
...
@@ -429,11 +429,11 @@ at::Tensor calc_ious(const int N_img,
// Get IoU of all source x default box pairs
calc_ious_kernel
<<<
N_img
,
256
,
0
,
stream
.
stream
()
>>>
(
N_img
,
(
float4
*
)
boxes1
.
data
<
float
>
(),
boxes1_offsets
.
data
<
int
>
(),
(
float4
*
)
boxes1
.
data
_ptr
<
float
>
(),
boxes1_offsets
.
data
_ptr
<
int
>
(),
M
,
(
float4
*
)
boxes2
.
data
<
float
>
(),
ious
.
data
<
float
>
());
(
float4
*
)
boxes2
.
data
_ptr
<
float
>
(),
ious
.
data
_ptr
<
float
>
());
THCudaCheck
(
cudaGetLastError
());
return
ious
;
...
...
@@ -543,9 +543,9 @@ std::vector<at::Tensor> random_horiz_flip(
W
=
img
.
size
(
3
);
}
assert
(
img
.
type
().
is_cuda
());
assert
(
bboxes
.
type
().
is_cuda
());
assert
(
bbox_offsets
.
type
().
is_cuda
());
assert
(
img
.
is_cuda
());
assert
(
bboxes
.
is_cuda
());
assert
(
bbox_offsets
.
is_cuda
());
// printf("%d %d %d %d\n", N, C, H, W);
// Need temp storage of size img
...
...
@@ -554,7 +554,7 @@ std::vector<at::Tensor> random_horiz_flip(
auto
stream
=
at
::
cuda
::
getCurrentCUDAStream
();
AT_DISPATCH_FLOATING_TYPES_AND_HALF
(
img
.
type
(),
img
.
scalar_
type
(),
"HorizFlipImagesAndBoxes"
,
[
&
]
{
HorizFlipImagesAndBoxes
<
scalar_t
><<<
N
,
dim3
(
16
,
16
),
0
,
stream
.
stream
()
>>>
(
...
...
@@ -562,12 +562,12 @@ std::vector<at::Tensor> random_horiz_flip(
C
,
H
,
W
,
img
.
data
<
scalar_t
>
(),
bboxes
.
data
<
float
>
(),
bbox_offsets
.
data
<
int
>
(),
img
.
data
_ptr
<
scalar_t
>
(),
bboxes
.
data
_ptr
<
float
>
(),
bbox_offsets
.
data
_ptr
<
int
>
(),
p
,
flip
.
data
<
float
>
(),
tmp_img
.
data
<
scalar_t
>
(),
flip
.
data
_ptr
<
float
>
(),
tmp_img
.
data
_ptr
<
scalar_t
>
(),
nhwc
);
THCudaCheck
(
cudaGetLastError
());
});
...
...
encoding/lib/gpu/nms_kernel.cu
View file @
fed540f2
...
...
@@ -71,8 +71,8 @@ std::vector<at::Tensor> Non_Max_Suppression_CUDA(
AT_ASSERT
(
input
.
size
(
2
)
==
4
);
AT_ASSERT
(
input
.
is_contiguous
());
AT_ASSERT
(
scores
.
is_contiguous
());
AT_ASSERT
(
input
.
type
().
scalar
T
ype
()
==
at
::
kFloat
||
input
.
type
().
scalar
T
ype
()
==
at
::
kDouble
);
AT_ASSERT
(
scores
.
type
().
scalar
T
ype
()
==
at
::
kFloat
||
scores
.
type
().
scalar
T
ype
()
==
at
::
kDouble
);
AT_ASSERT
(
input
.
scalar
_t
ype
()
==
at
::
kFloat
||
input
.
scalar
_t
ype
()
==
at
::
kDouble
);
AT_ASSERT
(
scores
.
scalar
_t
ype
()
==
at
::
kFloat
||
scores
.
scalar
_t
ype
()
==
at
::
kDouble
);
auto
num_boxes
=
input
.
size
(
1
);
auto
batch_size
=
input
.
size
(
0
);
...
...
@@ -89,12 +89,12 @@ std::vector<at::Tensor> Non_Max_Suppression_CUDA(
//cudaGetDeviceProperties in the funcion body...
dim3
mask_grid
(
batch_size
);
if
(
input
.
type
().
scalar
T
ype
()
==
at
::
kFloat
)
if
(
input
.
scalar
_t
ype
()
==
at
::
kFloat
)
{
nms_kernel
<<<
mask_grid
,
mask_block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
mask
.
data
<
unsigned
char
>
(),
input
.
data
<
float
>
(),
sorted_inds
.
data
<
int64_t
>
(),
mask
.
data
_ptr
<
unsigned
char
>
(),
input
.
data
_ptr
<
float
>
(),
sorted_inds
.
data
_ptr
<
int64_t
>
(),
num_boxes
,
thresh
);
AT_ASSERT
(
cudaGetLastError
()
==
cudaSuccess
);
...
...
@@ -102,9 +102,9 @@ std::vector<at::Tensor> Non_Max_Suppression_CUDA(
else
{
nms_kernel
<<<
mask_grid
,
mask_block
,
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
mask
.
data
<
unsigned
char
>
(),
input
.
data
<
double
>
(),
sorted_inds
.
data
<
int64_t
>
(),
mask
.
data
_ptr
<
unsigned
char
>
(),
input
.
data
_ptr
<
double
>
(),
sorted_inds
.
data
_ptr
<
int64_t
>
(),
num_boxes
,
thresh
);
AT_ASSERT
(
cudaGetLastError
()
==
cudaSuccess
);
...
...
encoding/lib/gpu/roi_align_kernel.cu
View file @
fed540f2
...
...
@@ -379,7 +379,7 @@ at::Tensor ROIAlign_Forward_CUDA(
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
count
,
input
.
data
<
scalar_t
>
(),
input
.
data
_ptr
<
scalar_t
>
(),
static_cast
<
scalar_t
>
(
spatial_scale
),
channels
,
height
,
...
...
@@ -387,8 +387,8 @@ at::Tensor ROIAlign_Forward_CUDA(
pooled_height
,
pooled_width
,
sampling_ratio
,
rois
.
data
<
scalar_t
>
(),
output
.
data
<
scalar_t
>
());
rois
.
data
_ptr
<
scalar_t
>
(),
output
.
data
_ptr
<
scalar_t
>
());
}));
AT_ASSERT
(
cudaGetLastError
()
==
cudaSuccess
);
return
output
;
...
...
@@ -426,7 +426,7 @@ at::Tensor ROIAlign_Backward_CUDA(
0
,
at
::
cuda
::
getCurrentCUDAStream
()
>>>
(
count
,
grad_output
.
data
<
scalar_t
>
(),
grad_output
.
data
_ptr
<
scalar_t
>
(),
num_rois
,
static_cast
<
scalar_t
>
(
spatial_scale
),
channels
,
...
...
@@ -435,8 +435,8 @@ at::Tensor ROIAlign_Backward_CUDA(
pooled_height
,
pooled_width
,
sampling_ratio
,
grad_in
.
data
<
scalar_t
>
(),
rois
.
data
<
scalar_t
>
());
grad_in
.
data
_ptr
<
scalar_t
>
(),
rois
.
data
_ptr
<
scalar_t
>
());
}));
AT_ASSERT
(
cudaGetLastError
()
==
cudaSuccess
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment