OpenDAS / Pytorch-Encoding · Commits · ce461dae
Unverified commit ce461dae, authored Dec 17, 2018 by Hang Zhang, committed via GitHub on Dec 17, 2018.

V1.0.0 (#156)

* v1.0

Parent: c2cb2aab

Changes: showing 20 of 73 changed files, with 804 additions and 146 deletions (+804 −146).
encoding/lib/cpu/nms_cpu.cpp           +1    −1
encoding/lib/cpu/roi_align.cpp         +0    −28
encoding/lib/cpu/roi_align_cpu.cpp     +1    −1
encoding/lib/cpu/syncbn_cpu.cpp        +1    −1
encoding/lib/gpu/__init__.py           +0    −0
encoding/lib/gpu/activation_kernel.cu  +45   −0
encoding/lib/gpu/encoding_kernel.cu    +1    −1
encoding/lib/gpu/encodingv2_kernel.cu  +1    −1
encoding/lib/gpu/nms_kernel.cu         +1    −1
encoding/lib/gpu/operator.cpp          +8    −2
encoding/lib/gpu/operator.h            +57   −24
encoding/lib/gpu/roi_align_kernel.cu   +1    −1
encoding/lib/gpu/setup.py              +1    −0
encoding/lib/gpu/syncbn_kernel.cu      +260  −61
encoding/models/__init__.py            +6    −0
encoding/models/base.py                +36   −11
encoding/models/cifarresnet.py         +139  −0
encoding/models/deeplab.py             +140  −0
encoding/models/deepten.py             +97   −0
encoding/models/encnet.py              +8    −13
encoding/lib/cpu/nms_cpu.cpp

-#include <torch/tensor.h>
+#include <torch/extension.h>
 #include <ATen/ATen.h>
 #include <ATen/NativeFunctions.h>
 ...
encoding/lib/cpu/roi_align.cpp (deleted, 100644 → 0)

#include <torch/torch.h>

// CPU declarations
at::Tensor ROIAlignForwardCPU(
    const at::Tensor& input,
    const at::Tensor& bottom_rois,
    int64_t pooled_height,
    int64_t pooled_width,
    double spatial_scale,
    int64_t sampling_ratio);

at::Tensor ROIAlignBackwardCPU(
    const at::Tensor& bottom_rois,
    const at::Tensor& grad_output,  // gradient of the output of the layer
    int64_t b_size,
    int64_t channels,
    int64_t height,
    int64_t width,
    int64_t pooled_height,
    int64_t pooled_width,
    double spatial_scale,
    int64_t sampling_ratio);

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("roi_align_forward", &ROIAlignForwardCPU, "ROI Align forward (CPU)");
  m.def("roi_align_backward", &ROIAlignBackwardCPU, "ROI Align backward (CPU)");
}
encoding/lib/cpu/roi_align_cpu.cpp

-#include <torch/tensor.h>
+#include <torch/extension.h>
 #include <ATen/ATen.h>
 //#include <omp.h>
 ...
encoding/lib/cpu/syncbn_cpu.cpp

-#include <torch/tensor.h>
+#include <torch/extension.h>
 #include <ATen/ATen.h>
 #include <vector>
 ...
encoding/lib/gpu/__init__.py (deleted, 100644 → 0; empty file)
encoding/lib/gpu/activation_kernel.cu (new file, 0 → 100644)

#include <vector>
#include <torch/extension.h>
#include <ATen/ATen.h>
// #include <ATen/cuda/CUDAContext.h>
#include <cuda_runtime_api.h>
#include <thrust/device_ptr.h>
#include <thrust/transform.h>

namespace {

template <typename T>
inline void leaky_relu_backward_impl(T *z, T *dz, float slope, int64_t count) {
  // Create thrust pointers
  thrust::device_ptr<T> th_z  = thrust::device_pointer_cast(z);
  thrust::device_ptr<T> th_dz = thrust::device_pointer_cast(dz);

  thrust::transform_if(th_dz, th_dz + count, th_z, th_dz,
                       [slope] __device__ (const T& dz) { return dz * slope; },
                       [] __device__ (const T& z) { return z < 0; });
  thrust::transform_if(th_z, th_z + count, th_z,
                       [slope] __device__ (const T& z) { return z / slope; },
                       [] __device__ (const T& z) { return z < 0; });
}

} // namespace

void LeakyRelu_Forward_CUDA(at::Tensor z, float slope) {
  at::leaky_relu_(z, slope);
}

void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope) {
  int64_t count = z.numel();
  AT_DISPATCH_FLOATING_TYPES(z.type(), "LeakyRelu_Backward_CUDA", ([&] {
    leaky_relu_backward_impl<scalar_t>(
      z.data<scalar_t>(), dz.data<scalar_t>(), slope, count);
  }));
  /*
  // unstable after scaling
  at::leaky_relu_(z, 1.0 / slope);
  at::leaky_relu_backward(dz, z, slope);
  */
}
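
Editorial note on the new kernel above: the backward path exploits that leaky ReLU is invertible, so the sign of the saved output already tells where the input was negative. The first transform_if scales the incoming gradient by slope at those positions; the second divides z by slope there, restoring the pre-activation values without keeping an input copy. A minimal sketch of how the pair could be wrapped from Python — enclib_gpu is the extension name from setup.py below, but this wrapper class is illustrative, not part of the commit:

import torch
from torch.autograd import Function
# import enclib_gpu  # built by encoding/lib/gpu/setup.py

class InplaceLeakyReLU(Function):
    @staticmethod
    def forward(ctx, z, slope=0.01):
        enclib_gpu.leaky_relu_forward(z, slope)   # at::leaky_relu_ in place
        ctx.slope = slope
        ctx.mark_dirty(z)
        ctx.save_for_backward(z)                  # saves the *output*; no input copy kept
        return z

    @staticmethod
    def backward(ctx, dz):
        z, = ctx.saved_tensors
        dz = dz.contiguous()
        # scales dz by slope where z < 0 and divides z by slope there,
        # i.e. recovers the input while producing the gradient
        enclib_gpu.leaky_relu_backward(z, dz, ctx.slope)
        return dz, None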
encoding/lib/gpu/encoding_kernel.cu

 #include <vector>
-#include <torch/tensor.h>
+#include <torch/extension.h>
 #include <ATen/ATen.h>
 #include <ATen/cuda/CUDAContext.h>
 ...
encoding/lib/gpu/encodingv2_kernel.cu

 #include <vector>
-#include <torch/tensor.h>
+#include <torch/extension.h>
 #include <ATen/ATen.h>
 #include <ATen/Functions.h>
 #include <ATen/cuda/CUDAContext.h>
 ...
encoding/lib/gpu/nms_kernel.cu

-#include <torch/tensor.h>
+#include <torch/extension.h>
 #include <ATen/ATen.h>
 #include "ATen/NativeFunctions.h"
 #include <ATen/cuda/CUDAContext.h>
 ...
encoding/lib/gpu/operator.cpp

 ...
@@ -9,9 +9,13 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
   m.def("scaled_l2_forward", &ScaledL2_Forward_CUDA, "ScaledL2 forward (CUDA)");
   m.def("scaled_l2_backward", &ScaledL2_Backward_CUDA, "ScaledL2 backward (CUDA)");
   m.def("batchnorm_forward", &BatchNorm_Forward_CUDA, "BatchNorm forward (CUDA)");
+  m.def("batchnorm_inp_forward", &BatchNorm_Forward_Inp_CUDA, "BatchNorm forward (CUDA)");
   m.def("batchnorm_backward", &BatchNorm_Backward_CUDA, "BatchNorm backward (CUDA)");
-  m.def("sumsquare_forward", &Sum_Square_Forward_CUDA, "SumSqu forward (CUDA)");
-  m.def("sumsquare_backward", &Sum_Square_Backward_CUDA, "SumSqu backward (CUDA)");
+  m.def("batchnorm_inp_backward", &BatchNorm_Inp_Backward_CUDA, "BatchNorm backward (CUDA)");
+  m.def("expectation_forward", &Expectation_Forward_CUDA, "Expectation forward (CUDA)");
+  m.def("expectation_backward", &Expectation_Backward_CUDA, "Expectation backward (CUDA)");
+  m.def("expectation_inp_backward", &Expectation_Inp_Backward_CUDA, "Inplace Expectation backward (CUDA)");
   m.def("encoding_dist_forward", &Encoding_Dist_Forward_CUDA, "EncDist forward (CUDA)");
   m.def("encoding_dist_backward", &Encoding_Dist_Backward_CUDA, "Assign backward (CUDA)");
   m.def("encoding_dist_inference_forward", &Encoding_Dist_Inference_Forward_CUDA,
 ...
@@ -20,4 +24,6 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
        "Assign Inference backward (CUDA)");
   m.def("aggregatev2_forward", &AggregateV2_Forward_CUDA, "AggregateV2 forward (CUDA)");
   m.def("aggregatev2_backward", &AggregateV2_Backward_CUDA, "AggregateV2 backward (CUDA)");
+  m.def("leaky_relu_forward", &LeakyRelu_Forward_CUDA, "Learky ReLU forward (CUDA)");
+  m.def("leaky_relu_backward", &LeakyRelu_Backward_CUDA, "Learky ReLU backward (CUDA)");
 }
encoding/lib/gpu/operator.h

-#include <torch/torch.h>
+#include <torch/extension.h>
 #include <vector>

 at::Tensor ROIAlign_Forward_CUDA(
 ...
@@ -54,24 +54,53 @@ at::Tensor BatchNorm_Forward_CUDA(
   const at::Tensor mean_,
   const at::Tensor std_,
   const at::Tensor gamma_,
-  const at::Tensor beta_);
+  const at::Tensor beta_,
+  float eps);
+
+at::Tensor BatchNorm_Forward_Inp_CUDA(
+  const at::Tensor input_,
+  const at::Tensor ex_,
+  const at::Tensor exs_,
+  const at::Tensor gamma_,
+  const at::Tensor beta_,
+  float eps);

 std::vector<at::Tensor> BatchNorm_Backward_CUDA(
   const at::Tensor gradoutput_,
   const at::Tensor input_,
-  const at::Tensor mean_,
-  const at::Tensor std_,
+  const at::Tensor ex_,
+  const at::Tensor exs_,
   const at::Tensor gamma_,
   const at::Tensor beta_,
-  bool train);
+  float eps);

-std::vector<at::Tensor> Sum_Square_Forward_CUDA(
+std::vector<at::Tensor> BatchNorm_Inp_Backward_CUDA(
+  const at::Tensor gradoutput_,
+  const at::Tensor output_,
+  const at::Tensor ex_,
+  const at::Tensor exs_,
+  const at::Tensor gamma_,
+  const at::Tensor beta_,
+  float eps);
+
+std::vector<at::Tensor> Expectation_Forward_CUDA(
   const at::Tensor input_);

-at::Tensor Sum_Square_Backward_CUDA(
+at::Tensor Expectation_Backward_CUDA(
   const at::Tensor input_,
-  const at::Tensor gradSum_,
-  const at::Tensor gradSquare_);
+  const at::Tensor gradEx_,
+  const at::Tensor gradExs_);
+
+at::Tensor Expectation_Inp_Backward_CUDA(
+  const at::Tensor gradInput_,
+  const at::Tensor output_,
+  const at::Tensor gradEx_,
+  const at::Tensor gradExs_,
+  const at::Tensor ex_,
+  const at::Tensor exs_,
+  const at::Tensor gamma_,
+  const at::Tensor beta_,
+  float eps);

 at::Tensor Encoding_Dist_Inference_Forward_CUDA(
   const at::Tensor X_,
 ...
@@ -111,3 +140,7 @@ std::vector<at::Tensor> AggregateV2_Backward_CUDA(
   const at::Tensor X_,
   const at::Tensor C_,
   const at::Tensor STD_);
+
+void LeakyRelu_Forward_CUDA(at::Tensor z, float slope);
+
+void LeakyRelu_Backward_CUDA(at::Tensor z, at::Tensor dz, float slope);
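
Editorial note (not part of the commit): the signature changes above replace precomputed (mean_, std_) plus a bool train flag with raw per-channel moments ex_ = E[x], exs_ = E[x^2] and a float eps; each implementation then recovers the standard deviation itself,

\sigma_c = \sqrt{E[x_c^2] - (E[x_c])^2 + \varepsilon},

which is exactly the recurring `(exs_ - ex_ * ex_ + eps).sqrt()` line in syncbn_kernel.cu below. Passing raw moments rather than derived statistics is what lets synchronized BN sum-reduce them across GPUs before normalizing.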
encoding/lib/gpu/roi_align_kernel.cu

-#include <torch/tensor.h>
+#include <torch/extension.h>
 #include <ATen/ATen.h>
 #include <ATen/cuda/CUDAContext.h>
 ...
encoding/lib/gpu/setup.py

 ...
@@ -6,6 +6,7 @@ setup(
     ext_modules=[
         CUDAExtension('enclib_gpu', [
             'operator.cpp',
+            'activation_kernel.cu',
             'encoding_kernel.cu',
             'encodingv2_kernel.cu',
             'syncbn_kernel.cu',
 ...
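
(With the new kernel added to the source list, rebuilding follows the usual PyTorch CUDAExtension flow — e.g. `python setup.py install` from encoding/lib/gpu; that command is standard setuptools usage, not something this diff shows.)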
encoding/lib/gpu/syncbn_kernel.cu

 #include <vector>
-#include <torch/tensor.h>
+#include <torch/extension.h>
 #include <ATen/ATen.h>
 #include <ATen/cuda/CUDAContext.h>
 ...
@@ -11,14 +11,14 @@ namespace {
 template <typename DType, typename Acctype, typename DeviceTensor3>
 struct GradOp {
   __device__ GradOp(Acctype m, const DeviceTensor3 i, const DeviceTensor3 g)
-    : mean(m), input(i), gradOutput(g) {}
+    : beta(m), output(i), gradOutput(g) {}
   __device__ __forceinline__ Float2<DType, Acctype> operator()(int batch, int plane, int n) {
     DType g = gradOutput[batch][plane][n];
-    DType c = ScalarConvert<Acctype, DType>::to(input[batch][plane][n] - mean);
+    DType c = ScalarConvert<Acctype, DType>::to(output[batch][plane][n] - beta);
     return Float2<DType, Acctype>(g, g * c);
   }
-  const Acctype mean;
-  const DeviceTensor3 input;
+  const Acctype beta;
+  const DeviceTensor3 output;
   const DeviceTensor3 gradOutput;
 };
 ...
@@ -88,6 +88,72 @@ __global__ void BatchNorm_Forward_kernel (
   }
 }

+template <typename DType>
+__global__ void BatchNorm_Forward_Inp_kernel (
+    DeviceTensor<DType, 3> input,
+    DeviceTensor<DType, 1> mean,
+    DeviceTensor<DType, 1> std,
+    DeviceTensor<DType, 1> gamma,
+    DeviceTensor<DType, 1> beta) {
+  int c = blockIdx.x;
+  /* main operation */
+  for (int b = 0; b < input.getSize(0); ++b) {
+    for (int x = threadIdx.x; x < input.getSize(2); x += blockDim.x) {
+      DType inp = input[b][c][x];
+      input[b][c][x] = gamma[c] * (inp - mean[c]) / std[c] + beta[c];
+    }
+  }
+}
+
+template <typename DType>
+__global__ void BatchNorm_Backward_Inp_kernel (
+    DeviceTensor<DType, 3> gradoutput,
+    DeviceTensor<DType, 3> output,
+    DeviceTensor<DType, 3> gradinput,
+    DeviceTensor<DType, 1> gradgamma,
+    DeviceTensor<DType, 1> gradbeta,
+    DeviceTensor<DType, 1> mean,
+    DeviceTensor<DType, 1> std,
+    DeviceTensor<DType, 1> gamma,
+    DeviceTensor<DType, 1> beta,
+    DeviceTensor<DType, 1> gradEx,
+    DeviceTensor<DType, 1> gradExs) {
+  /* declarations of the variables */
+  /* Get the index and channels */
+  int c = blockIdx.x;
+  /* main operation */
+  GradOp<DType, DType, DeviceTensor<DType, 3>> g(beta[c], output, gradoutput);
+  Float2<DType, DType> res = reduce<Float2<DType, DType>,
+    GradOp<DType, DType, DeviceTensor<DType, 3>>,
+    DeviceTensor<DType, 3>>(g, gradoutput, c);
+  DType gradOutputSum = res.v1;
+  DType dotP = res.v2;
+  DType invstd = DType(1.0) / std[c];
+  DType gradScale = invstd * gamma[c];
+  if (threadIdx.x == 0) {
+    gradEx[c] = -gradOutputSum * gradScale + mean[c] * invstd * invstd * dotP;
+    gradExs[c] = -0.5 * invstd * invstd * dotP;
+  }
+  if (gradinput.numElements() > 0) {
+    for (int batch = 0; batch < gradoutput.getSize(0); ++batch) {
+      for (int x = threadIdx.x; x < gradoutput.getSize(2); x += blockDim.x) {
+        gradinput[batch][c][x] = gradoutput[batch][c][x] * gradScale;
+      }
+    }
+  }
+  if (gradgamma.numElements() > 0) {
+    if (threadIdx.x == 0) {
+      gradgamma[c] += dotP / gamma[c];
+    }
+  }
+  if (gradbeta.numElements() > 0) {
+    if (threadIdx.x == 0) {
+      gradbeta[c] += gradOutputSum;
+    }
+  }
+}
+
 template <typename DType>
 __global__ void BatchNorm_Backward_kernel (
     DeviceTensor<DType, 3> gradoutput,
 ...
@@ -99,9 +165,8 @@ __global__ void BatchNorm_Backward_kernel (
     DeviceTensor<DType, 1> std,
     DeviceTensor<DType, 1> gamma,
     DeviceTensor<DType, 1> beta,
-    DeviceTensor<DType, 1> gradMean,
-    DeviceTensor<DType, 1> gradStd,
-    bool train) {
+    DeviceTensor<DType, 1> gradEx,
+    DeviceTensor<DType, 1> gradExs) {
   /* declarations of the variables */
   /* Get the index and channels */
   int c = blockIdx.x;
 ...
@@ -114,9 +179,9 @@ __global__ void BatchNorm_Backward_kernel (
   DType dotP = res.v2;
   DType invstd = DType(1.0) / std[c];
   DType gradScale = invstd * gamma[c];
-  if (train && threadIdx.x == 0) {
-    gradMean[c] = -gradOutputSum * gamma[c] * invstd;
-    gradStd[c] = -dotP * gamma[c] * invstd * invstd;
+  if (threadIdx.x == 0) {
+    gradEx[c] = -gradOutputSum * gradScale +
+        mean[c] * invstd * invstd * dotP * gradScale;
+    gradExs[c] = -0.5 * invstd * invstd * dotP * gradScale;
   }
   if (gradinput.numElements() > 0) {
     for (int batch = 0; batch < gradoutput.getSize(0); ++batch) {
 ...
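
Editorial sketch of where the new formulas come from (using this hunk's names; not text from the commit). With per-channel moments and std derived as \(\sigma = \sqrt{E[x^2] - E[x]^2 + \varepsilon}\), backpropagating through \(\hat{x} = \gamma (x - \mu)/\sigma + \beta\) gives

\frac{\partial L}{\partial E[x]} = -\frac{\gamma}{\sigma}\sum_i g_i + \frac{\mu\,\gamma}{\sigma^{3}}\sum_i g_i (x_i - \mu), \qquad
\frac{\partial L}{\partial E[x^2]} = -\frac{\gamma}{2\sigma^{3}}\sum_i g_i (x_i - \mu),

which, with gradOutputSum = Σg, dotP = Σg(x−μ), and gradScale = γ/σ, is exactly gradEx = −gradOutputSum·gradScale + mean·invstd²·dotP·gradScale and gradExs = −0.5·invstd²·dotP·gradScale. In the in-place variant above, dotP is accumulated from the output (output − β = γ(x−μ)/σ), which already carries the factor γ/σ, so its gradEx/gradExs formulas omit the trailing gradScale; the two variants agree.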
@@ -139,10 +204,11 @@ __global__ void BatchNorm_Backward_kernel (
 template <typename DType>
-__global__ void Sum_Square_Forward_kernel (
+__global__ void Expectation_Forward_kernel (
     DeviceTensor<DType, 3> input,
-    DeviceTensor<DType, 1> sum,
-    DeviceTensor<DType, 1> square) {
+    DeviceTensor<DType, 1> ex,
+    DeviceTensor<DType, 1> exs,
+    DType norm) {
   int c = blockIdx.x;
   /* main operation */
   SumOp<DType, DType> g(input);
 ...
@@ -151,37 +217,60 @@ __global__ void Sum_Square_Forward_kernel (
   DType xsum = res.v1;
   DType xsquare = res.v2;
   if (threadIdx.x == 0) {
-    sum[c] = xsum;
-    square[c] = xsquare;
+    ex[c] = xsum * norm;
+    exs[c] = xsquare * norm;
   }
 }

 template <typename DType>
-__global__ void Sum_Square_Backward_kernel (
+__global__ void Expectation_Backward_kernel (
     DeviceTensor<DType, 3> gradInput,
     DeviceTensor<DType, 3> input,
-    DeviceTensor<DType, 1> gradSum,
-    DeviceTensor<DType, 1> gradSquare) {
+    DeviceTensor<DType, 1> gradEx,
+    DeviceTensor<DType, 1> gradExs,
+    DType norm) {
   int c = blockIdx.x;
   /* main operation */
   for (int batch = 0; batch < gradInput.getSize(0); ++batch) {
     for (int x = threadIdx.x; x < gradInput.getSize(2); x += blockDim.x) {
-      gradInput[batch][c][x] = gradSum[c] + 2 * gradSquare[c] * input[batch][c][x];
+      gradInput[batch][c][x] = gradEx[c] * norm +
+          2 * gradExs[c] * input[batch][c][x] * norm;
     }
   }
 }
-} // namespcae
+
+template <typename DType>
+__global__ void Expectation_Backward_Inp_kernel (
+    DeviceTensor<DType, 3> gradInput,
+    DeviceTensor<DType, 3> output,
+    DeviceTensor<DType, 1> gradEx,
+    DeviceTensor<DType, 1> gradExs,
+    DeviceTensor<DType, 1> mean,
+    DeviceTensor<DType, 1> std,
+    DeviceTensor<DType, 1> gamma,
+    DeviceTensor<DType, 1> beta,
+    DType norm) {
+  int c = blockIdx.x;
+  /* main operation */
+  for (int batch = 0; batch < gradInput.getSize(0); ++batch) {
+    for (int x = threadIdx.x; x < gradInput.getSize(2); x += blockDim.x) {
+      gradInput[batch][c][x] += gradEx[c] * norm +
+          2 * gradExs[c] * ((output[batch][c][x] - beta[c]) / gamma[c]
+                            * std[c] + mean[c]) * norm;
+    }
+  }
+}
+} // namespace

 at::Tensor BatchNorm_Forward_CUDA(
     const at::Tensor input_,
-    const at::Tensor mean_,
-    const at::Tensor std_,
+    const at::Tensor ex_,
+    const at::Tensor exs_,
     const at::Tensor gamma_,
-    const at::Tensor beta_) {
+    const at::Tensor beta_,
+    float eps) {
   auto output_ = at::zeros_like(input_);
+  auto std_ = (exs_ - ex_ * ex_ + eps).sqrt();
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
   dim3 blocks(input_.size(1));
   dim3 threads(getNumThreads(input_.size(2)));
 ...
@@ -189,85 +278,157 @@ at::Tensor BatchNorm_Forward_CUDA(
     /* Device tensors */
     DeviceTensor<scalar_t, 3> output = devicetensor<scalar_t, 3>(output_);
     DeviceTensor<scalar_t, 3> input = devicetensor<scalar_t, 3>(input_);
-    DeviceTensor<scalar_t, 1> mean = devicetensor<scalar_t, 1>(mean_);
+    DeviceTensor<scalar_t, 1> ex = devicetensor<scalar_t, 1>(ex_);
     DeviceTensor<scalar_t, 1> std = devicetensor<scalar_t, 1>(std_);
     DeviceTensor<scalar_t, 1> gamma = devicetensor<scalar_t, 1>(gamma_);
     DeviceTensor<scalar_t, 1> beta = devicetensor<scalar_t, 1>(beta_);
     /* kernel function */
     BatchNorm_Forward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
-      output, input, mean, std, gamma, beta);
+      output, input, ex, std, gamma, beta);
   }));
   AT_ASSERT(cudaGetLastError() == cudaSuccess);
   return output_;
 }

+at::Tensor BatchNorm_Forward_Inp_CUDA(
+    const at::Tensor input_,
+    const at::Tensor ex_,
+    const at::Tensor exs_,
+    const at::Tensor gamma_,
+    const at::Tensor beta_,
+    float eps) {
+  auto std_ = (exs_ - ex_ * ex_ + eps).sqrt();
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  dim3 blocks(input_.size(1));
+  dim3 threads(getNumThreads(input_.size(2)));
+  AT_DISPATCH_FLOATING_TYPES(input_.type(), "BatchNorm_Forward_CUDA", ([&] {
+    /* Device tensors */
+    DeviceTensor<scalar_t, 3> input = devicetensor<scalar_t, 3>(input_);
+    DeviceTensor<scalar_t, 1> ex = devicetensor<scalar_t, 1>(ex_);
+    DeviceTensor<scalar_t, 1> std = devicetensor<scalar_t, 1>(std_);
+    DeviceTensor<scalar_t, 1> gamma = devicetensor<scalar_t, 1>(gamma_);
+    DeviceTensor<scalar_t, 1> beta = devicetensor<scalar_t, 1>(beta_);
+    /* kernel function */
+    BatchNorm_Forward_Inp_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
+      input, ex, std, gamma, beta);
+  }));
+  AT_ASSERT(cudaGetLastError() == cudaSuccess);
+  return input_;
+}
+
+std::vector<at::Tensor> BatchNorm_Inp_Backward_CUDA(
+    const at::Tensor gradoutput_,
+    const at::Tensor output_,
+    const at::Tensor ex_,
+    const at::Tensor exs_,
+    const at::Tensor gamma_,
+    const at::Tensor beta_,
+    float eps) {
+  /* outputs */
+  auto std_ = (exs_ - ex_ * ex_ + eps).sqrt();
+  auto gradinput_ = at::zeros_like(output_);
+  auto gradgamma_ = at::zeros_like(gamma_);
+  auto gradbeta_ = at::zeros_like(beta_);
+  auto gradEx_ = at::zeros_like(ex_);
+  auto gradExs_ = at::zeros_like(std_);
+  /* cuda utils */
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  dim3 blocks(output_.size(1));
+  dim3 threads(getNumThreads(output_.size(2)));
+  AT_DISPATCH_FLOATING_TYPES(output_.type(), "BatchNorm_Inp_Backward_CUDA", ([&] {
+    /* Device tensors */
+    DeviceTensor<scalar_t, 3> gradoutput = devicetensor<scalar_t, 3>(gradoutput_);
+    DeviceTensor<scalar_t, 3> output = devicetensor<scalar_t, 3>(output_);
+    DeviceTensor<scalar_t, 3> gradinput = devicetensor<scalar_t, 3>(gradinput_);
+    DeviceTensor<scalar_t, 1> gradgamma = devicetensor<scalar_t, 1>(gradgamma_);
+    DeviceTensor<scalar_t, 1> gradbeta = devicetensor<scalar_t, 1>(gradbeta_);
+    DeviceTensor<scalar_t, 1> ex = devicetensor<scalar_t, 1>(ex_);
+    DeviceTensor<scalar_t, 1> std = devicetensor<scalar_t, 1>(std_);
+    DeviceTensor<scalar_t, 1> gamma = devicetensor<scalar_t, 1>(gamma_);
+    DeviceTensor<scalar_t, 1> beta = devicetensor<scalar_t, 1>(beta_);
+    DeviceTensor<scalar_t, 1> gradEx = devicetensor<scalar_t, 1>(gradEx_);
+    DeviceTensor<scalar_t, 1> gradExs = devicetensor<scalar_t, 1>(gradExs_);
+    /* kernel function */
+    BatchNorm_Backward_Inp_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
+      gradoutput, output, gradinput, gradgamma, gradbeta, ex, std,
+      gamma, beta, gradEx, gradExs);
+  }));
+  AT_ASSERT(cudaGetLastError() == cudaSuccess);
+  return {gradinput_, gradEx_, gradExs_, gradgamma_, gradbeta_};
+}
+
 std::vector<at::Tensor> BatchNorm_Backward_CUDA(
     const at::Tensor gradoutput_,
     const at::Tensor input_,
-    const at::Tensor mean_,
-    const at::Tensor std_,
+    const at::Tensor ex_,
+    const at::Tensor exs_,
     const at::Tensor gamma_,
     const at::Tensor beta_,
-    bool train) {
+    float eps) {
   /* outputs */
-  at::Tensor gradinput_ = at::zeros_like(input_);
-  at::Tensor gradgamma_ = at::zeros_like(gamma_);
-  at::Tensor gradbeta_ = at::zeros_like(beta_);
-  at::Tensor gradMean_ = at::zeros_like(mean_);
-  at::Tensor gradStd_ = at::zeros_like(std_);
+  auto std_ = (exs_ - ex_ * ex_ + eps).sqrt();
+  auto gradinput_ = at::zeros_like(input_);
+  auto gradgamma_ = at::zeros_like(gamma_);
+  auto gradbeta_ = at::zeros_like(beta_);
+  auto gradEx_ = at::zeros_like(ex_);
+  auto gradExs_ = at::zeros_like(std_);
   /* cuda utils */
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
   dim3 blocks(input_.size(1));
   dim3 threads(getNumThreads(input_.size(2)));
-  AT_DISPATCH_FLOATING_TYPES(input_.type(), "BatchNorm_Backward_CUDA", ([&] {
+  AT_DISPATCH_FLOATING_TYPES(input_.type(), "BatchNorm_Inp_Backward_CUDA", ([&] {
     /* Device tensors */
     DeviceTensor<scalar_t, 3> gradoutput = devicetensor<scalar_t, 3>(gradoutput_);
     DeviceTensor<scalar_t, 3> input = devicetensor<scalar_t, 3>(input_);
     DeviceTensor<scalar_t, 3> gradinput = devicetensor<scalar_t, 3>(gradinput_);
     DeviceTensor<scalar_t, 1> gradgamma = devicetensor<scalar_t, 1>(gradgamma_);
     DeviceTensor<scalar_t, 1> gradbeta = devicetensor<scalar_t, 1>(gradbeta_);
-    DeviceTensor<scalar_t, 1> mean = devicetensor<scalar_t, 1>(mean_);
+    DeviceTensor<scalar_t, 1> ex = devicetensor<scalar_t, 1>(ex_);
     DeviceTensor<scalar_t, 1> std = devicetensor<scalar_t, 1>(std_);
     DeviceTensor<scalar_t, 1> gamma = devicetensor<scalar_t, 1>(gamma_);
     DeviceTensor<scalar_t, 1> beta = devicetensor<scalar_t, 1>(beta_);
-    DeviceTensor<scalar_t, 1> gradMean = devicetensor<scalar_t, 1>(gradMean_);
-    DeviceTensor<scalar_t, 1> gradStd = devicetensor<scalar_t, 1>(gradStd_);
+    DeviceTensor<scalar_t, 1> gradEx = devicetensor<scalar_t, 1>(gradEx_);
+    DeviceTensor<scalar_t, 1> gradExs = devicetensor<scalar_t, 1>(gradExs_);
     /* kernel function */
     BatchNorm_Backward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
-      gradoutput, input, gradinput, gradgamma, gradbeta, mean, std,
-      gamma, beta, gradMean, gradStd, train);
+      gradoutput, input, gradinput, gradgamma, gradbeta, ex, std,
+      gamma, beta, gradEx, gradExs);
   }));
   AT_ASSERT(cudaGetLastError() == cudaSuccess);
-  return {gradinput_, gradMean_, gradStd_, gradgamma_, gradbeta_};
+  return {gradinput_, gradEx_, gradExs_, gradgamma_, gradbeta_};
 }

-std::vector<at::Tensor> Sum_Square_Forward_CUDA(
+std::vector<at::Tensor> Expectation_Forward_CUDA(
     const at::Tensor input_) {
   /* outputs */
-  at::Tensor sum_ = torch::zeros({input_.size(1)}, input_.options());
-  at::Tensor square_ = torch::zeros({input_.size(1)}, input_.options());
+  auto ex_ = torch::zeros({input_.size(1)}, input_.options());
+  auto exs_ = torch::zeros({input_.size(1)}, input_.options());
   /* cuda utils */
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
   dim3 blocks(input_.size(1));
   dim3 threads(getNumThreads(input_.size(2)));
   AT_DISPATCH_FLOATING_TYPES(input_.type(), "SumSquare_forward_CUDA", ([&] {
+    scalar_t norm = scalar_t(1) / (input_.size(0) * input_.size(2));
     /* Device tensors */
     DeviceTensor<scalar_t, 3> input = devicetensor<scalar_t, 3>(input_);
-    DeviceTensor<scalar_t, 1> sum = devicetensor<scalar_t, 1>(sum_);
-    DeviceTensor<scalar_t, 1> square = devicetensor<scalar_t, 1>(square_);
+    DeviceTensor<scalar_t, 1> ex = devicetensor<scalar_t, 1>(ex_);
+    DeviceTensor<scalar_t, 1> exs = devicetensor<scalar_t, 1>(exs_);
     /* kernel function */
-    Sum_Square_Forward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
-      input, sum, square);
+    Expectation_Forward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
+      input, ex, exs, norm);
   }));
   AT_ASSERT(cudaGetLastError() == cudaSuccess);
-  return {sum_, square_};
+  return {ex_, exs_};
 }

-at::Tensor Sum_Square_Backward_CUDA(
+at::Tensor Expectation_Backward_CUDA(
     const at::Tensor input_,
-    const at::Tensor gradSum_,
-    const at::Tensor gradSquare_) {
+    const at::Tensor gradEx_,
+    const at::Tensor gradExs_) {
   /* outputs */
   at::Tensor gradInput_ = at::zeros_like(input_);
   /* cuda utils */
 ...
@@ -275,14 +436,52 @@ at::Tensor Sum_Square_Backward_CUDA(
   dim3 blocks(input_.size(1));
   dim3 threads(getNumThreads(input_.size(2)));
   AT_DISPATCH_FLOATING_TYPES(input_.type(), "SumSquare_Backward_CUDA", ([&] {
+    scalar_t norm = scalar_t(1) / (input_.size(0) * input_.size(2));
     /* Device tensors */
     DeviceTensor<scalar_t, 3> gradInput = devicetensor<scalar_t, 3>(gradInput_);
     DeviceTensor<scalar_t, 3> input = devicetensor<scalar_t, 3>(input_);
-    DeviceTensor<scalar_t, 1> gradSum = devicetensor<scalar_t, 1>(gradSum_);
-    DeviceTensor<scalar_t, 1> gradSquare = devicetensor<scalar_t, 1>(gradSquare_);
+    DeviceTensor<scalar_t, 1> gradEx = devicetensor<scalar_t, 1>(gradEx_);
+    DeviceTensor<scalar_t, 1> gradExs = devicetensor<scalar_t, 1>(gradExs_);
     /* kernel function */
-    Sum_Square_Backward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
-      gradInput, input, gradSum, gradSquare);
+    Expectation_Backward_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
+      gradInput, input, gradEx, gradExs, norm);
   }));
   AT_ASSERT(cudaGetLastError() == cudaSuccess);
   return gradInput_;
 }
+
+at::Tensor Expectation_Inp_Backward_CUDA(
+    const at::Tensor gradInput_,
+    const at::Tensor output_,
+    const at::Tensor gradEx_,
+    const at::Tensor gradExs_,
+    const at::Tensor ex_,
+    const at::Tensor exs_,
+    const at::Tensor gamma_,
+    const at::Tensor beta_,
+    float eps) {
+  /* outputs */
+  //auto gradInput_ = at::zeros_like(output_);
+  auto std_ = (exs_ - ex_ * ex_ + eps).sqrt();
+  /* cuda utils */
+  cudaStream_t stream = at::cuda::getCurrentCUDAStream();
+  dim3 blocks(output_.size(1));
+  dim3 threads(getNumThreads(output_.size(2)));
+  AT_DISPATCH_FLOATING_TYPES(output_.type(), "SumSquare_Backward_CUDA", ([&] {
+    scalar_t norm = scalar_t(1) / (output_.size(0) * output_.size(2));
+    /* Device tensors */
+    DeviceTensor<scalar_t, 3> gradInput = devicetensor<scalar_t, 3>(gradInput_);
+    DeviceTensor<scalar_t, 3> input = devicetensor<scalar_t, 3>(output_);
+    DeviceTensor<scalar_t, 1> gradEx = devicetensor<scalar_t, 1>(gradEx_);
+    DeviceTensor<scalar_t, 1> gradExs = devicetensor<scalar_t, 1>(gradExs_);
+    DeviceTensor<scalar_t, 1> ex = devicetensor<scalar_t, 1>(ex_);
+    DeviceTensor<scalar_t, 1> std = devicetensor<scalar_t, 1>(std_);
+    DeviceTensor<scalar_t, 1> gamma = devicetensor<scalar_t, 1>(gamma_);
+    DeviceTensor<scalar_t, 1> beta = devicetensor<scalar_t, 1>(beta_);
+    /* kernel function */
+    Expectation_Backward_Inp_kernel<scalar_t><<<blocks, threads, 0, stream>>>(
+      gradInput, input, gradEx, gradExs, ex, std, gamma, beta, norm);
+  }));
+  AT_ASSERT(cudaGetLastError() == cudaSuccess);
+  return gradInput_;
 ...
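
Stepping back (an editorial sketch, not commit code): these entry points split batch norm into a moment-reduction pass and a normalization pass, which is exactly the structure synchronized BN needs — raw per-channel moments can be summed across devices before any statistics are derived. Roughly, from Python:

import torch
# import enclib_gpu  # the extension built by setup.py above

def sync_bn_forward_sketch(x, gamma, beta, eps=1e-5):
    # x viewed as (B, C, N); both ops reduce over B and N per channel
    ex, exs = enclib_gpu.expectation_forward(x)    # E[x], E[x^2]
    # a real SyncBatchNorm would all-reduce ex/exs across GPUs here
    return enclib_gpu.batchnorm_forward(x, ex, exs, gamma, beta, eps)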
encoding/models/__init__.py

from .model_zoo import get_model
from .model_store import get_model_file
from .resnet import *
from .cifarresnet import *
from .base import *
from .fcn import *
from .psp import *
from .encnet import *
from .deeplab import *

def get_segmentation_model(name, **kwargs):
    from .fcn import get_fcn
    models = {
        'fcn': get_fcn,
        'psp': get_psp,
        'atten': get_atten,
        'encnet': get_encnet,
        'encnetv2': get_encnetv2,
        'deeplab': get_deeplab,
    }
    return models[name.lower()](**kwargs)
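
For orientation, a sketch of how the dispatcher above is called (illustrative; the keyword arguments shown are typical BaseNet options, not an exhaustive or guaranteed set):

from encoding.models import get_segmentation_model

# name must be a key of the dict above:
# 'fcn', 'psp', 'atten', 'encnet', 'encnetv2', 'deeplab'
model = get_segmentation_model('deeplab', dataset='ade20k',
                               backbone='resnet50', aux=True)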
encoding/models/base.py

 ...
@@ -10,12 +10,11 @@ import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from torch.nn.functional import upsample
 from torch.nn.parallel.data_parallel import DataParallel
 from torch.nn.parallel.parallel_apply import parallel_apply
 from torch.nn.parallel.scatter_gather import scatter
-from .. import dilated as resnet
+from . import resnet
 from ..utils import batch_pix_accuracy, batch_intersection_union

 up_kwargs = {'mode': 'bilinear', 'align_corners': True}
 ...
@@ -35,6 +34,7 @@ class BaseNet(nn.Module):
         self.base_size = base_size
         self.crop_size = crop_size
         # copying modules from pretrained models
+        self.backbone = backbone
         if backbone == 'resnet50':
             self.pretrained = resnet.resnet50(pretrained=True, dilated=dilated,
                                               norm_layer=norm_layer, root=root)
 ...
@@ -50,6 +50,20 @@ class BaseNet(nn.Module):
         self._up_kwargs = up_kwargs

     def base_forward(self, x):
+        if self.backbone.startswith('wideresnet'):
+            x = self.pretrained.mod1(x)
+            x = self.pretrained.pool2(x)
+            x = self.pretrained.mod2(x)
+            x = self.pretrained.pool3(x)
+            x = self.pretrained.mod3(x)
+            x = self.pretrained.mod4(x)
+            x = self.pretrained.mod5(x)
+            c3 = x.clone()
+            x = self.pretrained.mod6(x)
+            x = self.pretrained.mod7(x)
+            x = self.pretrained.bn_out(x)
+            return None, None, c3, x
+        else:
             x = self.pretrained.conv1(x)
             x = self.pretrained.bn1(x)
             x = self.pretrained.relu(x)
 ...
@@ -124,6 +138,17 @@ class MultiEvalModule(DataParallel):
                 width = long_size
                 height = int(1.0 * h * long_size / w + 0.5)
                 short_size = height
+            """
+            short_size = int(math.ceil(self.base_size * scale))
+            if h > w:
+                width = short_size
+                height = int(1.0 * h * short_size / w)
+                long_size = height
+            else:
+                height = short_size
+                width = int(1.0 * w * short_size / h)
+                long_size = width
+            """
             # resize image to current size
             cur_img = resize_image(image, height, width, **self.module._up_kwargs)
             if long_size <= crop_size:
 ...
@@ -180,7 +205,7 @@ def module_inference(module, image, flip=True):
     return output.exp()

 def resize_image(img, h, w, **up_kwargs):
-    return F.upsample(img, (h, w), **up_kwargs)
+    return F.interpolate(img, (h, w), **up_kwargs)

 def pad_image(img, mean, std, crop_size):
     b, c, h, w = img.size()
 ...
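
Editorial note: the resize_image change above tracks PyTorch's own API — F.upsample was deprecated in favor of F.interpolate around PyTorch 0.4.1, and the two calls are drop-in equivalents for these arguments. The same substitution appears in encnet.py below.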
encoding/models/cifarresnet.py (new file, 0 → 100644)

##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import torch
import torch.nn as nn
from torch.autograd import Variable

from ..nn import View

__all__ = ['cifar_resnet20']

def conv3x3(in_planes, out_planes, stride=1):
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)

class Basicblock(nn.Module):
    """ Pre-activation residual block
    Identity Mapping in Deep Residual Networks
    ref https://arxiv.org/abs/1603.05027
    """
    expansion = 1
    def __init__(self, inplanes, planes, stride=1, norm_layer=nn.BatchNorm2d):
        super(Basicblock, self).__init__()
        if inplanes != planes or stride != 1:
            self.downsample = True
            self.residual_layer = nn.Conv2d(inplanes, planes, kernel_size=1,
                                            stride=stride)
        else:
            self.downsample = False
        conv_block = []
        conv_block += [norm_layer(inplanes),
                       nn.ReLU(inplace=True),
                       conv3x3(inplanes, planes, stride=stride),
                       norm_layer(planes),
                       nn.ReLU(inplace=True),
                       conv3x3(planes, planes)]
        self.conv_block = nn.Sequential(*conv_block)

    def forward(self, input):
        if self.downsample:
            residual = self.residual_layer(input)
        else:
            residual = input
        return residual + self.conv_block(input)

class Bottleneck(nn.Module):
    """ Pre-activation residual block
    Identity Mapping in Deep Residual Networks
    ref https://arxiv.org/abs/1603.05027
    """
    expansion = 4
    def __init__(self, inplanes, planes, stride=1, norm_layer=nn.BatchNorm2d):
        super(Bottleneck, self).__init__()
        if inplanes != planes * self.expansion or stride != 1:
            self.downsample = True
            self.residual_layer = nn.Conv2d(inplanes, planes * self.expansion,
                                            kernel_size=1, stride=stride)
        else:
            self.downsample = False
        conv_block = []
        conv_block += [norm_layer(inplanes),
                       nn.ReLU(inplace=True),
                       nn.Conv2d(inplanes, planes, kernel_size=1,
                                 stride=1, bias=False)]
        conv_block += [norm_layer(planes),
                       nn.ReLU(inplace=True),
                       nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                                 padding=1, bias=False)]
        conv_block += [norm_layer(planes),
                       nn.ReLU(inplace=True),
                       nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
                                 stride=1, bias=False)]
        self.conv_block = nn.Sequential(*conv_block)

    def forward(self, x):
        if self.downsample:
            residual = self.residual_layer(x)
        else:
            residual = x
        return residual + self.conv_block(x)

class CIFAR_ResNet(nn.Module):
    def __init__(self, block=Basicblock, num_blocks=[2, 2, 2], width_factor=1,
                 num_classes=10, norm_layer=torch.nn.BatchNorm2d):
        super(CIFAR_ResNet, self).__init__()
        self.expansion = block.expansion
        self.inplanes = int(width_factor * 16)
        strides = [1, 2, 2]
        model = []
        # Conv_1
        model += [nn.Conv2d(3, self.inplanes, kernel_size=3, padding=1),
                  norm_layer(self.inplanes),
                  nn.ReLU(inplace=True)]
        # Residual units
        model += [self._residual_unit(block, self.inplanes, num_blocks[0],
                                      strides[0], norm_layer=norm_layer)]
        for i in range(2):
            model += [self._residual_unit(
                block, int(2 * self.inplanes / self.expansion),
                num_blocks[i + 1], strides[i + 1], norm_layer=norm_layer)]
        # Last conv layer
        model += [norm_layer(self.inplanes),
                  nn.ReLU(inplace=True),
                  nn.AvgPool2d(8),
                  View(-1, self.inplanes),
                  nn.Linear(self.inplanes, num_classes)]
        self.model = nn.Sequential(*model)

    def _residual_unit(self, block, planes, n_blocks, stride, norm_layer):
        strides = [stride] + [1] * (n_blocks - 1)
        layers = []
        for i in range(n_blocks):
            layers += [block(self.inplanes, planes, strides[i],
                             norm_layer=norm_layer)]
            self.inplanes = self.expansion * planes
        return nn.Sequential(*layers)

    def forward(self, input):
        return self.model(input)

def cifar_resnet20(pretrained=False, root='~/.encoding/models', **kwargs):
    """Constructs a CIFAR ResNet-20 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    model = CIFAR_ResNet(Bottleneck, [3, 3, 3], **kwargs)
    if pretrained:
        # get_model_file is not imported in the committed file; the import
        # below is the fix needed for the pretrained path to actually run.
        from .model_store import get_model_file
        model.load_state_dict(
            torch.load(get_model_file('cifar_resnet20', root=root)),
            strict=False)
    return model
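
A quick shape check for the new model (illustrative only, not code from the commit). Note that the factory builds Bottleneck blocks with [3, 3, 3] units, not the Basicblock default of the class:

import torch
from encoding.models.cifarresnet import cifar_resnet20

net = cifar_resnet20(pretrained=False, num_classes=10)
# AvgPool2d(8) assumes the 8x8 final map produced by a 32x32 CIFAR input
out = net(torch.randn(2, 3, 32, 32))
print(out.shape)  # torch.Size([2, 10])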
encoding/models/deeplab.py (new file, 0 → 100644)

###########################################################################
# Created by: Hang Zhang
# Email: zhang.hang@rutgers.edu
# Copyright (c) 2017
###########################################################################
from __future__ import division
import os
import numpy as np
import torch
import torch.nn as nn
from torch.nn.functional import interpolate

from .base import BaseNet
from .fcn import FCNHead

class DeepLabV3(BaseNet):
    def __init__(self, nclass, backbone, aux=True, se_loss=False,
                 norm_layer=nn.BatchNorm2d, **kwargs):
        super(DeepLabV3, self).__init__(nclass, backbone, aux, se_loss,
                                        norm_layer=norm_layer, **kwargs)
        self.head = DeepLabV3Head(2048, nclass, norm_layer, self._up_kwargs)
        if aux:
            self.auxlayer = FCNHead(1024, nclass, norm_layer)

    def forward(self, x):
        _, _, h, w = x.size()
        _, _, c3, c4 = self.base_forward(x)

        outputs = []
        x = self.head(c4)
        x = interpolate(x, (h, w), **self._up_kwargs)
        outputs.append(x)
        if self.aux:
            auxout = self.auxlayer(c3)
            auxout = interpolate(auxout, (h, w), **self._up_kwargs)
            outputs.append(auxout)
        return tuple(outputs)

class DeepLabV3Head(nn.Module):
    def __init__(self, in_channels, out_channels, norm_layer, up_kwargs,
                 atrous_rates=[12, 24, 36], **kwargs):
        super(DeepLabV3Head, self).__init__()
        inter_channels = in_channels // 8
        self.aspp = ASPP_Module(in_channels, atrous_rates, norm_layer,
                                up_kwargs, **kwargs)
        self.block = nn.Sequential(
            nn.Conv2d(inter_channels, inter_channels, 3, padding=1, bias=False),
            norm_layer(inter_channels),
            nn.ReLU(True),
            nn.Dropout2d(0.1, False),
            nn.Conv2d(inter_channels, out_channels, 1))

    def forward(self, x):
        x = self.aspp(x)
        x = self.block(x)
        return x

def ASPPConv(in_channels, out_channels, atrous_rate, norm_layer):
    block = nn.Sequential(
        nn.Conv2d(in_channels, out_channels, 3, padding=atrous_rate,
                  dilation=atrous_rate, bias=False),
        norm_layer(out_channels),
        nn.ReLU(True))
    return block

class AsppPooling(nn.Module):
    def __init__(self, in_channels, out_channels, norm_layer, up_kwargs):
        super(AsppPooling, self).__init__()
        self._up_kwargs = up_kwargs
        self.gap = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Conv2d(in_channels, out_channels, 1, bias=False),
            norm_layer(out_channels),
            nn.ReLU(True))

    def forward(self, x):
        _, _, h, w = x.size()
        pool = self.gap(x)
        return interpolate(pool, (h, w), **self._up_kwargs)

class ASPP_Module(nn.Module):
    def __init__(self, in_channels, atrous_rates, norm_layer, up_kwargs):
        super(ASPP_Module, self).__init__()
        out_channels = in_channels // 8
        rate1, rate2, rate3 = tuple(atrous_rates)
        self.b0 = nn.Sequential(
            nn.Conv2d(in_channels, out_channels, 1, bias=False),
            norm_layer(out_channels),
            nn.ReLU(True))
        self.b1 = ASPPConv(in_channels, out_channels, rate1, norm_layer)
        self.b2 = ASPPConv(in_channels, out_channels, rate2, norm_layer)
        self.b3 = ASPPConv(in_channels, out_channels, rate3, norm_layer)
        self.b4 = AsppPooling(in_channels, out_channels, norm_layer, up_kwargs)

        self.project = nn.Sequential(
            nn.Conv2d(5 * out_channels, out_channels, 1, bias=False),
            norm_layer(out_channels),
            nn.ReLU(True),
            nn.Dropout2d(0.5, False))

    def forward(self, x):
        feat0 = self.b0(x)
        feat1 = self.b1(x)
        feat2 = self.b2(x)
        feat3 = self.b3(x)
        feat4 = self.b4(x)
        y = torch.cat((feat0, feat1, feat2, feat3, feat4), 1)
        return self.project(y)

def get_deeplab(dataset='pascal_voc', backbone='resnet50', pretrained=False,
                root='~/.encoding/models', **kwargs):
    acronyms = {
        'pascal_voc': 'voc',
        'pascal_aug': 'voc',
        'ade20k': 'ade',
    }
    # infer number of classes
    from ..datasets import datasets, VOCSegmentation, VOCAugSegmentation, \
        ADE20KSegmentation
    model = DeepLabV3(datasets[dataset.lower()].NUM_CLASS, backbone=backbone,
                      root=root, **kwargs)
    if pretrained:
        from .model_store import get_model_file
        model.load_state_dict(torch.load(
            get_model_file('deeplab_%s_%s' % (backbone, acronyms[dataset]),
                           root=root)))
    return model

def get_deeplab_resnet50_ade(pretrained=False, root='~/.encoding/models', **kwargs):
    r"""DeepLabV3 model from the paper `"Context Encoding for Semantic Segmentation"
    <https://arxiv.org/pdf/1803.08904.pdf>`_

    Parameters
    ----------
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.encoding/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = get_deeplab_resnet50_ade(pretrained=True)
    >>> print(model)
    """
    return get_deeplab('ade20k', 'resnet50', pretrained, root=root, **kwargs)
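
For readers, the ASPP head's plumbing in brief: five parallel branches — a 1x1 conv, three 3x3 convs dilated at rates 12/24/36, and image-level pooling — each emit in_channels // 8 maps, which are concatenated (5x) and projected back down. An illustrative shape check (not commit code):

import torch
import torch.nn as nn
from encoding.models.deeplab import DeepLabV3Head

up_kwargs = {'mode': 'bilinear', 'align_corners': True}
head = DeepLabV3Head(2048, 21, nn.BatchNorm2d, up_kwargs)  # e.g. 21 VOC classes
feat = torch.randn(2, 2048, 30, 30)  # backbone c4 features
print(head(feat).shape)              # torch.Size([2, 21, 30, 30])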
encoding/models/deepten.py (new file, 0 → 100644)

##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
import torch
import torch.nn as nn

from ..nn import Encoding, View, Normalize
from . import resnet

__all__ = ['DeepTen', 'get_deepten', 'get_deepten_resnet50_minc']

class DeepTen(nn.Module):
    def __init__(self, nclass, backbone):
        super(DeepTen, self).__init__()
        self.backbone = backbone
        # copying modules from pretrained models
        if self.backbone == 'resnet50':
            self.pretrained = resnet.resnet50(pretrained=True, dilated=False)
        elif self.backbone == 'resnet101':
            self.pretrained = resnet.resnet101(pretrained=True, dilated=False)
        elif self.backbone == 'resnet152':
            self.pretrained = resnet.resnet152(pretrained=True, dilated=False)
        else:
            raise RuntimeError('unknown backbone: {}'.format(self.backbone))
        n_codes = 32
        self.head = nn.Sequential(
            nn.Conv2d(2048, 128, 1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            Encoding(D=128, K=n_codes),
            View(-1, 128 * n_codes),
            Normalize(),
            nn.Linear(128 * n_codes, nclass),
        )

    def forward(self, x):
        _, _, h, w = x.size()
        x = self.pretrained.conv1(x)
        x = self.pretrained.bn1(x)
        x = self.pretrained.relu(x)
        x = self.pretrained.maxpool(x)
        x = self.pretrained.layer1(x)
        x = self.pretrained.layer2(x)
        x = self.pretrained.layer3(x)
        x = self.pretrained.layer4(x)
        return self.head(x)

def get_deepten(dataset='pascal_voc', backbone='resnet50', pretrained=False,
                root='~/.encoding/models', **kwargs):
    r"""DeepTen model from the paper `"Deep TEN: Texture Encoding Network"
    <https://arxiv.org/pdf/1612.02844v1.pdf>`_

    Parameters
    ----------
    dataset : str, default pascal_voc
        The dataset that model pretrained on. (pascal_voc, ade20k)
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.encoding/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = get_deepten(dataset='minc', backbone='resnet50', pretrained=False)
    >>> print(model)
    """
    from ..datasets import datasets, acronyms
    model = DeepTen(datasets[dataset.lower()].NUM_CLASS, backbone=backbone,
                    **kwargs)
    if pretrained:
        from .model_store import get_model_file
        model.load_state_dict(torch.load(
            get_model_file('deepten_%s_%s' % (backbone, acronyms[dataset]),
                           root=root)))
    return model

def get_deepten_resnet50_minc(pretrained=False, root='~/.encoding/models', **kwargs):
    r"""DeepTen model from the paper `"Deep TEN: Texture Encoding Network"
    <https://arxiv.org/pdf/1612.02844v1.pdf>`_

    Parameters
    ----------
    pretrained : bool, default False
        Whether to load the pretrained weights for model.
    root : str, default '~/.encoding/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = get_deepten_resnet50_minc(pretrained=True)
    >>> print(model)
    """
    return get_deepten(dataset='minc', backbone='resnet50',
                       pretrained=pretrained, root=root, **kwargs)
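
For context (an editorial sketch, not commit code): the head turns a (B, 2048, H, W) feature map into texture-encoding logits — a 1x1 conv reduces to D=128 channels, the Encoding layer soft-assigns the HxW descriptors to K=32 learned codewords and aggregates their residuals, View flattens the result to (B, 128*32), Normalize L2-normalizes it, and a Linear layer classifies:

import torch
from encoding.models.deepten import get_deepten

model = get_deepten(dataset='minc', backbone='resnet50', pretrained=False)
logits = model(torch.randn(1, 3, 224, 224))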
encoding/models/encnet.py

 ...
@@ -9,9 +9,9 @@ from torch.autograd import Variable
 import torch.nn as nn
 import torch.nn.functional as F
-import encoding
 from .base import BaseNet
 from .fcn import FCNHead
+from ..nn import SyncBatchNorm, Encoding, Mean

 __all__ = ['EncNet', 'EncModule', 'get_encnet', 'get_encnet_resnet50_pcontext',
            'get_encnet_resnet101_pcontext', 'get_encnet_resnet50_ade',
 ...
@@ -19,7 +19,7 @@ __all__ = ['EncNet', 'EncModule', 'get_encnet', 'get_encnet_resnet50_pcontext',
 class EncNet(BaseNet):
     def __init__(self, nclass, backbone, aux=True, se_loss=True, lateral=False,
-                 norm_layer=nn.BatchNorm2d, **kwargs):
+                 norm_layer=SyncBatchNorm, **kwargs):
         super(EncNet, self).__init__(nclass, backbone, aux, se_loss,
                                      norm_layer=norm_layer, **kwargs)
         self.head = EncHead(2048, self.nclass, se_loss=se_loss,
 ...
@@ -33,10 +33,10 @@ class EncNet(BaseNet):
         features = self.base_forward(x)

         x = list(self.head(*features))
-        x[0] = F.upsample(x[0], imsize, **self._up_kwargs)
+        x[0] = F.interpolate(x[0], imsize, **self._up_kwargs)
         if self.aux:
             auxout = self.auxlayer(features[2])
-            auxout = F.upsample(auxout, imsize, **self._up_kwargs)
+            auxout = F.interpolate(auxout, imsize, **self._up_kwargs)
             x.append(auxout)
         return tuple(x)
 ...
@@ -49,10 +49,10 @@ class EncModule(nn.Module):
             nn.Conv2d(in_channels, in_channels, 1, bias=False),
             norm_layer(in_channels),
             nn.ReLU(inplace=True),
-            encoding.nn.Encoding(D=in_channels, K=ncodes),
-            encoding.nn.BatchNorm1d(ncodes),
+            Encoding(D=in_channels, K=ncodes),
+            norm_layer(ncodes),
             nn.ReLU(inplace=True),
-            encoding.nn.Mean(dim=1))
+            Mean(dim=1))
         self.fc = nn.Sequential(
             nn.Linear(in_channels, in_channels),
             nn.Sigmoid())
 ...
@@ -134,14 +134,9 @@ def get_encnet(dataset='pascal_voc', backbone='resnet50', pretrained=False,
     >>> model = get_encnet(dataset='pascal_voc', backbone='resnet50', pretrained=False)
     >>> print(model)
     """
-    acronyms = {
-        'pascal_voc': 'voc',
-        'pascal_aug': 'voc',
-        'ade20k': 'ade',
-    }
     kwargs['lateral'] = True if dataset.lower().startswith('p') else False
     # infer number of classes
-    from ..datasets import datasets
+    from ..datasets import datasets, acronyms
     model = EncNet(datasets[dataset.lower()].NUM_CLASS, backbone=backbone,
                    root=root, **kwargs)
     if pretrained:
         from .model_store import get_model_file
 ...