ModelZoo / UNet_paddle

Commit 0d97cc8c
Authored Jun 07, 2023 by Sugon_ldc

add new model
Showing 20 changed files with 5047 additions and 0 deletions (+5047, -0):
Matting/ppmatting/models/backbone/mobilenet_v2.py        +243  -0
Matting/ppmatting/models/backbone/resnet_vd.py           +369  -0
Matting/ppmatting/models/backbone/stdcnet.py             +285  -0
Matting/ppmatting/models/backbone/vgg.py                 +167  -0
Matting/ppmatting/models/dim.py                          +208  -0
Matting/ppmatting/models/gca.py                          +305  -0
Matting/ppmatting/models/human_matting.py                +454  -0
Matting/ppmatting/models/layers/__init__.py              +17   -0
Matting/ppmatting/models/layers/gca_module.py            +211  -0
Matting/ppmatting/models/layers/tensor_fusion.py         +117  -0
Matting/ppmatting/models/layers/tensor_fusion_helper.py  +44   -0
Matting/ppmatting/models/losses/__init__.py              +1    -0
Matting/ppmatting/models/losses/loss.py                  +163  -0
Matting/ppmatting/models/modnet.py                       +494  -0
Matting/ppmatting/models/ppmatting.py                    +338  -0
Matting/ppmatting/models/ppmattingv2.py                  +601  -0
Matting/ppmatting/transforms/__init__.py                 +1    -0
Matting/ppmatting/transforms/transforms.py               +791  -0
Matting/ppmatting/utils/__init__.py                      +2    -0
Matting/ppmatting/utils/estimate_foreground_ml.py        +236  -0
Matting/ppmatting/models/backbone/mobilenet_v2.py (new file, mode 100644)

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

import numpy as np
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddleseg.cvlibs import manager

import ppmatting

MODEL_URLS = {
    "MobileNetV2_x0_25":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_25_pretrained.pdparams",
    "MobileNetV2_x0_5":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_5_pretrained.pdparams",
    "MobileNetV2_x0_75":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x0_75_pretrained.pdparams",
    "MobileNetV2":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_pretrained.pdparams",
    "MobileNetV2_x1_5":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x1_5_pretrained.pdparams",
    "MobileNetV2_x2_0":
    "https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV2_x2_0_pretrained.pdparams"
}

__all__ = ["MobileNetV2"]


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 num_channels,
                 filter_size,
                 num_filters,
                 stride,
                 padding,
                 channels=None,
                 num_groups=1,
                 name=None,
                 use_cudnn=True):
        super(ConvBNLayer, self).__init__()

        self._conv = Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)

        self._batch_norm = BatchNorm(
            num_filters,
            param_attr=ParamAttr(name=name + "_bn_scale"),
            bias_attr=ParamAttr(name=name + "_bn_offset"),
            moving_mean_name=name + "_bn_mean",
            moving_variance_name=name + "_bn_variance")

    def forward(self, inputs, if_act=True):
        y = self._conv(inputs)
        y = self._batch_norm(y)
        if if_act:
            y = F.relu6(y)
        return y


class InvertedResidualUnit(nn.Layer):
    def __init__(self, num_channels, num_in_filter, num_filters, stride,
                 filter_size, padding, expansion_factor, name):
        super(InvertedResidualUnit, self).__init__()
        num_expfilter = int(round(num_in_filter * expansion_factor))
        self._expand_conv = ConvBNLayer(
            num_channels=num_channels,
            num_filters=num_expfilter,
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1,
            name=name + "_expand")

        self._bottleneck_conv = ConvBNLayer(
            num_channels=num_expfilter,
            num_filters=num_expfilter,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            num_groups=num_expfilter,
            use_cudnn=False,
            name=name + "_dwise")

        self._linear_conv = ConvBNLayer(
            num_channels=num_expfilter,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            padding=0,
            num_groups=1,
            name=name + "_linear")

    def forward(self, inputs, ifshortcut):
        y = self._expand_conv(inputs, if_act=True)
        y = self._bottleneck_conv(y, if_act=True)
        y = self._linear_conv(y, if_act=False)
        if ifshortcut:
            y = paddle.add(inputs, y)
        return y


class InvresiBlocks(nn.Layer):
    def __init__(self, in_c, t, c, n, s, name):
        super(InvresiBlocks, self).__init__()

        self._first_block = InvertedResidualUnit(
            num_channels=in_c,
            num_in_filter=in_c,
            num_filters=c,
            stride=s,
            filter_size=3,
            padding=1,
            expansion_factor=t,
            name=name + "_1")

        self._block_list = []
        for i in range(1, n):
            block = self.add_sublayer(
                name + "_" + str(i + 1),
                sublayer=InvertedResidualUnit(
                    num_channels=c,
                    num_in_filter=c,
                    num_filters=c,
                    stride=1,
                    filter_size=3,
                    padding=1,
                    expansion_factor=t,
                    name=name + "_" + str(i + 1)))
            self._block_list.append(block)

    def forward(self, inputs):
        y = self._first_block(inputs, ifshortcut=False)
        for block in self._block_list:
            y = block(y, ifshortcut=True)
        return y


@manager.BACKBONES.add_component
class MobileNet(nn.Layer):
    def __init__(self,
                 input_channels=3,
                 scale=1.0,
                 pretrained=None,
                 prefix_name=""):
        super(MobileNet, self).__init__()
        self.scale = scale

        bottleneck_params_list = [
            (1, 16, 1, 1),
            (6, 24, 2, 2),
            (6, 32, 3, 2),
            (6, 64, 4, 2),
            (6, 96, 3, 1),
            (6, 160, 3, 2),
            (6, 320, 1, 1),
        ]

        self.conv1 = ConvBNLayer(
            num_channels=input_channels,
            num_filters=int(32 * scale),
            filter_size=3,
            stride=2,
            padding=1,
            name=prefix_name + "conv1_1")

        self.block_list = []
        i = 1
        in_c = int(32 * scale)
        for layer_setting in bottleneck_params_list:
            t, c, n, s = layer_setting
            i += 1
            block = self.add_sublayer(
                prefix_name + "conv" + str(i),
                sublayer=InvresiBlocks(
                    in_c=in_c,
                    t=t,
                    c=int(c * scale),
                    n=n,
                    s=s,
                    name=prefix_name + "conv" + str(i)))
            self.block_list.append(block)
            in_c = int(c * scale)

        self.out_c = int(1280 * scale) if scale > 1.0 else 1280
        self.conv9 = ConvBNLayer(
            num_channels=in_c,
            num_filters=self.out_c,
            filter_size=1,
            stride=1,
            padding=0,
            name=prefix_name + "conv9")

        self.feat_channels = [int(i * scale) for i in [16, 24, 32, 96, 1280]]
        self.pretrained = pretrained
        self.init_weight()

    def forward(self, inputs):
        feat_list = []
        y = self.conv1(inputs, if_act=True)

        block_index = 0
        for block in self.block_list:
            y = block(y)
            if block_index in [0, 1, 2, 4]:
                feat_list.append(y)
            block_index += 1

        y = self.conv9(y, if_act=True)
        feat_list.append(y)
        return feat_list

    def init_weight(self):
        ppmatting.utils.load_pretrained_model(self, self.pretrained)


@manager.BACKBONES.add_component
def MobileNetV2(**kwargs):
    model = MobileNet(scale=1.0, **kwargs)
    return model
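A minimal usage sketch for this backbone (not part of the commit); it assumes the import path mirrors the file location and that ppmatting.utils.load_pretrained_model merely warns when the pretrained path is None:

import paddle
from ppmatting.models.backbone.mobilenet_v2 import MobileNetV2

# Build the backbone without pretrained weights.
backbone = MobileNetV2(pretrained=None)
print(backbone.feat_channels)  # [16, 24, 32, 96, 1280]

# One 512x512 RGB image; the backbone collects features after blocks
# 0/1/2/4 plus the final 1x1 conv (conv9), a 5-level pyramid.
x = paddle.rand([1, 3, 512, 512])
feats = backbone(x)
for f in feats:
    print(f.shape)  # strides 2, 4, 8, 16, 32 relative to the input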
Matting/ppmatting/models/backbone/resnet_vd.py (new file, mode 100644)

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

from paddleseg.cvlibs import manager
from paddleseg.models import layers
import ppmatting

__all__ = [
    "ResNet18_vd", "ResNet34_vd", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd"
]


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 dilation=1,
                 groups=1,
                 is_vd_mode=False,
                 act=None):
        super(ConvBNLayer, self).__init__()

        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self._conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2 if dilation == 1 else 0,
            dilation=dilation,
            groups=groups,
            bias_attr=False)

        self._batch_norm = layers.SyncBatchNorm(out_channels)
        self._act_op = layers.Activation(act=act)

    def forward(self, inputs):
        if self.is_vd_mode:
            inputs = self._pool2d_avg(inputs)
        y = self._conv(inputs)
        y = self._batch_norm(y)
        y = self._act_op(y)
        return y


class BottleneckBlock(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 dilation=1):
        super(BottleneckBlock, self).__init__()

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu')

        self.dilation = dilation

        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            dilation=dilation)
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels * 4,
            kernel_size=1,
            act=None)

        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels * 4,
                kernel_size=1,
                stride=1,
                is_vd_mode=False if if_first or stride == 1 else True)

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)

        ####################################################################
        # If the given dilation rate > 1, use the corresponding padding.
        # The performance drops without the following padding.
        if self.dilation > 1:
            padding = self.dilation
            y = F.pad(y, [padding, padding, padding, padding])
        #####################################################################

        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)

        y = paddle.add(x=short, y=conv2)
        y = F.relu(y)
        return y


class BasicBlock(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False):
        super(BasicBlock, self).__init__()
        self.stride = stride
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu')
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None)

        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                is_vd_mode=False if if_first or stride == 1 else True)

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv1)
        y = F.relu(y)
        return y


class ResNet_vd(nn.Layer):
    """
    The ResNet_vd implementation based on PaddlePaddle.

    The original article refers to Jingdong's
    Tong He, et al. "Bag of Tricks for Image Classification with Convolutional Neural Networks"
    (https://arxiv.org/pdf/1812.01187.pdf).

    Args:
        layers (int, optional): The layers of ResNet_vd. The supported layers are (18, 34, 50, 101, 152, 200). Default: 50.
        output_stride (int, optional): The stride of output features compared to input images. It is 8, 16 or 32. Default: 32.
        multi_grid (tuple|list, optional): The grid of stage4. Default: (1, 1, 1).
        pretrained (str, optional): The path of pretrained model.
    """

    def __init__(self,
                 input_channels=3,
                 layers=50,
                 output_stride=32,
                 multi_grid=(1, 1, 1),
                 pretrained=None):
        super(ResNet_vd, self).__init__()

        self.conv1_logit = None  # for gscnn shape stream
        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]
        num_channels = [64, 256, 512, 1024
                        ] if layers >= 50 else [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512]

        # for channels of four returned stages
        self.feat_channels = [c * 4 for c in num_filters
                              ] if layers >= 50 else num_filters
        self.feat_channels = [64] + self.feat_channels

        dilation_dict = None
        if output_stride == 8:
            dilation_dict = {2: 2, 3: 4}
        elif output_stride == 16:
            dilation_dict = {3: 2}

        self.conv1_1 = ConvBNLayer(
            in_channels=input_channels,
            out_channels=32,
            kernel_size=3,
            stride=2,
            act='relu')
        self.conv1_2 = ConvBNLayer(
            in_channels=32,
            out_channels=32,
            kernel_size=3,
            stride=1,
            act='relu')
        self.conv1_3 = ConvBNLayer(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act='relu')
        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

        # self.block_list = []
        self.stage_list = []
        if layers >= 50:
            for block in range(len(depth)):
                shortcut = False
                block_list = []
                for i in range(depth[block]):
                    if layers in [101, 152] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)

                    ###############################################################################
                    # Add dilation rate for some segmentation tasks, if dilation_dict is not None.
                    dilation_rate = dilation_dict[
                        block] if dilation_dict and block in dilation_dict else 1

                    # Actually block here is 'stage', and i is 'block' in 'stage'
                    # At stage 4, expand the dilation_rate if multi_grid is given
                    if block == 3:
                        dilation_rate = dilation_rate * multi_grid[i]
                    ###############################################################################

                    bottleneck_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BottleneckBlock(
                            in_channels=num_channels[block]
                            if i == 0 else num_filters[block] * 4,
                            out_channels=num_filters[block],
                            stride=2 if i == 0 and block != 0 and
                            dilation_rate == 1 else 1,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            dilation=dilation_rate))

                    block_list.append(bottleneck_block)
                    shortcut = True
                self.stage_list.append(block_list)
        else:
            for block in range(len(depth)):
                shortcut = False
                block_list = []
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    basic_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BasicBlock(
                            in_channels=num_channels[block]
                            if i == 0 else num_filters[block],
                            out_channels=num_filters[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
                            if_first=block == i == 0))
                    block_list.append(basic_block)
                    shortcut = True
                self.stage_list.append(block_list)

        self.pretrained = pretrained
        self.init_weight()

    def forward(self, inputs):
        feat_list = []
        y = self.conv1_1(inputs)
        y = self.conv1_2(y)
        y = self.conv1_3(y)
        feat_list.append(y)

        y = self.pool2d_max(y)

        # The feature list saves the output feature map of each stage.
        for stage in self.stage_list:
            for block in stage:
                y = block(y)
            feat_list.append(y)

        return feat_list

    def init_weight(self):
        ppmatting.utils.load_pretrained_model(self, self.pretrained)


@manager.BACKBONES.add_component
def ResNet18_vd(**args):
    model = ResNet_vd(layers=18, **args)
    return model


@manager.BACKBONES.add_component
def ResNet34_vd(**args):
    model = ResNet_vd(layers=34, **args)
    return model


@manager.BACKBONES.add_component
def ResNet50_vd(**args):
    model = ResNet_vd(layers=50, **args)
    return model


@manager.BACKBONES.add_component
def ResNet101_vd(**args):
    model = ResNet_vd(layers=101, **args)
    return model


def ResNet152_vd(**args):
    model = ResNet_vd(layers=152, **args)
    return model


def ResNet200_vd(**args):
    model = ResNet_vd(layers=200, **args)
    return model
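A short usage sketch (not part of the commit, same import-path assumption as above). output_stride=32 keeps plain ResNet strides; 8 or 16 switch the deeper stages to dilated convolutions via dilation_dict:

import paddle
from ppmatting.models.backbone.resnet_vd import ResNet50_vd

backbone = ResNet50_vd(output_stride=32, pretrained=None)
print(backbone.feat_channels)  # [64, 256, 512, 1024, 2048]

x = paddle.rand([1, 3, 512, 512])
feats = backbone(x)  # stem output + one feature map per residual stage
for f in feats:
    print(f.shape)  # strides 2, 4, 8, 16, 32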
Matting/ppmatting/models/backbone/stdcnet.py (new file, mode 100644)

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math

import paddle
import paddle.nn as nn

from paddleseg.utils import utils
from paddleseg.cvlibs import manager, param_init
from paddleseg.models.layers.layer_libs import SyncBatchNorm

__all__ = ["STDC1", "STDC2", "STDC_Small", "STDC_Tiny"]


class STDCNet(nn.Layer):
    """
    The STDCNet implementation based on PaddlePaddle.

    The original article refers to Meituan
    Fan, Mingyuan, et al. "Rethinking BiSeNet For Real-time Semantic Segmentation."
    (https://arxiv.org/abs/2104.13188)

    Args:
        base (int, optional): base channels. Default: 64.
        layers (list, optional): layers numbers list. It determines the number of STDC blocks in STDCNet's stages 3/4/5. Default: [4, 5, 3].
        block_num (int, optional): block_num of features block. Default: 4.
        type (str, optional): feature fusion method, "cat"/"add". Default: "cat".
        pretrained (str, optional): the path of pretrained model.
    """

    def __init__(self,
                 input_channels=3,
                 channels=[32, 64, 256, 512, 1024],
                 layers=[4, 5, 3],
                 block_num=4,
                 type="cat",
                 pretrained=None):
        super(STDCNet, self).__init__()
        if type == "cat":
            block = CatBottleneck
        elif type == "add":
            block = AddBottleneck
        self.input_channels = input_channels
        self.layers = layers
        self.feat_channels = channels
        self.features = self._make_layers(channels, layers, block_num, block)

        self.pretrained = pretrained
        self.init_weight()

    def forward(self, x):
        """
        forward function for feature extraction.
        """
        out_feats = []

        x = self.features[0](x)
        out_feats.append(x)
        x = self.features[1](x)
        out_feats.append(x)

        idx = [[2, 2 + self.layers[0]],
               [2 + self.layers[0], 2 + sum(self.layers[0:2])],
               [2 + sum(self.layers[0:2]), 2 + sum(self.layers)]]
        for start_idx, end_idx in idx:
            for i in range(start_idx, end_idx):
                x = self.features[i](x)
            out_feats.append(x)

        return out_feats

    def _make_layers(self, channels, layers, block_num, block):
        features = []
        features += [ConvBNRelu(self.input_channels, channels[0], 3, 2)]
        features += [ConvBNRelu(channels[0], channels[1], 3, 2)]

        for i, layer in enumerate(layers):
            for j in range(layer):
                if i == 0 and j == 0:
                    features.append(
                        block(channels[i + 1], channels[i + 2], block_num, 2))
                elif j == 0:
                    features.append(
                        block(channels[i + 1], channels[i + 2], block_num, 2))
                else:
                    features.append(
                        block(channels[i + 2], channels[i + 2], block_num, 1))

        return nn.Sequential(*features)

    def init_weight(self):
        for layer in self.sublayers():
            if isinstance(layer, nn.Conv2D):
                param_init.normal_init(layer.weight, std=0.001)
            elif isinstance(layer, (nn.BatchNorm, nn.SyncBatchNorm)):
                param_init.constant_init(layer.weight, value=1.0)
                param_init.constant_init(layer.bias, value=0.0)
        if self.pretrained is not None:
            utils.load_pretrained_model(self, self.pretrained)


class ConvBNRelu(nn.Layer):
    def __init__(self, in_planes, out_planes, kernel=3, stride=1):
        super(ConvBNRelu, self).__init__()
        self.conv = nn.Conv2D(
            in_planes,
            out_planes,
            kernel_size=kernel,
            stride=stride,
            padding=kernel // 2,
            bias_attr=False)
        self.bn = SyncBatchNorm(out_planes, data_format='NCHW')
        self.relu = nn.ReLU()

    def forward(self, x):
        out = self.relu(self.bn(self.conv(x)))
        return out


class AddBottleneck(nn.Layer):
    def __init__(self, in_planes, out_planes, block_num=3, stride=1):
        super(AddBottleneck, self).__init__()
        assert block_num > 1, "block number should be larger than 1."
        self.conv_list = nn.LayerList()
        self.stride = stride
        if stride == 2:
            self.avd_layer = nn.Sequential(
                nn.Conv2D(
                    out_planes // 2,
                    out_planes // 2,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    groups=out_planes // 2,
                    bias_attr=False),
                nn.BatchNorm2D(out_planes // 2))
            self.skip = nn.Sequential(
                nn.Conv2D(
                    in_planes,
                    in_planes,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    groups=in_planes,
                    bias_attr=False),
                nn.BatchNorm2D(in_planes),
                nn.Conv2D(
                    in_planes, out_planes, kernel_size=1, bias_attr=False),
                nn.BatchNorm2D(out_planes))
            stride = 1

        for idx in range(block_num):
            if idx == 0:
                self.conv_list.append(
                    ConvBNRelu(in_planes, out_planes // 2, kernel=1))
            elif idx == 1 and block_num == 2:
                self.conv_list.append(
                    ConvBNRelu(out_planes // 2, out_planes // 2, stride=stride))
            elif idx == 1 and block_num > 2:
                self.conv_list.append(
                    ConvBNRelu(out_planes // 2, out_planes // 4, stride=stride))
            elif idx < block_num - 1:
                self.conv_list.append(
                    ConvBNRelu(out_planes // int(math.pow(2, idx)),
                               out_planes // int(math.pow(2, idx + 1))))
            else:
                self.conv_list.append(
                    ConvBNRelu(out_planes // int(math.pow(2, idx)),
                               out_planes // int(math.pow(2, idx))))

    def forward(self, x):
        out_list = []
        out = x
        for idx, conv in enumerate(self.conv_list):
            if idx == 0 and self.stride == 2:
                out = self.avd_layer(conv(out))
            else:
                out = conv(out)
            out_list.append(out)
        if self.stride == 2:
            x = self.skip(x)
        return paddle.concat(out_list, axis=1) + x


class CatBottleneck(nn.Layer):
    def __init__(self, in_planes, out_planes, block_num=3, stride=1):
        super(CatBottleneck, self).__init__()
        assert block_num > 1, "block number should be larger than 1."
        self.conv_list = nn.LayerList()
        self.stride = stride
        if stride == 2:
            self.avd_layer = nn.Sequential(
                nn.Conv2D(
                    out_planes // 2,
                    out_planes // 2,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    groups=out_planes // 2,
                    bias_attr=False),
                nn.BatchNorm2D(out_planes // 2))
            self.skip = nn.AvgPool2D(kernel_size=3, stride=2, padding=1)
            stride = 1

        for idx in range(block_num):
            if idx == 0:
                self.conv_list.append(
                    ConvBNRelu(in_planes, out_planes // 2, kernel=1))
            elif idx == 1 and block_num == 2:
                self.conv_list.append(
                    ConvBNRelu(out_planes // 2, out_planes // 2, stride=stride))
            elif idx == 1 and block_num > 2:
                self.conv_list.append(
                    ConvBNRelu(out_planes // 2, out_planes // 4, stride=stride))
            elif idx < block_num - 1:
                self.conv_list.append(
                    ConvBNRelu(out_planes // int(math.pow(2, idx)),
                               out_planes // int(math.pow(2, idx + 1))))
            else:
                self.conv_list.append(
                    ConvBNRelu(out_planes // int(math.pow(2, idx)),
                               out_planes // int(math.pow(2, idx))))

    def forward(self, x):
        out_list = []
        out1 = self.conv_list[0](x)
        for idx, conv in enumerate(self.conv_list[1:]):
            if idx == 0:
                if self.stride == 2:
                    out = conv(self.avd_layer(out1))
                else:
                    out = conv(out1)
            else:
                out = conv(out)
            out_list.append(out)

        if self.stride == 2:
            out1 = self.skip(out1)
        out_list.insert(0, out1)

        out = paddle.concat(out_list, axis=1)
        return out


@manager.BACKBONES.add_component
def STDC2(**kwargs):
    model = STDCNet(
        channels=[32, 64, 256, 512, 1024], layers=[4, 5, 3], **kwargs)
    return model


@manager.BACKBONES.add_component
def STDC1(**kwargs):
    model = STDCNet(
        channels=[32, 64, 256, 512, 1024], layers=[2, 2, 2], **kwargs)
    return model


@manager.BACKBONES.add_component
def STDC_Small(**kwargs):
    model = STDCNet(
        channels=[32, 32, 64, 128, 256], layers=[4, 5, 3], **kwargs)
    return model


@manager.BACKBONES.add_component
def STDC_Tiny(**kwargs):
    model = STDCNet(
        channels=[32, 32, 64, 128, 256], layers=[2, 2, 2], **kwargs)
    return model
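A usage sketch (not part of the commit). Note how each bottleneck splits out_planes across its branches as out/2, out/4, ..., out/2^(n-1), out/2^(n-1), so with block_num=4 the concatenation in CatBottleneck recovers exactly out_planes (e.g. 128 + 64 + 32 + 32 = 256):

import paddle
from ppmatting.models.backbone.stdcnet import STDC1

backbone = STDC1(pretrained=None)
x = paddle.rand([1, 3, 512, 512])
feats = backbone(x)  # five maps: two stem convs + stages 3/4/5
for f in feats:
    print(f.shape)  # channels [32, 64, 256, 512, 1024], strides 2..32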
Matting/ppmatting/models/backbone/vgg.py (new file, mode 100644)

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddleseg.cvlibs import manager

import ppmatting


class ConvBlock(nn.Layer):
    def __init__(self, input_channels, output_channels, groups, name=None):
        super(ConvBlock, self).__init__()

        self.groups = groups
        self._conv_1 = Conv2D(
            in_channels=input_channels,
            out_channels=output_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            weight_attr=ParamAttr(name=name + "1_weights"),
            bias_attr=False)
        if groups == 2 or groups == 3 or groups == 4:
            self._conv_2 = Conv2D(
                in_channels=output_channels,
                out_channels=output_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(name=name + "2_weights"),
                bias_attr=False)
        if groups == 3 or groups == 4:
            self._conv_3 = Conv2D(
                in_channels=output_channels,
                out_channels=output_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(name=name + "3_weights"),
                bias_attr=False)
        if groups == 4:
            self._conv_4 = Conv2D(
                in_channels=output_channels,
                out_channels=output_channels,
                kernel_size=3,
                stride=1,
                padding=1,
                weight_attr=ParamAttr(name=name + "4_weights"),
                bias_attr=False)

        self._pool = MaxPool2D(
            kernel_size=2, stride=2, padding=0, return_mask=True)

    def forward(self, inputs):
        x = self._conv_1(inputs)
        x = F.relu(x)
        if self.groups == 2 or self.groups == 3 or self.groups == 4:
            x = self._conv_2(x)
            x = F.relu(x)
        if self.groups == 3 or self.groups == 4:
            x = self._conv_3(x)
            x = F.relu(x)
        if self.groups == 4:
            x = self._conv_4(x)
            x = F.relu(x)
        skip = x
        x, max_indices = self._pool(x)
        return x, max_indices, skip


class VGGNet(nn.Layer):
    def __init__(self, input_channels=3, layers=11, pretrained=None):
        super(VGGNet, self).__init__()

        self.pretrained = pretrained
        self.layers = layers
        self.vgg_configure = {
            11: [1, 1, 2, 2, 2],
            13: [2, 2, 2, 2, 2],
            16: [2, 2, 3, 3, 3],
            19: [2, 2, 4, 4, 4]
        }
        assert self.layers in self.vgg_configure.keys(), \
            "supported layers are {} but input layer is {}".format(
                self.vgg_configure.keys(), layers)
        self.groups = self.vgg_configure[self.layers]

        # For matting, the first conv layer takes a 4-channel input and its
        # weights are directly initialized to 0.
        self._conv_block_1 = ConvBlock(
            input_channels, 64, self.groups[0], name="conv1_")
        self._conv_block_2 = ConvBlock(64, 128, self.groups[1], name="conv2_")
        self._conv_block_3 = ConvBlock(128, 256, self.groups[2], name="conv3_")
        self._conv_block_4 = ConvBlock(256, 512, self.groups[3], name="conv4_")
        self._conv_block_5 = ConvBlock(512, 512, self.groups[4], name="conv5_")

        # This layer should be initialized from the converted parameters of
        # VGG fc6; the initialization can be left aside for now.
        self._conv_6 = Conv2D(
            512, 512, kernel_size=3, padding=1, bias_attr=False)

        self.init_weight()

    def forward(self, inputs):
        fea_list = []
        ids_list = []
        x, ids, skip = self._conv_block_1(inputs)
        fea_list.append(skip)
        ids_list.append(ids)
        x, ids, skip = self._conv_block_2(x)
        fea_list.append(skip)
        ids_list.append(ids)
        x, ids, skip = self._conv_block_3(x)
        fea_list.append(skip)
        ids_list.append(ids)
        x, ids, skip = self._conv_block_4(x)
        fea_list.append(skip)
        ids_list.append(ids)
        x, ids, skip = self._conv_block_5(x)
        fea_list.append(skip)
        ids_list.append(ids)
        x = F.relu(self._conv_6(x))
        fea_list.append(x)
        return fea_list

    def init_weight(self):
        if self.pretrained is not None:
            ppmatting.utils.load_pretrained_model(self, self.pretrained)


@manager.BACKBONES.add_component
def VGG11(**args):
    model = VGGNet(layers=11, **args)
    return model


@manager.BACKBONES.add_component
def VGG13(**args):
    model = VGGNet(layers=13, **args)
    return model


@manager.BACKBONES.add_component
def VGG16(**args):
    model = VGGNet(layers=16, **args)
    return model


@manager.BACKBONES.add_component
def VGG19(**args):
    model = VGGNet(layers=19, **args)
    return model
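A usage sketch (not part of the commit). For trimap-based matting the encoder takes RGB plus a trimap channel, hence input_channels=4:

import paddle
from ppmatting.models.backbone.vgg import VGG16

backbone = VGG16(input_channels=4, pretrained=None)
x = paddle.rand([1, 4, 320, 320])
fea_list = backbone(x)  # five pre-pool skip features + the conv_6 output
for f in fea_list:
    print(f.shape)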
Matting/ppmatting/models/dim.py (new file, mode 100644)

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import defaultdict

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.models import layers
from paddleseg import utils
from paddleseg.cvlibs import manager

from ppmatting.models.losses import MRSD


@manager.MODELS.add_component
class DIM(nn.Layer):
    """
    The DIM implementation based on PaddlePaddle.

    The original article refers to
    Ning Xu, et al. "Deep Image Matting"
    (https://arxiv.org/pdf/1908.07919.pdf).

    Args:
        backbone: backbone model.
        stage (int, optional): The stage of model. Default: 3.
        decoder_input_channels (int, optional): The channel of decoder input. Default: 512.
        pretrained (str, optional): The path of pretrained model. Default: None.
    """

    def __init__(self,
                 backbone,
                 stage=3,
                 decoder_input_channels=512,
                 pretrained=None):
        super().__init__()
        self.backbone = backbone
        self.pretrained = pretrained
        self.stage = stage
        self.loss_func_dict = None

        decoder_output_channels = [64, 128, 256, 512]
        self.decoder = Decoder(
            input_channels=decoder_input_channels,
            output_channels=decoder_output_channels)
        if self.stage == 2:
            for param in self.backbone.parameters():
                param.stop_gradient = True
            for param in self.decoder.parameters():
                param.stop_gradient = True
        if self.stage >= 2:
            self.refine = Refine()
        self.init_weight()

    def forward(self, inputs):
        input_shape = paddle.shape(inputs['img'])[-2:]
        x = paddle.concat([inputs['img'], inputs['trimap'] / 255], axis=1)
        fea_list = self.backbone(x)

        # decoder stage
        up_shape = []
        for i in range(5):
            up_shape.append(paddle.shape(fea_list[i])[-2:])
        alpha_raw = self.decoder(fea_list, up_shape)
        alpha_raw = F.interpolate(
            alpha_raw, input_shape, mode='bilinear', align_corners=False)
        logit_dict = {'alpha_raw': alpha_raw}
        if self.stage < 2:
            return logit_dict

        if self.stage >= 2:
            # refine stage
            refine_input = paddle.concat([inputs['img'], alpha_raw], axis=1)
            alpha_refine = self.refine(refine_input)

            # final alpha
            alpha_pred = alpha_refine + alpha_raw
            alpha_pred = F.interpolate(
                alpha_pred, input_shape, mode='bilinear', align_corners=False)
            if not self.training:
                alpha_pred = paddle.clip(alpha_pred, min=0, max=1)
            logit_dict['alpha_pred'] = alpha_pred
        if self.training:
            loss_dict = self.loss(logit_dict, inputs)
            return logit_dict, loss_dict
        else:
            return alpha_pred

    def loss(self, logit_dict, label_dict, loss_func_dict=None):
        if loss_func_dict is None:
            if self.loss_func_dict is None:
                self.loss_func_dict = defaultdict(list)
                self.loss_func_dict['alpha_raw'].append(MRSD())
                self.loss_func_dict['comp'].append(MRSD())
                self.loss_func_dict['alpha_pred'].append(MRSD())
        else:
            self.loss_func_dict = loss_func_dict

        loss = {}
        mask = label_dict['trimap'] == 128
        loss['all'] = 0

        if self.stage != 2:
            loss['alpha_raw'] = self.loss_func_dict['alpha_raw'][0](
                logit_dict['alpha_raw'], label_dict['alpha'], mask)
            loss['alpha_raw'] = 0.5 * loss['alpha_raw']
            loss['all'] = loss['all'] + loss['alpha_raw']

        if self.stage == 1 or self.stage == 3:
            comp_pred = logit_dict['alpha_raw'] * label_dict['fg'] + \
                (1 - logit_dict['alpha_raw']) * label_dict['bg']
            loss['comp'] = self.loss_func_dict['comp'][0](
                comp_pred, label_dict['img'], mask)
            loss['comp'] = 0.5 * loss['comp']
            loss['all'] = loss['all'] + loss['comp']

        if self.stage == 2 or self.stage == 3:
            loss['alpha_pred'] = self.loss_func_dict['alpha_pred'][0](
                logit_dict['alpha_pred'], label_dict['alpha'], mask)
            loss['all'] = loss['all'] + loss['alpha_pred']

        return loss

    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)


# bilinear interpolate skip connect
class Up(nn.Layer):
    def __init__(self, input_channels, output_channels):
        super().__init__()
        self.conv = layers.ConvBNReLU(
            input_channels,
            output_channels,
            kernel_size=5,
            padding=2,
            bias_attr=False)

    def forward(self, x, skip, output_shape):
        x = F.interpolate(
            x, size=output_shape, mode='bilinear', align_corners=False)
        x = x + skip
        x = self.conv(x)
        x = F.relu(x)
        return x


class Decoder(nn.Layer):
    def __init__(self, input_channels, output_channels=(64, 128, 256, 512)):
        super().__init__()
        self.deconv6 = nn.Conv2D(
            input_channels, input_channels, kernel_size=1, bias_attr=False)
        self.deconv5 = Up(input_channels, output_channels[-1])
        self.deconv4 = Up(output_channels[-1], output_channels[-2])
        self.deconv3 = Up(output_channels[-2], output_channels[-3])
        self.deconv2 = Up(output_channels[-3], output_channels[-4])
        self.deconv1 = Up(output_channels[-4], 64)

        self.alpha_conv = nn.Conv2D(
            64, 1, kernel_size=5, padding=2, bias_attr=False)

    def forward(self, fea_list, shape_list):
        x = fea_list[-1]
        x = self.deconv6(x)
        x = self.deconv5(x, fea_list[4], shape_list[4])
        x = self.deconv4(x, fea_list[3], shape_list[3])
        x = self.deconv3(x, fea_list[2], shape_list[2])
        x = self.deconv2(x, fea_list[1], shape_list[1])
        x = self.deconv1(x, fea_list[0], shape_list[0])
        alpha = self.alpha_conv(x)
        alpha = F.sigmoid(alpha)
        return alpha


class Refine(nn.Layer):
    def __init__(self):
        super().__init__()
        self.conv1 = layers.ConvBNReLU(
            4, 64, kernel_size=3, padding=1, bias_attr=False)
        self.conv2 = layers.ConvBNReLU(
            64, 64, kernel_size=3, padding=1, bias_attr=False)
        self.conv3 = layers.ConvBNReLU(
            64, 64, kernel_size=3, padding=1, bias_attr=False)
        self.alpha_pred = layers.ConvBNReLU(
            64, 1, kernel_size=3, padding=1, bias_attr=False)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        alpha = self.alpha_pred(x)
        return alpha
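A usage sketch (not part of the commit) pairing DIM with the VGG16 backbone above. DIM concatenates img (3 channels) with trimap (1 channel), so the encoder needs input_channels=4; the trimap values here are random and only exercise the shapes:

import paddle
from ppmatting.models.backbone.vgg import VGG16
from ppmatting.models.dim import DIM

model = DIM(backbone=VGG16(input_channels=4, pretrained=None))
model.eval()

data = {
    'img': paddle.rand([1, 3, 320, 320]),
    'trimap': paddle.randint(0, 256, [1, 1, 320, 320]).astype('float32'),
}
alpha = model(data)  # (1, 1, 320, 320) alpha matte clipped to [0, 1]
print(alpha.shape)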
Matting/ppmatting/models/gca.py (new file, mode 100644)

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# The gca code was heavily based on https://github.com/Yaoyi-Li/GCA-Matting
# and https://github.com/open-mmlab/mmediting

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.models import layers
from paddleseg import utils
from paddleseg.cvlibs import manager, param_init

from ppmatting.models.layers import GuidedCxtAtten


@manager.MODELS.add_component
class GCABaseline(nn.Layer):
    def __init__(self, backbone, pretrained=None):
        super().__init__()
        self.encoder = backbone
        self.decoder = ResShortCut_D_Dec([2, 3, 3, 2])

    def forward(self, inputs):
        x = paddle.concat([inputs['img'], inputs['trimap'] / 255], axis=1)
        embedding, mid_fea = self.encoder(x)
        alpha_pred = self.decoder(embedding, mid_fea)

        if self.training:
            logit_dict = {'alpha_pred': alpha_pred, }
            loss_dict = {}
            alpha_gt = inputs['alpha']
            loss_dict["alpha"] = F.l1_loss(alpha_pred, alpha_gt)
            loss_dict["all"] = loss_dict["alpha"]
            return logit_dict, loss_dict

        return alpha_pred


@manager.MODELS.add_component
class GCA(GCABaseline):
    def __init__(self, backbone, pretrained=None):
        super().__init__(backbone, pretrained)
        self.decoder = ResGuidedCxtAtten_Dec([2, 3, 3, 2])


def conv5x5(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """5x5 convolution with padding"""
    return nn.Conv2D(
        in_planes,
        out_planes,
        kernel_size=5,
        stride=stride,
        padding=2,
        groups=groups,
        bias_attr=False,
        dilation=dilation)


def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """3x3 convolution with padding"""
    return nn.Conv2D(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        groups=groups,
        bias_attr=False,
        dilation=dilation)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2D(
        in_planes, out_planes, kernel_size=1, stride=stride, bias_attr=False)


class BasicBlock(nn.Layer):
    expansion = 1

    def __init__(self,
                 inplanes,
                 planes,
                 stride=1,
                 upsample=None,
                 norm_layer=None,
                 large_kernel=False):
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm
        self.stride = stride
        conv = conv5x5 if large_kernel else conv3x3
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        if self.stride > 1:
            self.conv1 = nn.utils.spectral_norm(
                nn.Conv2DTranspose(
                    inplanes,
                    inplanes,
                    kernel_size=4,
                    stride=2,
                    padding=1,
                    bias_attr=False))
        else:
            self.conv1 = nn.utils.spectral_norm(conv(inplanes, inplanes))
        self.bn1 = norm_layer(inplanes)
        self.activation = nn.LeakyReLU(0.2)
        self.conv2 = nn.utils.spectral_norm(conv(inplanes, planes))
        self.bn2 = norm_layer(planes)
        self.upsample = upsample

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.activation(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.upsample is not None:
            identity = self.upsample(x)

        out += identity
        out = self.activation(out)

        return out


class ResNet_D_Dec(nn.Layer):
    def __init__(self,
                 layers=[3, 4, 4, 2],
                 norm_layer=None,
                 large_kernel=False,
                 late_downsample=False):
        super().__init__()

        if norm_layer is None:
            norm_layer = nn.BatchNorm
        self._norm_layer = norm_layer
        self.large_kernel = large_kernel
        self.kernel_size = 5 if self.large_kernel else 3

        self.inplanes = 512 if layers[0] > 0 else 256
        self.late_downsample = late_downsample
        self.midplanes = 64 if late_downsample else 32

        self.conv1 = nn.utils.spectral_norm(
            nn.Conv2DTranspose(
                self.midplanes,
                32,
                kernel_size=4,
                stride=2,
                padding=1,
                bias_attr=False))
        self.bn1 = norm_layer(32)
        self.leaky_relu = nn.LeakyReLU(0.2)
        self.conv2 = nn.Conv2D(
            32,
            1,
            kernel_size=self.kernel_size,
            stride=1,
            padding=self.kernel_size // 2)
        self.upsample = nn.UpsamplingNearest2D(scale_factor=2)
        self.tanh = nn.Tanh()
        self.layer1 = self._make_layer(BasicBlock, 256, layers[0], stride=2)
        self.layer2 = self._make_layer(BasicBlock, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(BasicBlock, 64, layers[2], stride=2)
        self.layer4 = self._make_layer(
            BasicBlock, self.midplanes, layers[3], stride=2)

        self.init_weight()

    def _make_layer(self, block, planes, blocks, stride=1):
        if blocks == 0:
            return nn.Sequential(nn.Identity())
        norm_layer = self._norm_layer
        upsample = None
        if stride != 1:
            upsample = nn.Sequential(
                nn.UpsamplingNearest2D(scale_factor=2),
                nn.utils.spectral_norm(
                    conv1x1(self.inplanes, planes * block.expansion)),
                norm_layer(planes * block.expansion))
        elif self.inplanes != planes * block.expansion:
            upsample = nn.Sequential(
                nn.utils.spectral_norm(
                    conv1x1(self.inplanes, planes * block.expansion)),
                norm_layer(planes * block.expansion))

        layers = [
            block(self.inplanes, planes, stride, upsample, norm_layer,
                  self.large_kernel)
        ]
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(
                block(
                    self.inplanes,
                    planes,
                    norm_layer=norm_layer,
                    large_kernel=self.large_kernel))

        return nn.Sequential(*layers)

    def forward(self, x, mid_fea):
        x = self.layer1(x)  # N x 256 x 32 x 32
        x = self.layer2(x)  # N x 128 x 64 x 64
        x = self.layer3(x)  # N x 64 x 128 x 128
        x = self.layer4(x)  # N x 32 x 256 x 256
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.leaky_relu(x)
        x = self.conv2(x)

        alpha = (self.tanh(x) + 1.0) / 2.0

        return alpha

    def init_weight(self):
        for layer in self.sublayers():
            if isinstance(layer, nn.Conv2D):
                if hasattr(layer, "weight_orig"):
                    param = layer.weight_orig
                else:
                    param = layer.weight
                param_init.xavier_uniform(param)
            elif isinstance(layer, (nn.BatchNorm, nn.SyncBatchNorm)):
                param_init.constant_init(layer.weight, value=1.0)
                param_init.constant_init(layer.bias, value=0.0)
            elif isinstance(layer, BasicBlock):
                param_init.constant_init(layer.bn2.weight, value=0.0)


class ResShortCut_D_Dec(ResNet_D_Dec):
    def __init__(self,
                 layers,
                 norm_layer=None,
                 large_kernel=False,
                 late_downsample=False):
        super().__init__(
            layers, norm_layer, large_kernel, late_downsample=late_downsample)

    def forward(self, x, mid_fea):
        fea1, fea2, fea3, fea4, fea5 = mid_fea['shortcut']
        x = self.layer1(x) + fea5
        x = self.layer2(x) + fea4
        x = self.layer3(x) + fea3
        x = self.layer4(x) + fea2
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.leaky_relu(x) + fea1
        x = self.conv2(x)

        alpha = (self.tanh(x) + 1.0) / 2.0

        return alpha


class ResGuidedCxtAtten_Dec(ResNet_D_Dec):
    def __init__(self,
                 layers,
                 norm_layer=None,
                 large_kernel=False,
                 late_downsample=False):
        super().__init__(
            layers, norm_layer, large_kernel, late_downsample=late_downsample)
        self.gca = GuidedCxtAtten(128, 128)

    def forward(self, x, mid_fea):
        fea1, fea2, fea3, fea4, fea5 = mid_fea['shortcut']
        im = mid_fea['image_fea']
        x = self.layer1(x) + fea5  # N x 256 x 32 x 32
        x = self.layer2(x) + fea4  # N x 128 x 64 x 64
        x = self.gca(im, x, mid_fea['unknown'])  # contextual attention
        x = self.layer3(x) + fea3  # N x 64 x 128 x 128
        x = self.layer4(x) + fea2  # N x 32 x 256 x 256
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.leaky_relu(x) + fea1
        x = self.conv2(x)

        alpha = (self.tanh(x) + 1.0) / 2.0

        return alpha
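A standalone sketch of the decoder's contract (not part of the commit; the encoder that produces these tensors lives elsewhere, so the shapes and channel counts below are hand-derived assumptions for a 512x512 input):

import paddle
from ppmatting.models.gca import ResShortCut_D_Dec

decoder = ResShortCut_D_Dec([2, 3, 3, 2])

# Synthetic encoder outputs: a 1/32-resolution embedding plus five
# shortcut features ordered shallow to deep.
embedding = paddle.rand([1, 512, 16, 16])
shortcuts = [
    paddle.rand([1, 32, 512, 512]),   # fea1
    paddle.rand([1, 32, 256, 256]),   # fea2
    paddle.rand([1, 64, 128, 128]),   # fea3
    paddle.rand([1, 128, 64, 64]),    # fea4
    paddle.rand([1, 256, 32, 32]),    # fea5
]
alpha = decoder(embedding, {'shortcut': shortcuts})
print(alpha.shape)  # (1, 1, 512, 512); (tanh + 1) / 2 keeps values in [0, 1]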
Matting/ppmatting/models/human_matting.py
0 → 100644
View file @
0d97cc8c
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
collections
import
defaultdict
import
time
import
paddle
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
import
paddleseg
from
paddleseg.models
import
layers
from
paddleseg
import
utils
from
paddleseg.cvlibs
import
manager
from
ppmatting.models.losses
import
MRSD
def
conv_up_psp
(
in_channels
,
out_channels
,
up_sample
):
return
nn
.
Sequential
(
layers
.
ConvBNReLU
(
in_channels
,
out_channels
,
3
,
padding
=
1
),
nn
.
Upsample
(
scale_factor
=
up_sample
,
mode
=
'bilinear'
,
align_corners
=
False
))
@
manager
.
MODELS
.
add_component
class
HumanMatting
(
nn
.
Layer
):
"""A model for """
def
__init__
(
self
,
backbone
,
pretrained
=
None
,
backbone_scale
=
0.25
,
refine_kernel_size
=
3
,
if_refine
=
True
):
super
().
__init__
()
if
if_refine
:
if
backbone_scale
>
0.5
:
raise
ValueError
(
'Backbone_scale should not be greater than 1/2, but it is {}'
.
format
(
backbone_scale
))
else
:
backbone_scale
=
1
self
.
backbone
=
backbone
self
.
backbone_scale
=
backbone_scale
self
.
pretrained
=
pretrained
self
.
if_refine
=
if_refine
if
if_refine
:
self
.
refiner
=
Refiner
(
kernel_size
=
refine_kernel_size
)
self
.
loss_func_dict
=
None
self
.
backbone_channels
=
backbone
.
feat_channels
######################
### Decoder part - Glance
######################
self
.
psp_module
=
layers
.
PPModule
(
self
.
backbone_channels
[
-
1
],
512
,
bin_sizes
=
(
1
,
3
,
5
),
dim_reduction
=
False
,
align_corners
=
False
)
self
.
psp4
=
conv_up_psp
(
512
,
256
,
2
)
self
.
psp3
=
conv_up_psp
(
512
,
128
,
4
)
self
.
psp2
=
conv_up_psp
(
512
,
64
,
8
)
self
.
psp1
=
conv_up_psp
(
512
,
64
,
16
)
# stage 5g
self
.
decoder5_g
=
nn
.
Sequential
(
layers
.
ConvBNReLU
(
512
+
self
.
backbone_channels
[
-
1
],
512
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
512
,
512
,
3
,
padding
=
2
,
dilation
=
2
),
layers
.
ConvBNReLU
(
512
,
256
,
3
,
padding
=
2
,
dilation
=
2
),
nn
.
Upsample
(
scale_factor
=
2
,
mode
=
'bilinear'
,
align_corners
=
False
))
# stage 4g
self
.
decoder4_g
=
nn
.
Sequential
(
layers
.
ConvBNReLU
(
512
,
256
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
256
,
256
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
256
,
128
,
3
,
padding
=
1
),
nn
.
Upsample
(
scale_factor
=
2
,
mode
=
'bilinear'
,
align_corners
=
False
))
# stage 3g
self
.
decoder3_g
=
nn
.
Sequential
(
layers
.
ConvBNReLU
(
256
,
128
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
128
,
128
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
128
,
64
,
3
,
padding
=
1
),
nn
.
Upsample
(
scale_factor
=
2
,
mode
=
'bilinear'
,
align_corners
=
False
))
# stage 2g
self
.
decoder2_g
=
nn
.
Sequential
(
layers
.
ConvBNReLU
(
128
,
128
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
128
,
128
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
128
,
64
,
3
,
padding
=
1
),
nn
.
Upsample
(
scale_factor
=
2
,
mode
=
'bilinear'
,
align_corners
=
False
))
# stage 1g
self
.
decoder1_g
=
nn
.
Sequential
(
layers
.
ConvBNReLU
(
128
,
64
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
64
,
64
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
64
,
64
,
3
,
padding
=
1
),
nn
.
Upsample
(
scale_factor
=
2
,
mode
=
'bilinear'
,
align_corners
=
False
))
# stage 0g
self
.
decoder0_g
=
nn
.
Sequential
(
layers
.
ConvBNReLU
(
64
,
64
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
64
,
64
,
3
,
padding
=
1
),
nn
.
Conv2D
(
64
,
3
,
3
,
padding
=
1
))
##########################
### Decoder part - FOCUS
##########################
self
.
bridge_block
=
nn
.
Sequential
(
layers
.
ConvBNReLU
(
self
.
backbone_channels
[
-
1
],
512
,
3
,
dilation
=
2
,
padding
=
2
),
layers
.
ConvBNReLU
(
512
,
512
,
3
,
dilation
=
2
,
padding
=
2
),
layers
.
ConvBNReLU
(
512
,
512
,
3
,
dilation
=
2
,
padding
=
2
))
# stage 5f
self
.
decoder5_f
=
nn
.
Sequential
(
layers
.
ConvBNReLU
(
512
+
self
.
backbone_channels
[
-
1
],
512
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
512
,
512
,
3
,
padding
=
2
,
dilation
=
2
),
layers
.
ConvBNReLU
(
512
,
256
,
3
,
padding
=
2
,
dilation
=
2
),
nn
.
Upsample
(
scale_factor
=
2
,
mode
=
'bilinear'
,
align_corners
=
False
))
# stage 4f
self
.
decoder4_f
=
nn
.
Sequential
(
layers
.
ConvBNReLU
(
256
+
self
.
backbone_channels
[
-
2
],
256
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
256
,
256
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
256
,
128
,
3
,
padding
=
1
),
nn
.
Upsample
(
scale_factor
=
2
,
mode
=
'bilinear'
,
align_corners
=
False
))
# stage 3f
self
.
decoder3_f
=
nn
.
Sequential
(
layers
.
ConvBNReLU
(
128
+
self
.
backbone_channels
[
-
3
],
128
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
128
,
128
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
128
,
64
,
3
,
padding
=
1
),
nn
.
Upsample
(
scale_factor
=
2
,
mode
=
'bilinear'
,
align_corners
=
False
))
# stage 2f
self
.
decoder2_f
=
nn
.
Sequential
(
layers
.
ConvBNReLU
(
64
+
self
.
backbone_channels
[
-
4
],
128
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
128
,
128
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
128
,
64
,
3
,
padding
=
1
),
nn
.
Upsample
(
scale_factor
=
2
,
mode
=
'bilinear'
,
align_corners
=
False
))
# stage 1f
self
.
decoder1_f
=
nn
.
Sequential
(
layers
.
ConvBNReLU
(
64
+
self
.
backbone_channels
[
-
5
],
64
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
64
,
64
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
64
,
64
,
3
,
padding
=
1
),
nn
.
Upsample
(
scale_factor
=
2
,
mode
=
'bilinear'
,
align_corners
=
False
))
# stage 0f
self
.
decoder0_f
=
nn
.
Sequential
(
layers
.
ConvBNReLU
(
64
,
64
,
3
,
padding
=
1
),
layers
.
ConvBNReLU
(
64
,
64
,
3
,
padding
=
1
),
nn
.
Conv2D
(
64
,
1
+
1
+
32
,
3
,
padding
=
1
))
self
.
init_weight
()
def
forward
(
self
,
data
):
src
=
data
[
'img'
]
src_h
,
src_w
=
paddle
.
shape
(
src
)[
2
:]
if
self
.
if_refine
:
# It is not need when exporting.
if
isinstance
(
src_h
,
paddle
.
Tensor
):
if
(
src_h
%
4
!=
0
)
or
(
src_w
%
4
)
!=
0
:
raise
ValueError
(
'The input image must have width and height that are divisible by 4'
)
# Downsample src for backbone
src_sm
=
F
.
interpolate
(
src
,
scale_factor
=
self
.
backbone_scale
,
mode
=
'bilinear'
,
align_corners
=
False
)
# Base
fea_list
=
self
.
backbone
(
src_sm
)
##########################
### Decoder part - GLANCE
##########################
#psp: N, 512, H/32, W/32
psp
=
self
.
psp_module
(
fea_list
[
-
1
])
#d6_g: N, 512, H/16, W/16
d5_g
=
self
.
decoder5_g
(
paddle
.
concat
((
psp
,
fea_list
[
-
1
]),
1
))
#d5_g: N, 512, H/8, W/8
d4_g
=
self
.
decoder4_g
(
paddle
.
concat
((
self
.
psp4
(
psp
),
d5_g
),
1
))
#d4_g: N, 256, H/4, W/4
d3_g
=
self
.
decoder3_g
(
paddle
.
concat
((
self
.
psp3
(
psp
),
d4_g
),
1
))
#d4_g: N, 128, H/2, W/2
d2_g
=
self
.
decoder2_g
(
paddle
.
concat
((
self
.
psp2
(
psp
),
d3_g
),
1
))
#d2_g: N, 64, H, W
d1_g
=
self
.
decoder1_g
(
paddle
.
concat
((
self
.
psp1
(
psp
),
d2_g
),
1
))
#d0_g: N, 3, H, W
d0_g
=
self
.
decoder0_g
(
d1_g
)
# The 1st channel is foreground. The 2nd is transition region. The 3rd is background.
# glance_sigmoid = F.sigmoid(d0_g)
glance_sigmoid
=
F
.
softmax
(
d0_g
,
axis
=
1
)
##########################
### Decoder part - FOCUS
##########################
bb
=
self
.
bridge_block
(
fea_list
[
-
1
])
#bg: N, 512, H/32, W/32
d5_f
=
self
.
decoder5_f
(
paddle
.
concat
((
bb
,
fea_list
[
-
1
]),
1
))
#d5_f: N, 256, H/16, W/16
d4_f
=
self
.
decoder4_f
(
paddle
.
concat
((
d5_f
,
fea_list
[
-
2
]),
1
))
#d4_f: N, 128, H/8, W/8
d3_f
=
self
.
decoder3_f
(
paddle
.
concat
((
d4_f
,
fea_list
[
-
3
]),
1
))
#d3_f: N, 64, H/4, W/4
d2_f
=
self
.
decoder2_f
(
paddle
.
concat
((
d3_f
,
fea_list
[
-
4
]),
1
))
#d2_f: N, 64, H/2, W/2
d1_f
=
self
.
decoder1_f
(
paddle
.
concat
((
d2_f
,
fea_list
[
-
5
]),
1
))
#d1_f: N, 64, H, W
d0_f
=
self
.
decoder0_f
(
d1_f
)
#d0_f: N, 1, H, W
focus_sigmoid
=
F
.
sigmoid
(
d0_f
[:,
0
:
1
,
:,
:])
pha_sm
=
self
.
fusion
(
glance_sigmoid
,
focus_sigmoid
)
err_sm
=
d0_f
[:,
1
:
2
,
:,
:]
err_sm
=
paddle
.
clip
(
err_sm
,
0.
,
1.
)
hid_sm
=
F
.
relu
(
d0_f
[:,
2
:,
:,
:])
# Refiner
if
self
.
if_refine
:
pha
=
self
.
refiner
(
src
=
src
,
pha
=
pha_sm
,
err
=
err_sm
,
hid
=
hid_sm
,
tri
=
glance_sigmoid
)
# Clamp outputs
pha
=
paddle
.
clip
(
pha
,
0.
,
1.
)
if
self
.
training
:
logit_dict
=
{
'glance'
:
glance_sigmoid
,
'focus'
:
focus_sigmoid
,
'fusion'
:
pha_sm
,
'error'
:
err_sm
}
if
self
.
if_refine
:
logit_dict
[
'refine'
]
=
pha
loss_dict
=
self
.
loss
(
logit_dict
,
data
)
return
logit_dict
,
loss_dict
else
:
return
pha
if
self
.
if_refine
else
pha_sm
    def loss(self, logit_dict, label_dict, loss_func_dict=None):
        if loss_func_dict is None:
            if self.loss_func_dict is None:
                self.loss_func_dict = defaultdict(list)
                self.loss_func_dict['glance'].append(nn.NLLLoss())
                self.loss_func_dict['focus'].append(MRSD())
                self.loss_func_dict['cm'].append(MRSD())
                self.loss_func_dict['err'].append(paddleseg.models.MSELoss())
                self.loss_func_dict['refine'].append(paddleseg.models.L1Loss())
        else:
            self.loss_func_dict = loss_func_dict

        loss = {}

        # glance loss computation
        # get glance label
        glance_label = F.interpolate(
            label_dict['trimap'],
            logit_dict['glance'].shape[2:],
            mode='nearest',
            align_corners=False)
        glance_label_trans = (glance_label == 128).astype('int64')
        glance_label_bg = (glance_label == 0).astype('int64')
        glance_label = glance_label_trans + glance_label_bg * 2
        loss_glance = self.loss_func_dict['glance'][0](
            paddle.log(logit_dict['glance'] + 1e-6), glance_label.squeeze(1))
        loss['glance'] = loss_glance

        # focus loss computation
        focus_label = F.interpolate(
            label_dict['alpha'],
            logit_dict['focus'].shape[2:],
            mode='bilinear',
            align_corners=False)
        loss_focus = self.loss_func_dict['focus'][0](
            logit_dict['focus'], focus_label, glance_label_trans)
        loss['focus'] = loss_focus

        # collaborative matting loss
        loss_cm_func = self.loss_func_dict['cm']
        # fusion_sigmoid loss
        loss_cm = loss_cm_func[0](logit_dict['fusion'], focus_label)
        loss['cm'] = loss_cm

        # error loss
        err = F.interpolate(
            logit_dict['error'],
            label_dict['alpha'].shape[2:],
            mode='bilinear',
            align_corners=False)
        err_label = (F.interpolate(
            logit_dict['fusion'],
            label_dict['alpha'].shape[2:],
            mode='bilinear',
            align_corners=False) - label_dict['alpha']).abs()
        loss_err = self.loss_func_dict['err'][0](err, err_label)
        loss['err'] = loss_err

        loss_all = 0.25 * loss_glance + 0.25 * loss_focus + 0.25 * loss_cm + loss_err

        # refine loss
        if self.if_refine:
            loss_refine = self.loss_func_dict['refine'][0](
                logit_dict['refine'], label_dict['alpha'])
            loss['refine'] = loss_refine
            loss_all = loss_all + loss_refine

        loss['all'] = loss_all
        return loss
    def fusion(self, glance_sigmoid, focus_sigmoid):
        # glance_sigmoid [N, 3, H, W].
        # In index, 0 is foreground, 1 is transition, 2 is background.
        # After fusion, the foreground is 1, the background is 0, and the transition is in (0, 1).
        index = paddle.argmax(glance_sigmoid, axis=1, keepdim=True)
        transition_mask = (index == 1).astype('float32')
        fg = (index == 0).astype('float32')
        fusion_sigmoid = focus_sigmoid * transition_mask + fg
        return fusion_sigmoid
    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)
class Refiner(nn.Layer):
    '''
    Refiner refines the coarse output to full resolution.

    Args:
        kernel_size: The convolution kernel_size. Options: [1, 3]. Default: 3.
    '''

    def __init__(self, kernel_size=3):
        super().__init__()
        if kernel_size not in [1, 3]:
            raise ValueError("kernel_size must be in [1, 3]")

        self.kernel_size = kernel_size

        channels = [32, 24, 16, 12, 1]
        self.conv1 = layers.ConvBNReLU(
            channels[0] + 4 + 3,
            channels[1],
            kernel_size,
            padding=0,
            bias_attr=False)
        self.conv2 = layers.ConvBNReLU(
            channels[1], channels[2], kernel_size, padding=0, bias_attr=False)
        self.conv3 = layers.ConvBNReLU(
            channels[2] + 3,
            channels[3],
            kernel_size,
            padding=0,
            bias_attr=False)
        self.conv4 = nn.Conv2D(
            channels[3], channels[4], kernel_size, padding=0, bias_attr=True)

    def forward(self, src, pha, err, hid, tri):
        '''
        Args:
            src: (B, 3, H, W) full resolution source image.
            pha: (B, 1, Hc, Wc) coarse alpha prediction.
            err: (B, 1, Hc, Wc) coarse error prediction.
            hid: (B, 32, Hc, Wc) coarse hidden encoding.
            tri: (B, 3, Hc, Wc) trimap prediction.
        '''
        h_full, w_full = paddle.shape(src)[2:]
        h_half, w_half = h_full // 2, w_full // 2
        h_quat, w_quat = h_full // 4, w_full // 4

        x = paddle.concat([hid, pha, tri], axis=1)
        x = F.interpolate(
            x,
            paddle.concat((h_half, w_half)),
            mode='bilinear',
            align_corners=False)
        y = F.interpolate(
            src,
            paddle.concat((h_half, w_half)),
            mode='bilinear',
            align_corners=False)

        if self.kernel_size == 3:
            x = F.pad(x, [3, 3, 3, 3])
            y = F.pad(y, [3, 3, 3, 3])

        x = self.conv1(paddle.concat([x, y], axis=1))
        x = self.conv2(x)

        if self.kernel_size == 3:
            x = F.interpolate(x, paddle.concat((h_full + 4, w_full + 4)))
            y = F.pad(src, [2, 2, 2, 2])
        else:
            x = F.interpolate(
                x, paddle.concat((h_full, w_full)), mode='nearest')
            y = src

        x = self.conv3(paddle.concat([x, y], axis=1))
        x = self.conv4(x)
        pha = x
        return pha
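The fusion rule above is the heart of the glance/focus split: the glance softmax votes per pixel for foreground, transition, or background, and the focus alpha is trusted only inside the transition band. A minimal standalone sketch of that rule (toy tensors chosen for illustration, not part of this file):

import paddle
import paddle.nn.functional as F

d0_g = paddle.rand([1, 3, 2, 2])                  # toy glance logits
glance_sigmoid = F.softmax(d0_g, axis=1)
focus_sigmoid = paddle.rand([1, 1, 2, 2])         # toy focus alpha
index = paddle.argmax(glance_sigmoid, axis=1, keepdim=True)
transition_mask = (index == 1).astype('float32')  # trust focus here
fg = (index == 0).astype('float32')               # hard foreground -> 1
alpha = focus_sigmoid * transition_mask + fg      # background stays 0
print(alpha.shape)                                # [1, 1, 2, 2]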
Matting/ppmatting/models/layers/__init__.py
0 → 100644
View file @
0d97cc8c
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .gca_module import GuidedCxtAtten
from .tensor_fusion import MLFF
\ No newline at end of file
Matting/ppmatting/models/layers/gca_module.py
0 → 100644
View file @
0d97cc8c
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# The gca code was heavily based on https://github.com/Yaoyi-Li/GCA-Matting
# and https://github.com/open-mmlab/mmediting
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.cvlibs import param_init


class GuidedCxtAtten(nn.Layer):
    def __init__(self,
                 out_channels,
                 guidance_channels,
                 kernel_size=3,
                 stride=1,
                 rate=2):
        super().__init__()

        self.kernel_size = kernel_size
        self.rate = rate
        self.stride = stride
        self.guidance_conv = nn.Conv2D(
            in_channels=guidance_channels,
            out_channels=guidance_channels // 2,
            kernel_size=1)

        self.out_conv = nn.Sequential(
            nn.Conv2D(
                in_channels=out_channels,
                out_channels=out_channels,
                kernel_size=1,
                bias_attr=False),
            nn.BatchNorm(out_channels))

        self.init_weight()

    def init_weight(self):
        param_init.xavier_uniform(self.guidance_conv.weight)
        param_init.constant_init(self.guidance_conv.bias, value=0.0)
        param_init.xavier_uniform(self.out_conv[0].weight)
        param_init.constant_init(self.out_conv[1].weight, value=1e-3)
        param_init.constant_init(self.out_conv[1].bias, value=0.0)

    def forward(self, img_feat, alpha_feat, unknown=None, softmax_scale=1.):

        img_feat = self.guidance_conv(img_feat)
        img_feat = F.interpolate(
            img_feat, scale_factor=1 / self.rate, mode='nearest')

        # process unknown mask
        unknown, softmax_scale = self.process_unknown_mask(unknown, img_feat,
                                                           softmax_scale)

        img_ps, alpha_ps, unknown_ps = self.extract_feature_maps_patches(
            img_feat, alpha_feat, unknown)

        self_mask = self.get_self_correlation_mask(img_feat)

        # split tensors by batch dimension; tuple is returned
        img_groups = paddle.split(img_feat, 1, axis=0)
        img_ps_groups = paddle.split(img_ps, 1, axis=0)
        alpha_ps_groups = paddle.split(alpha_ps, 1, axis=0)
        unknown_ps_groups = paddle.split(unknown_ps, 1, axis=0)
        scale_groups = paddle.split(softmax_scale, 1, axis=0)
        groups = (img_groups, img_ps_groups, alpha_ps_groups,
                  unknown_ps_groups, scale_groups)

        y = []

        for img_i, img_ps_i, alpha_ps_i, unknown_ps_i, scale_i in zip(*groups):
            # conv for compare
            similarity_map = self.compute_similarity_map(img_i, img_ps_i)

            gca_score = self.compute_guided_attention_score(
                similarity_map, unknown_ps_i, scale_i, self_mask)

            yi = self.propagate_alpha_feature(gca_score, alpha_ps_i)

            y.append(yi)

        # back to the mini-batch
        y = paddle.concat(y, axis=0)
        y = paddle.reshape(y, alpha_feat.shape)

        y = self.out_conv(y) + alpha_feat

        return y

    def extract_feature_maps_patches(self, img_feat, alpha_feat, unknown):
        # extract image feature patches with shape:
        # (N, img_h*img_w, img_c, img_ks, img_ks)
        img_ks = self.kernel_size
        img_ps = self.extract_patches(img_feat, img_ks, self.stride)

        # extract alpha feature patches with shape:
        # (N, img_h*img_w, alpha_c, alpha_ks, alpha_ks)
        alpha_ps = self.extract_patches(alpha_feat, self.rate * 2, self.rate)

        # extract unknown mask patches with shape: (N, img_h*img_w, 1, 1)
        unknown_ps = self.extract_patches(unknown, img_ks, self.stride)
        unknown_ps = unknown_ps.squeeze(axis=2)  # squeeze channel dimension
        unknown_ps = unknown_ps.mean(axis=[2, 3], keepdim=True)

        return img_ps, alpha_ps, unknown_ps

    def extract_patches(self, x, kernel_size, stride):
        n, c, _, _ = x.shape
        x = self.pad(x, kernel_size, stride)
        x = F.unfold(x, [kernel_size, kernel_size], strides=[stride, stride])
        x = paddle.transpose(x, (0, 2, 1))
        x = paddle.reshape(x, (n, -1, c, kernel_size, kernel_size))

        return x

    def pad(self, x, kernel_size, stride):
        left = (kernel_size - stride + 1) // 2
        right = (kernel_size - stride) // 2
        pad = (left, right, left, right)
        return F.pad(x, pad, mode='reflect')

    def compute_guided_attention_score(self, similarity_map, unknown_ps, scale,
                                       self_mask):
        # scale the correlation with predicted scale factor for known and
        # unknown area
        unknown_scale, known_scale = scale[0]
        out = similarity_map * (
            unknown_scale * paddle.greater_than(unknown_ps,
                                                paddle.to_tensor([0.])) +
            known_scale * paddle.less_equal(unknown_ps,
                                            paddle.to_tensor([0.])))
        # mask itself, self-mask only applied to unknown area
        out = out + self_mask * unknown_ps
        gca_score = F.softmax(out, axis=1)

        return gca_score

    def propagate_alpha_feature(self, gca_score, alpha_ps):
        alpha_ps = alpha_ps[0]  # squeeze dim 0

        if self.rate == 1:
            gca_score = self.pad(gca_score, kernel_size=2, stride=1)
            alpha_ps = paddle.transpose(alpha_ps, (1, 0, 2, 3))
            out = F.conv2d(gca_score, alpha_ps) / 4.
        else:
            out = F.conv2d_transpose(
                gca_score, alpha_ps, stride=self.rate, padding=1) / 4.

        return out

    def compute_similarity_map(self, img_feat, img_ps):
        img_ps = img_ps[0]  # squeeze dim 0

        # convolve the feature to get correlation (similarity) map
        img_ps_normed = img_ps / paddle.clip(self.l2_norm(img_ps), 1e-4)
        img_feat = F.pad(img_feat, (1, 1, 1, 1), mode='reflect')
        similarity_map = F.conv2d(img_feat, img_ps_normed)

        return similarity_map

    def get_self_correlation_mask(self, img_feat):
        _, _, h, w = img_feat.shape
        self_mask = F.one_hot(
            paddle.reshape(paddle.arange(h * w), (h, w)),
            num_classes=int(h * w))

        self_mask = paddle.transpose(self_mask, (2, 0, 1))
        self_mask = paddle.reshape(self_mask, (1, h * w, h, w))

        return self_mask * (-1e4)

    def process_unknown_mask(self, unknown, img_feat, softmax_scale):
        n, _, h, w = img_feat.shape

        if unknown is not None:
            unknown = unknown.clone()
            unknown = F.interpolate(
                unknown, scale_factor=1 / self.rate, mode='nearest')
            unknown_mean = unknown.mean(axis=[2, 3])
            known_mean = 1 - unknown_mean
            unknown_scale = paddle.clip(
                paddle.sqrt(unknown_mean / known_mean), 0.1, 10)
            known_scale = paddle.clip(
                paddle.sqrt(known_mean / unknown_mean), 0.1, 10)
            softmax_scale = paddle.concat([unknown_scale, known_scale], axis=1)
        else:
            unknown = paddle.ones([n, 1, h, w])
            softmax_scale = paddle.reshape(
                paddle.to_tensor([softmax_scale, softmax_scale]), (1, 2))
            softmax_scale = paddle.expand(softmax_scale, (n, 2))

        return unknown, softmax_scale

    @staticmethod
    def l2_norm(x):
        x = x**2
        x = x.sum(axis=[1, 2, 3], keepdim=True)
        return paddle.sqrt(x)
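GuidedCxtAtten expects image and alpha features at the same resolution; it downsamples the guidance internally by `rate`, matches 3x3 image patches against every location, and transposes the attention scores back onto 4x4 alpha patches as a residual. A hedged usage sketch (the sizes below are illustrative assumptions, not fixed by the module):

import paddle
from ppmatting.models.layers import GuidedCxtAtten

gca = GuidedCxtAtten(out_channels=128, guidance_channels=128)
img_feat = paddle.rand([1, 128, 32, 32])    # assumed guidance features
alpha_feat = paddle.rand([1, 128, 32, 32])  # assumed alpha features
unknown = (paddle.rand([1, 1, 32, 32]) > 0.5).astype('float32')
y = gca(img_feat, alpha_feat, unknown)      # residual output, same shape as alpha_feat
print(y.shape)                              # [1, 128, 32, 32]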
Matting/ppmatting/models/layers/tensor_fusion.py
0 → 100644
View file @
0d97cc8c
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.models import layers

from ppmatting.models.layers import tensor_fusion_helper as helper


class MLFF(nn.Layer):
    """
    Multi-level features are fused adaptively by obtaining spatial attention.

    Args:
        in_channels(list): The channels of input tensors.
        mid_channels(list): The middle channels while fusing the features.
        out_channel(int): The output channel after fusing.
        merge_type(str): Which type to merge the multi features before output.
            It should be one of ('add', 'concat'). Default: 'concat'.
    """

    def __init__(self, in_channels, mid_channels, out_channel,
                 merge_type='concat'):
        super().__init__()
        self.merge_type = merge_type

        # Check arguments
        if len(in_channels) != len(mid_channels):
            raise ValueError(
                "`mid_channels` should have the same length as `in_channels`, but they are {} and {}"
                .format(mid_channels, in_channels))
        if self.merge_type == 'add' and len(
                np.unique(np.array(mid_channels))) != 1:
            raise ValueError(
                "If `merge_type` is 'add', `mid_channels` should be the same for all input features, but it is {}."
                .format(mid_channels))

        self.pwconvs = nn.LayerList()
        self.dwconvs = nn.LayerList()
        for in_channel, mid_channel in zip(in_channels, mid_channels):
            self.pwconvs.append(
                layers.ConvBN(in_channel, mid_channel, 1, bias_attr=False))
            self.dwconvs.append(
                layers.ConvBNReLU(
                    mid_channel,
                    mid_channel,
                    3,
                    padding=1,
                    groups=mid_channel,
                    bias_attr=False))

        num_feas = len(in_channels)
        self.conv_atten = nn.Sequential(
            layers.ConvBNReLU(
                2 * num_feas, num_feas, kernel_size=3, padding=1,
                bias_attr=False),
            layers.ConvBN(
                num_feas, num_feas, kernel_size=3, padding=1,
                bias_attr=False))

        if self.merge_type == 'add':
            in_chan = mid_channels[0]
        else:
            in_chan = sum(mid_channels)
        self.conv_out = layers.ConvBNReLU(
            in_chan, out_channel, kernel_size=3, padding=1, bias_attr=False)

    def forward(self, inputs, shape):
        """
        Args:
            inputs(list): List of tensors to be fused.
            shape(Tensor): A tensor with two elements like (H, W).
        """
        feas = []
        for i, input in enumerate(inputs):
            x = self.pwconvs[i](input)
            x = F.interpolate(
                x, size=shape, mode='bilinear', align_corners=False)
            x = self.dwconvs[i](x)
            feas.append(x)

        atten = helper.avg_max_reduce_channel(feas)
        atten = F.sigmoid(self.conv_atten(atten))

        feas_att = []
        for i, fea in enumerate(feas):
            fea = fea * (atten[:, i, :, :].unsqueeze(1))
            feas_att.append(fea)

        if self.merge_type == 'concat':
            out = paddle.concat(feas_att, axis=1)
        else:
            out = sum(feas_att)

        out = self.conv_out(out)
        return out
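MLFF resizes every input to a common spatial size, derives one spatial attention map per branch from channel-wise mean and max statistics, and merges the reweighted branches. A sketch under assumed channel counts (two pyramid levels fused at the finer scale):

import paddle
from ppmatting.models.layers import MLFF

mlff = MLFF(in_channels=[256, 512], mid_channels=[64, 64], out_channel=64)
f8 = paddle.rand([1, 256, 40, 40])    # e.g. a 1/8-scale feature
f16 = paddle.rand([1, 512, 20, 20])   # e.g. a 1/16-scale feature
out = mlff([f8, f16], shape=paddle.shape(f8)[-2:])
print(out.shape)                      # [1, 64, 40, 40]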
Matting/ppmatting/models/layers/tensor_fusion_helper.py
0 → 100644
View file @
0d97cc8c
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F


def avg_max_reduce_channel_helper(x, use_concat=True):
    # Reduce the channel dimension by avg and max; only supports a single input
    assert not isinstance(x, (list, tuple))
    mean_value = paddle.mean(x, axis=1, keepdim=True)
    max_value = paddle.max(x, axis=1, keepdim=True)

    if use_concat:
        res = paddle.concat([mean_value, max_value], axis=1)
    else:
        res = [mean_value, max_value]
    return res


def avg_max_reduce_channel(x):
    # Reduce the channel dimension by avg and max
    # Return cat([avg_ch_0, max_ch_0, avg_ch_1, max_ch_1, ...])
    if not isinstance(x, (list, tuple)):
        return avg_max_reduce_channel_helper(x)
    elif len(x) == 1:
        return avg_max_reduce_channel_helper(x[0])
    else:
        res = []
        for xi in x:
            res.extend(avg_max_reduce_channel_helper(xi, False))
        return paddle.concat(res, axis=1)
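For two inputs the reduction yields four single-channel maps in the order [avg_0, max_0, avg_1, max_1], which is exactly what MLFF's `conv_atten` (in_channels = 2 * num_feas) consumes. A quick shape check (toy tensors):

import paddle
from ppmatting.models.layers.tensor_fusion_helper import avg_max_reduce_channel

a = paddle.rand([1, 8, 4, 4])
b = paddle.rand([1, 16, 4, 4])
print(avg_max_reduce_channel([a, b]).shape)  # [1, 4, 4, 4]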
Matting/ppmatting/models/losses/__init__.py
0 → 100644
View file @
0d97cc8c
from .loss import *
Matting/ppmatting/models/losses/loss.py
0 → 100644
View file @
0d97cc8c
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddleseg.cvlibs import manager
import cv2


@manager.LOSSES.add_component
class MRSD(nn.Layer):
    def __init__(self, eps=1e-6):
        super().__init__()
        self.eps = eps

    def forward(self, logit, label, mask=None):
        """
        Forward computation.

        Args:
            logit (Tensor): Logit tensor, the data type is float32, float64.
            label (Tensor): Label tensor, the data type is float32, float64. The shape should be the same as logit.
            mask (Tensor, optional): The mask where the loss is valid. Default: None.
        """
        if len(label.shape) == 3:
            label = label.unsqueeze(1)
        sd = paddle.square(logit - label)
        loss = paddle.sqrt(sd + self.eps)
        if mask is not None:
            mask = mask.astype('float32')
            if len(mask.shape) == 3:
                mask = mask.unsqueeze(1)
            loss = loss * mask
            loss = loss.sum() / (mask.sum() + self.eps)
            mask.stop_gradient = True
        else:
            loss = loss.mean()

        return loss


@manager.LOSSES.add_component
class GradientLoss(nn.Layer):
    def __init__(self, eps=1e-6):
        super().__init__()
        self.kernel_x, self.kernel_y = self.sobel_kernel()
        self.eps = eps

    def forward(self, logit, label, mask=None):
        if len(label.shape) == 3:
            label = label.unsqueeze(1)
        if mask is not None:
            if len(mask.shape) == 3:
                mask = mask.unsqueeze(1)
            logit = logit * mask
            label = label * mask
            loss = paddle.sum(
                F.l1_loss(self.sobel(logit), self.sobel(label), 'none')) / (
                    mask.sum() + self.eps)
        else:
            loss = F.l1_loss(self.sobel(logit), self.sobel(label), 'mean')

        return loss

    def sobel(self, input):
        """Using Sobel to compute gradient. Return the magnitude."""
        if not len(input.shape) == 4:
            raise ValueError("Invalid input shape, we expect NCHW, but it is ",
                             input.shape)

        n, c, h, w = input.shape

        input_pad = paddle.reshape(input, (n * c, 1, h, w))
        input_pad = F.pad(input_pad, pad=[1, 1, 1, 1], mode='replicate')

        grad_x = F.conv2d(input_pad, self.kernel_x, padding=0)
        grad_y = F.conv2d(input_pad, self.kernel_y, padding=0)

        mag = paddle.sqrt(grad_x * grad_x + grad_y * grad_y + self.eps)
        mag = paddle.reshape(mag, (n, c, h, w))

        return mag

    def sobel_kernel(self):
        kernel_x = paddle.to_tensor([[-1.0, 0.0, 1.0], [-2.0, 0.0, 2.0],
                                     [-1.0, 0.0, 1.0]]).astype('float32')
        kernel_x = kernel_x / kernel_x.abs().sum()
        kernel_y = kernel_x.transpose([1, 0])
        kernel_x = kernel_x.unsqueeze(0).unsqueeze(0)
        kernel_y = kernel_y.unsqueeze(0).unsqueeze(0)
        kernel_x.stop_gradient = True
        kernel_y.stop_gradient = True
        return kernel_x, kernel_y


@manager.LOSSES.add_component
class LaplacianLoss(nn.Layer):
    """
    Laplacian loss refers to
    https://github.com/JizhiziLi/AIM/blob/master/core/evaluate.py#L83
    """

    def __init__(self):
        super().__init__()
        self.gauss_kernel = self.build_gauss_kernel(
            size=5, sigma=1.0, n_channels=1)

    def forward(self, logit, label, mask=None):
        if len(label.shape) == 3:
            label = label.unsqueeze(1)
        if mask is not None:
            if len(mask.shape) == 3:
                mask = mask.unsqueeze(1)
            logit = logit * mask
            label = label * mask
        pyr_label = self.laplacian_pyramid(label, self.gauss_kernel, 5)
        pyr_logit = self.laplacian_pyramid(logit, self.gauss_kernel, 5)
        loss = sum(F.l1_loss(a, b) for a, b in zip(pyr_label, pyr_logit))

        return loss

    def build_gauss_kernel(self, size=5, sigma=1.0, n_channels=1):
        if size % 2 != 1:
            raise ValueError("kernel size must be odd")
        grid = np.float32(np.mgrid[0:size, 0:size].T)
        gaussian = lambda x: np.exp((x - size // 2)**2 / (-2 * sigma**2))**2
        kernel = np.sum(gaussian(grid), axis=2)
        kernel /= np.sum(kernel)
        kernel = np.tile(kernel, (n_channels, 1, 1))
        kernel = paddle.to_tensor(kernel[:, None, :, :])
        kernel.stop_gradient = True
        return kernel

    def conv_gauss(self, input, kernel):
        n_channels, _, kh, kw = kernel.shape
        x = F.pad(input, (kw // 2, kw // 2, kh // 2, kh // 2),
                  mode='replicate')
        x = F.conv2d(x, kernel, groups=n_channels)

        return x

    def laplacian_pyramid(self, input, kernel, max_levels=5):
        current = input
        pyr = []
        for level in range(max_levels):
            filtered = self.conv_gauss(current, kernel)
            diff = current - filtered
            pyr.append(diff)
            current = F.avg_pool2d(filtered, 2)
        pyr.append(current)
        return pyr
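MRSD is a smoothed L1: per pixel it computes sqrt((logit - label)^2 + eps), then averages either globally or over the valid mask. A small sanity check (values chosen so the expected loss is obvious):

import paddle
from ppmatting.models.losses import MRSD

pred = paddle.full([1, 1, 4, 4], 0.75)
gt = paddle.full([1, 1, 4, 4], 0.25)
mask = paddle.ones([1, 1, 4, 4])
loss = MRSD()(pred, gt, mask)
print(float(loss))  # ~0.5, i.e. sqrt(0.5**2 + eps)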
Matting/ppmatting/models/modnet.py
0 → 100644
View file @
0d97cc8c
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import numpy as np
import scipy
import paddleseg
from paddleseg.models import layers, losses
from paddleseg import utils
from paddleseg.cvlibs import manager, param_init


@manager.MODELS.add_component
class MODNet(nn.Layer):
    """
    The MODNet implementation based on PaddlePaddle.

    The original article refers to
    Zhanghan Ke, et, al. "Is a Green Screen Really Necessary for Real-Time Portrait Matting?"
    (https://arxiv.org/pdf/2011.11961.pdf).

    Args:
        backbone: backbone model.
        hr_channels(int, optional): The channels of the high-resolution branch. Default: 32.
        pretrained(str, optional): The path of the pretrained model. Default: None.
    """

    def __init__(self, backbone, hr_channels=32, pretrained=None):
        super().__init__()
        self.backbone = backbone
        self.pretrained = pretrained

        self.head = MODNetHead(
            hr_channels=hr_channels, backbone_channels=backbone.feat_channels)
        self.init_weight()
        self.blurer = GaussianBlurLayer(1, 3)
        self.loss_func_dict = None

    def forward(self, inputs):
        """
        If training, return a dict.
        If evaluation, return the final alpha prediction.
        """
        x = inputs['img']
        feat_list = self.backbone(x)
        y = self.head(inputs=inputs, feat_list=feat_list)
        if self.training:
            loss = self.loss(y, inputs)
            return y, loss
        else:
            return y

    def loss(self, logit_dict, label_dict, loss_func_dict=None):
        if loss_func_dict is None:
            if self.loss_func_dict is None:
                self.loss_func_dict = defaultdict(list)
                self.loss_func_dict['semantic'].append(
                    paddleseg.models.MSELoss())
                self.loss_func_dict['detail'].append(paddleseg.models.L1Loss())
                self.loss_func_dict['fusion'].append(paddleseg.models.L1Loss())
                self.loss_func_dict['fusion'].append(paddleseg.models.L1Loss())
        else:
            self.loss_func_dict = loss_func_dict

        loss = {}
        # semantic loss
        semantic_gt = F.interpolate(
            label_dict['alpha'],
            scale_factor=1 / 16,
            mode='bilinear',
            align_corners=False)
        semantic_gt = self.blurer(semantic_gt)
        # semantic_gt.stop_gradient=True
        loss['semantic'] = self.loss_func_dict['semantic'][0](
            logit_dict['semantic'], semantic_gt)

        # detail loss
        trimap = label_dict['trimap']
        mask = (trimap == 128).astype('float32')
        logit_detail = logit_dict['detail'] * mask
        label_detail = label_dict['alpha'] * mask
        loss_detail = self.loss_func_dict['detail'][0](logit_detail,
                                                       label_detail)
        loss_detail = loss_detail / (mask.mean() + 1e-6)
        loss['detail'] = 10 * loss_detail

        # fusion loss
        matte = logit_dict['matte']
        alpha = label_dict['alpha']
        transition_mask = label_dict['trimap'] == 128
        matte_boundary = paddle.where(transition_mask, matte, alpha)
        # l1 loss
        loss_fusion_l1 = self.loss_func_dict['fusion'][0](
            matte, alpha) + 4 * self.loss_func_dict['fusion'][0](
                matte_boundary, alpha)
        # composition loss
        loss_fusion_comp = self.loss_func_dict['fusion'][1](
            matte * label_dict['img'], alpha *
            label_dict['img']) + 4 * self.loss_func_dict['fusion'][1](
                matte_boundary * label_dict['img'], alpha * label_dict['img'])
        # consistency loss with semantic
        transition_mask = F.interpolate(
            label_dict['trimap'],
            scale_factor=1 / 16,
            mode='nearest',
            align_corners=False)
        transition_mask = transition_mask == 128
        matte_con_sem = F.interpolate(
            matte, scale_factor=1 / 16, mode='bilinear', align_corners=False)
        matte_con_sem = self.blurer(matte_con_sem)
        logit_semantic = logit_dict['semantic'].clone()
        logit_semantic.stop_gradient = True
        matte_con_sem = paddle.where(transition_mask, logit_semantic,
                                     matte_con_sem)
        if False:
            # debug visualization, disabled by default
            import cv2
            matte_con_sem_num = matte_con_sem.numpy()
            matte_con_sem_num = matte_con_sem_num[0].squeeze()
            matte_con_sem_num = (matte_con_sem_num * 255).astype('uint8')
            semantic = logit_dict['semantic'].numpy()
            semantic = semantic[0].squeeze()
            semantic = (semantic * 255).astype('uint8')
            transition_mask = transition_mask.astype('uint8')
            transition_mask = transition_mask.numpy()
            transition_mask = (transition_mask[0].squeeze()) * 255
            cv2.imwrite('matte_con.png', matte_con_sem_num)
            cv2.imwrite('semantic.png', semantic)
            cv2.imwrite('transition.png', transition_mask)
        mse_loss = paddleseg.models.MSELoss()
        loss_fusion_con_sem = mse_loss(matte_con_sem, logit_dict['semantic'])
        loss_fusion = loss_fusion_l1 + loss_fusion_comp + loss_fusion_con_sem
        loss['fusion'] = loss_fusion
        loss['fusion_l1'] = loss_fusion_l1
        loss['fusion_comp'] = loss_fusion_comp
        loss['fusion_con_sem'] = loss_fusion_con_sem

        loss['all'] = loss['semantic'] + loss['detail'] + loss['fusion']
        return loss

    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)


class MODNetHead(nn.Layer):
    def __init__(self, hr_channels, backbone_channels):
        super().__init__()

        self.lr_branch = LRBranch(backbone_channels)
        self.hr_branch = HRBranch(hr_channels, backbone_channels)
        self.f_branch = FusionBranch(hr_channels, backbone_channels)
        self.init_weight()

    def forward(self, inputs, feat_list):
        pred_semantic, lr8x, [enc2x, enc4x] = self.lr_branch(feat_list)
        pred_detail, hr2x = self.hr_branch(inputs['img'], enc2x, enc4x, lr8x)
        pred_matte = self.f_branch(inputs['img'], lr8x, hr2x)

        if self.training:
            logit_dict = {
                'semantic': pred_semantic,
                'detail': pred_detail,
                'matte': pred_matte
            }
            return logit_dict
        else:
            return pred_matte

    def init_weight(self):
        for layer in self.sublayers():
            if isinstance(layer, nn.Conv2D):
                param_init.kaiming_uniform(layer.weight)


class FusionBranch(nn.Layer):
    def __init__(self, hr_channels, enc_channels):
        super().__init__()
        self.conv_lr4x = Conv2dIBNormRelu(
            enc_channels[2], hr_channels, 5, stride=1, padding=2)

        self.conv_f2x = Conv2dIBNormRelu(
            2 * hr_channels, hr_channels, 3, stride=1, padding=1)
        self.conv_f = nn.Sequential(
            Conv2dIBNormRelu(
                hr_channels + 3, int(hr_channels / 2), 3, stride=1, padding=1),
            Conv2dIBNormRelu(
                int(hr_channels / 2),
                1,
                1,
                stride=1,
                padding=0,
                with_ibn=False,
                with_relu=False))

    def forward(self, img, lr8x, hr2x):
        lr4x = F.interpolate(
            lr8x, scale_factor=2, mode='bilinear', align_corners=False)
        lr4x = self.conv_lr4x(lr4x)
        lr2x = F.interpolate(
            lr4x, scale_factor=2, mode='bilinear', align_corners=False)

        f2x = self.conv_f2x(paddle.concat((lr2x, hr2x), axis=1))
        f = F.interpolate(
            f2x, scale_factor=2, mode='bilinear', align_corners=False)
        f = self.conv_f(paddle.concat((f, img), axis=1))
        pred_matte = F.sigmoid(f)

        return pred_matte


class HRBranch(nn.Layer):
    """
    High Resolution Branch of MODNet
    """

    def __init__(self, hr_channels, enc_channels):
        super().__init__()

        self.tohr_enc2x = Conv2dIBNormRelu(
            enc_channels[0], hr_channels, 1, stride=1, padding=0)
        self.conv_enc2x = Conv2dIBNormRelu(
            hr_channels + 3, hr_channels, 3, stride=2, padding=1)

        self.tohr_enc4x = Conv2dIBNormRelu(
            enc_channels[1], hr_channels, 1, stride=1, padding=0)
        self.conv_enc4x = Conv2dIBNormRelu(
            2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1)

        self.conv_hr4x = nn.Sequential(
            Conv2dIBNormRelu(
                2 * hr_channels + enc_channels[2] + 3,
                2 * hr_channels,
                3,
                stride=1,
                padding=1),
            Conv2dIBNormRelu(
                2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
            Conv2dIBNormRelu(
                2 * hr_channels, hr_channels, 3, stride=1, padding=1))

        self.conv_hr2x = nn.Sequential(
            Conv2dIBNormRelu(
                2 * hr_channels, 2 * hr_channels, 3, stride=1, padding=1),
            Conv2dIBNormRelu(
                2 * hr_channels, hr_channels, 3, stride=1, padding=1),
            Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1),
            Conv2dIBNormRelu(hr_channels, hr_channels, 3, stride=1, padding=1))

        self.conv_hr = nn.Sequential(
            Conv2dIBNormRelu(
                hr_channels + 3, hr_channels, 3, stride=1, padding=1),
            Conv2dIBNormRelu(
                hr_channels,
                1,
                1,
                stride=1,
                padding=0,
                with_ibn=False,
                with_relu=False))

    def forward(self, img, enc2x, enc4x, lr8x):
        img2x = F.interpolate(
            img, scale_factor=1 / 2, mode='bilinear', align_corners=False)
        img4x = F.interpolate(
            img, scale_factor=1 / 4, mode='bilinear', align_corners=False)

        enc2x = self.tohr_enc2x(enc2x)
        hr4x = self.conv_enc2x(paddle.concat((img2x, enc2x), axis=1))

        enc4x = self.tohr_enc4x(enc4x)
        hr4x = self.conv_enc4x(paddle.concat((hr4x, enc4x), axis=1))

        lr4x = F.interpolate(
            lr8x, scale_factor=2, mode='bilinear', align_corners=False)
        hr4x = self.conv_hr4x(paddle.concat((hr4x, lr4x, img4x), axis=1))

        hr2x = F.interpolate(
            hr4x, scale_factor=2, mode='bilinear', align_corners=False)
        hr2x = self.conv_hr2x(paddle.concat((hr2x, enc2x), axis=1))

        pred_detail = None
        if self.training:
            hr = F.interpolate(
                hr2x, scale_factor=2, mode='bilinear', align_corners=False)
            hr = self.conv_hr(paddle.concat((hr, img), axis=1))
            pred_detail = F.sigmoid(hr)

        return pred_detail, hr2x


class LRBranch(nn.Layer):
    def __init__(self, backbone_channels):
        super().__init__()
        self.se_block = SEBlock(backbone_channels[4], reduction=4)
        self.conv_lr16x = Conv2dIBNormRelu(
            backbone_channels[4], backbone_channels[3], 5, stride=1, padding=2)
        self.conv_lr8x = Conv2dIBNormRelu(
            backbone_channels[3], backbone_channels[2], 5, stride=1, padding=2)
        self.conv_lr = Conv2dIBNormRelu(
            backbone_channels[2],
            1,
            3,
            stride=2,
            padding=1,
            with_ibn=False,
            with_relu=False)

    def forward(self, feat_list):
        enc2x, enc4x, enc32x = feat_list[0], feat_list[1], feat_list[4]

        enc32x = self.se_block(enc32x)
        lr16x = F.interpolate(
            enc32x, scale_factor=2, mode='bilinear', align_corners=False)
        lr16x = self.conv_lr16x(lr16x)
        lr8x = F.interpolate(
            lr16x, scale_factor=2, mode='bilinear', align_corners=False)
        lr8x = self.conv_lr8x(lr8x)

        pred_semantic = None
        if self.training:
            lr = self.conv_lr(lr8x)
            pred_semantic = F.sigmoid(lr)

        return pred_semantic, lr8x, [enc2x, enc4x]


class IBNorm(nn.Layer):
    """
    Combine Instance Norm and Batch Norm into One Layer
    """

    def __init__(self, in_channels):
        super().__init__()
        self.bnorm_channels = in_channels // 2
        self.inorm_channels = in_channels - self.bnorm_channels

        self.bnorm = nn.BatchNorm2D(self.bnorm_channels)
        self.inorm = nn.InstanceNorm2D(self.inorm_channels)

    def forward(self, x):
        bn_x = self.bnorm(x[:, :self.bnorm_channels, :, :])
        in_x = self.inorm(x[:, self.bnorm_channels:, :, :])

        return paddle.concat((bn_x, in_x), 1)


class Conv2dIBNormRelu(nn.Layer):
    """
    Convolution + IBNorm + ReLU
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias_attr=None,
                 with_ibn=True,
                 with_relu=True):
        super().__init__()

        layers = [
            nn.Conv2D(
                in_channels,
                out_channels,
                kernel_size,
                stride=stride,
                padding=padding,
                dilation=dilation,
                groups=groups,
                bias_attr=bias_attr)
        ]
        if with_ibn:
            layers.append(IBNorm(out_channels))
        if with_relu:
            layers.append(nn.ReLU())

        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        return self.layers(x)


class SEBlock(nn.Layer):
    """
    SE Block Proposed in https://arxiv.org/pdf/1709.01507.pdf
    """

    def __init__(self, num_channels, reduction=1):
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2D(1)
        self.conv = nn.Sequential(
            nn.Conv2D(
                num_channels, int(num_channels // reduction), 1,
                bias_attr=False),
            nn.ReLU(),
            nn.Conv2D(
                int(num_channels // reduction), num_channels, 1,
                bias_attr=False),
            nn.Sigmoid())

    def forward(self, x):
        w = self.pool(x)
        w = self.conv(w)
        return w * x


class GaussianBlurLayer(nn.Layer):
    """ Add Gaussian Blur to a 4D tensor
    This layer takes a 4D tensor of {N, C, H, W} as input.
    The Gaussian blur is performed on each of the given channels (C) separately.
    """

    def __init__(self, channels, kernel_size):
        """
        Args:
            channels (int): Channel for input tensor
            kernel_size (int): Size of the kernel used in blurring
        """
        super(GaussianBlurLayer, self).__init__()
        self.channels = channels
        self.kernel_size = kernel_size
        assert self.kernel_size % 2 != 0

        self.op = nn.Sequential(
            nn.Pad2D(int(self.kernel_size / 2), mode='reflect'),
            nn.Conv2D(
                channels,
                channels,
                self.kernel_size,
                stride=1,
                padding=0,
                bias_attr=False,
                groups=channels))

        self._init_kernel()
        self.op[1].weight.stop_gradient = True

    def forward(self, x):
        """
        Args:
            x (paddle.Tensor): input 4D tensor
        Returns:
            paddle.Tensor: Blurred version of the input
        """
        if not len(list(x.shape)) == 4:
            print('\'GaussianBlurLayer\' requires a 4D tensor as input\n')
            exit()
        elif not x.shape[1] == self.channels:
            print('In \'GaussianBlurLayer\', the required channel ({0}) is '
                  'not the same as input ({1})\n'.format(self.channels,
                                                         x.shape[1]))
            exit()

        return self.op(x)

    def _init_kernel(self):
        sigma = 0.3 * ((self.kernel_size - 1) * 0.5 - 1) + 0.8

        n = np.zeros((self.kernel_size, self.kernel_size))
        i = int(self.kernel_size / 2)
        n[i, i] = 1
        kernel = scipy.ndimage.gaussian_filter(n, sigma)
        kernel = kernel.astype('float32')
        kernel = kernel[np.newaxis, np.newaxis, :, :]
        paddle.assign(kernel, self.op[1].weight)
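GaussianBlurLayer builds a fixed, non-trainable depthwise Gaussian kernel via scipy and applies it per channel with reflect padding, so the output keeps the input shape. A minimal sketch (import path assumed from this file's location in the repo):

import paddle
from ppmatting.models.modnet import GaussianBlurLayer

blurer = GaussianBlurLayer(channels=1, kernel_size=3)
x = paddle.rand([1, 1, 8, 8])
print(blurer(x).shape)  # [1, 1, 8, 8]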
Matting/ppmatting/models/ppmatting.py
0 → 100644
View file @
0d97cc8c
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from collections import defaultdict
import time

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import paddleseg
from paddleseg.models import layers
from paddleseg import utils
from paddleseg.cvlibs import manager

from ppmatting.models.losses import MRSD, GradientLoss
from ppmatting.models.backbone import resnet_vd


@manager.MODELS.add_component
class PPMatting(nn.Layer):
    """
    The PPMatting implementation based on PaddlePaddle.

    The original article refers to
    Guowei Chen, et, al. "PP-Matting: High-Accuracy Natural Image Matting"
    (https://arxiv.org/pdf/2204.09433.pdf).

    Args:
        backbone: backbone model.
        pretrained(str, optional): The path of the pretrained model. Default: None.
    """

    def __init__(self, backbone, pretrained=None):
        super().__init__()
        self.backbone = backbone
        self.pretrained = pretrained
        self.loss_func_dict = self.get_loss_func_dict()

        self.backbone_channels = backbone.feat_channels

        self.scb = SCB(self.backbone_channels[-1])

        self.hrdb = HRDB(
            self.backbone_channels[0] + self.backbone_channels[1],
            scb_channels=self.scb.out_channels,
            gf_index=[0, 2, 4])

        self.init_weight()

    def forward(self, inputs):
        x = inputs['img']
        input_shape = paddle.shape(x)
        fea_list = self.backbone(x)

        scb_logits = self.scb(fea_list[-1])
        semantic_map = F.softmax(scb_logits[-1], axis=1)

        fea0 = F.interpolate(
            fea_list[0], input_shape[2:], mode='bilinear', align_corners=False)
        fea1 = F.interpolate(
            fea_list[1], input_shape[2:], mode='bilinear', align_corners=False)
        hrdb_input = paddle.concat([fea0, fea1], 1)
        hrdb_logit = self.hrdb(hrdb_input, scb_logits)
        detail_map = F.sigmoid(hrdb_logit)
        fusion = self.fusion(semantic_map, detail_map)

        if self.training:
            logit_dict = {
                'semantic': semantic_map,
                'detail': detail_map,
                'fusion': fusion
            }
            loss_dict = self.loss(logit_dict, inputs)
            return logit_dict, loss_dict
        else:
            return fusion

    def get_loss_func_dict(self):
        loss_func_dict = defaultdict(list)
        loss_func_dict['semantic'].append(nn.NLLLoss())
        loss_func_dict['detail'].append(MRSD())
        loss_func_dict['detail'].append(GradientLoss())
        loss_func_dict['fusion'].append(MRSD())
        loss_func_dict['fusion'].append(MRSD())
        loss_func_dict['fusion'].append(GradientLoss())
        return loss_func_dict

    def loss(self, logit_dict, label_dict):
        loss = {}

        # semantic loss computation
        # get semantic label
        semantic_label = label_dict['trimap']
        semantic_label_trans = (semantic_label == 128).astype('int64')
        semantic_label_bg = (semantic_label == 0).astype('int64')
        semantic_label = semantic_label_trans + semantic_label_bg * 2
        loss_semantic = self.loss_func_dict['semantic'][0](
            paddle.log(logit_dict['semantic'] + 1e-6),
            semantic_label.squeeze(1))
        loss['semantic'] = loss_semantic

        # detail loss computation
        transparent = label_dict['trimap'] == 128
        detail_alpha_loss = self.loss_func_dict['detail'][0](
            logit_dict['detail'], label_dict['alpha'], transparent)
        # gradient loss
        detail_gradient_loss = self.loss_func_dict['detail'][1](
            logit_dict['detail'], label_dict['alpha'], transparent)
        loss_detail = detail_alpha_loss + detail_gradient_loss
        loss['detail'] = loss_detail
        loss['detail_alpha'] = detail_alpha_loss
        loss['detail_gradient'] = detail_gradient_loss

        # fusion loss
        loss_fusion_func = self.loss_func_dict['fusion']
        # fusion_sigmoid loss
        fusion_alpha_loss = loss_fusion_func[0](logit_dict['fusion'],
                                                label_dict['alpha'])
        # composition loss
        comp_pred = logit_dict['fusion'] * label_dict['fg'] + (
            1 - logit_dict['fusion']) * label_dict['bg']
        comp_gt = label_dict['alpha'] * label_dict['fg'] + (
            1 - label_dict['alpha']) * label_dict['bg']
        fusion_composition_loss = loss_fusion_func[1](comp_pred, comp_gt)
        # gradient loss
        fusion_grad_loss = loss_fusion_func[2](logit_dict['fusion'],
                                               label_dict['alpha'])
        # fusion loss
        loss_fusion = fusion_alpha_loss + fusion_composition_loss + fusion_grad_loss
        loss['fusion'] = loss_fusion
        loss['fusion_alpha'] = fusion_alpha_loss
        loss['fusion_composition'] = fusion_composition_loss
        loss['fusion_gradient'] = fusion_grad_loss

        loss['all'] = 0.25 * loss_semantic + 0.25 * loss_detail + 0.25 * loss_fusion
        return loss

    def fusion(self, semantic_map, detail_map):
        # semantic_map [N, 3, H, W]
        # In index, 0 is foreground, 1 is transition, 2 is background
        # After fusion, the foreground is 1, the background is 0, and the transition is in [0, 1]
        index = paddle.argmax(semantic_map, axis=1, keepdim=True)
        transition_mask = (index == 1).astype('float32')
        fg = (index == 0).astype('float32')
        alpha = detail_map * transition_mask + fg
        return alpha

    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)


class SCB(nn.Layer):
    def __init__(self, in_channels):
        super().__init__()
        self.in_channels = [512 + in_channels, 512, 256, 128, 128, 64]
        self.mid_channels = [512, 256, 128, 128, 64, 64]
        self.out_channels = [256, 128, 64, 64, 64, 3]

        self.psp_module = layers.PPModule(
            in_channels,
            512,
            bin_sizes=(1, 3, 5),
            dim_reduction=False,
            align_corners=False)

        psp_upsamples = [2, 4, 8, 16]
        self.psps = nn.LayerList([
            self.conv_up_psp(512, self.out_channels[i], psp_upsamples[i])
            for i in range(4)
        ])

        scb_list = [
            self._make_stage(
                self.in_channels[i],
                self.mid_channels[i],
                self.out_channels[i],
                padding=int(i == 0) + 1,
                dilation=int(i == 0) + 1)
            for i in range(len(self.in_channels) - 1)
        ]
        scb_list += [
            nn.Sequential(
                layers.ConvBNReLU(
                    self.in_channels[-1], self.mid_channels[-1], 3, padding=1),
                layers.ConvBNReLU(
                    self.mid_channels[-1], self.mid_channels[-1], 3,
                    padding=1),
                nn.Conv2D(
                    self.mid_channels[-1], self.out_channels[-1], 3,
                    padding=1))
        ]
        self.scb_stages = nn.LayerList(scb_list)

    def forward(self, x):
        psp_x = self.psp_module(x)
        psps = [psp(psp_x) for psp in self.psps]

        scb_logits = []
        for i, scb_stage in enumerate(self.scb_stages):
            if i == 0:
                x = scb_stage(paddle.concat((psp_x, x), 1))
            elif i <= len(psps):
                x = scb_stage(paddle.concat((psps[i - 1], x), 1))
            else:
                x = scb_stage(x)
            scb_logits.append(x)
        return scb_logits

    def conv_up_psp(self, in_channels, out_channels, up_sample):
        return nn.Sequential(
            layers.ConvBNReLU(in_channels, out_channels, 3, padding=1),
            nn.Upsample(
                scale_factor=up_sample, mode='bilinear', align_corners=False))

    def _make_stage(self,
                    in_channels,
                    mid_channels,
                    out_channels,
                    padding=1,
                    dilation=1):
        layer_list = [
            layers.ConvBNReLU(in_channels, mid_channels, 3, padding=1),
            layers.ConvBNReLU(
                mid_channels,
                mid_channels,
                3,
                padding=padding,
                dilation=dilation),
            layers.ConvBNReLU(
                mid_channels,
                out_channels,
                3,
                padding=padding,
                dilation=dilation),
            nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False)
        ]
        return nn.Sequential(*layer_list)


class HRDB(nn.Layer):
    """
    The High-Resolution Detail Branch

    Args:
        in_channels(int): The number of input channels.
        scb_channels(list|tuple): The channels of the scb logits.
        gf_index(list|tuple, optional): Which logits are selected as guidance flow from the scb logits. Default: (0, 2, 4)
    """

    def __init__(self, in_channels, scb_channels, gf_index=(0, 2, 4)):
        super().__init__()
        self.gf_index = gf_index
        self.gf_list = nn.LayerList(
            [nn.Conv2D(scb_channels[i], 1, 1) for i in gf_index])

        channels = [64, 32, 16, 8]
        self.res_list = [
            resnet_vd.BasicBlock(
                in_channels, channels[0], stride=1, shortcut=False)
        ]
        self.res_list += [
            resnet_vd.BasicBlock(
                i, i, stride=1) for i in channels[1:-1]
        ]
        self.res_list = nn.LayerList(self.res_list)

        self.convs = nn.LayerList([
            nn.Conv2D(channels[i], channels[i + 1], kernel_size=1)
            for i in range(len(channels) - 1)
        ])
        self.gates = nn.LayerList(
            [GatedSpatailConv2d(i, i) for i in channels[1:]])

        self.detail_conv = nn.Conv2D(channels[-1], 1, 1, bias_attr=False)

    def forward(self, x, scb_logits):
        for i in range(len(self.res_list)):
            x = self.res_list[i](x)
            x = self.convs[i](x)
            gf = self.gf_list[i](scb_logits[self.gf_index[i]])
            gf = F.interpolate(
                gf, paddle.shape(x)[-2:], mode='bilinear', align_corners=False)
            x = self.gates[i](x, gf)
        return self.detail_conv(x)


class GatedSpatailConv2d(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=1,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 bias_attr=False):
        super().__init__()
        self._gate_conv = nn.Sequential(
            layers.SyncBatchNorm(in_channels + 1),
            nn.Conv2D(in_channels + 1, in_channels + 1, kernel_size=1),
            nn.ReLU(),
            nn.Conv2D(in_channels + 1, 1, kernel_size=1),
            layers.SyncBatchNorm(1),
            nn.Sigmoid())
        self.conv = nn.Conv2D(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias_attr=bias_attr)

    def forward(self, input_features, gating_features):
        cat = paddle.concat([input_features, gating_features], axis=1)
        alphas = self._gate_conv(cat)
        x = input_features * (alphas + 1)
        x = self.conv(x)
        return x
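The semantic label construction in PPMatting.loss maps the trimap values {255, 128, 0} to the class ids {0, 1, 2} consumed by the 3-channel SCB head, matching the fusion convention above. A standalone check of that remapping:

import paddle

trimap = paddle.to_tensor([[0, 128], [255, 128]], dtype='int64')
label = (trimap == 128).astype('int64') + (trimap == 0).astype('int64') * 2
print(label.numpy())  # [[2 1] [0 1]] -> bg=2, transition=1, fg=0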
Matting/ppmatting/models/ppmattingv2.py
0 → 100644
View file @
0d97cc8c
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from functools import partial
from collections import defaultdict

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
import paddleseg
from paddleseg import utils
from paddleseg.models import layers
from paddleseg.cvlibs import manager
from paddleseg.models.backbones.transformer_utils import Identity, DropPath

from ppmatting.models.layers import MLFF
from ppmatting.models.losses import MRSD, GradientLoss


@manager.MODELS.add_component
class PPMattingV2(nn.Layer):
    """
    The PPMattingV2 implementation based on PaddlePaddle.

    The original article refers to
    TODO Guowei Chen, et, al. "" ().

    Args:
        backbone: backbone model.
        pretrained(str, optional): The path of the pretrained model. Default: None.
        dpp_len_trans(int, optional): The depth of the transformer block in dpp (DoublePyramidPoolModule). Default: 1.
        dpp_index(list, optional): The indices of the backbone outputs used as the input of dpp. Default: [1, 2, 3, 4].
        dpp_mid_channel(int, optional): The output channels of the first pyramid pool in dpp. Default: 256.
        dpp_output_channel(int, optional): The output channels of dpp. Default: 512.
        dpp_bin_sizes(list, optional): The output sizes of the second pyramid pool in dpp. Default: (2, 4, 6).
        dpp_mlp_ratios(int, optional): The expansion ratio of the mlp in dpp. Default: 2.
        dpp_attn_ratio(int, optional): The expansion ratio of the attention. Default: 2.
        dpp_merge_type(str, optional): The merge type of the outputs of the second pyramid pool in dpp,
            which should be one of ('concat', 'add'). Default: 'concat'.
        mlff_merge_type(str, optional): The merge type of the multi features before output.
            It should be one of ('add', 'concat'). Default: 'concat'.
    """

    def __init__(self,
                 backbone,
                 pretrained=None,
                 dpp_len_trans=1,
                 dpp_index=[1, 2, 3, 4],
                 dpp_mid_channel=256,
                 dpp_output_channel=512,
                 dpp_bin_sizes=(2, 4, 6),
                 dpp_mlp_ratios=2,
                 dpp_attn_ratio=2,
                 dpp_merge_type='concat',
                 mlff_merge_type='concat',
                 decoder_channels=[128, 96, 64, 32, 32],
                 head_channel=32):
        super().__init__()
        self.backbone = backbone
        self.backbone_channels = backbone.feat_channels

        # check
        assert len(backbone.feat_channels) == 5, \
            "Backbone should return 5 features with different scales"
        assert max(dpp_index) < len(backbone.feat_channels), \
            "The element of `dpp_index` should be less than the number of return features of backbone."

        # dpp module
        self.dpp_index = dpp_index
        self.dpp = DoublePyramidPoolModule(
            stride=2,
            input_channel=sum(
                self.backbone_channels[i] for i in self.dpp_index),
            mid_channel=dpp_mid_channel,
            output_channel=dpp_output_channel,
            len_trans=dpp_len_trans,
            bin_sizes=dpp_bin_sizes,
            mlp_ratios=dpp_mlp_ratios,
            attn_ratio=dpp_attn_ratio,
            merge_type=dpp_merge_type)

        # decoder
        self.mlff32x = MLFF(
            in_channels=[self.backbone_channels[-1], dpp_output_channel],
            mid_channels=[dpp_output_channel, dpp_output_channel],
            out_channel=decoder_channels[0],
            merge_type=mlff_merge_type)
        self.mlff16x = MLFF(
            in_channels=[
                self.backbone_channels[-2], decoder_channels[0],
                dpp_output_channel
            ],
            mid_channels=[
                decoder_channels[0], decoder_channels[0], decoder_channels[0]
            ],
            out_channel=decoder_channels[1],
            merge_type=mlff_merge_type)
        self.mlff8x = MLFF(
            in_channels=[
                self.backbone_channels[-3], decoder_channels[1],
                dpp_output_channel
            ],
            mid_channels=[
                decoder_channels[1], decoder_channels[1], decoder_channels[1]
            ],
            out_channel=decoder_channels[2],
            merge_type=mlff_merge_type)
        self.mlff4x = MLFF(
            in_channels=[self.backbone_channels[-4], decoder_channels[2], 3],
            mid_channels=[decoder_channels[2], decoder_channels[2], 3],
            out_channel=decoder_channels[3])
        self.mlff2x = MLFF(
            in_channels=[self.backbone_channels[-5], decoder_channels[3], 3],
            mid_channels=[decoder_channels[3], decoder_channels[3], 3],
            out_channel=decoder_channels[4])

        self.matting_head_mlff8x = MattingHead(
            in_chan=decoder_channels[2], mid_chan=32)
        self.matting_head_mlff2x = MattingHead(
            in_chan=decoder_channels[4] + 3, mid_chan=head_channel, mid_num=2)

        # loss
        self.loss_func_dict = None

        # pretrained
        self.pretrained = pretrained
        self.init_weight()

    def forward(self, inputs):
        img = inputs['img']
        input_shape = paddle.shape(img)
        feats_backbone = self.backbone(
            img)  # stdc1 [2x, 4x, 8x, 16x, 32x] [32, 64, 256, 512, 1024]

        x = self.dpp([feats_backbone[i] for i in self.dpp_index])
        dpp_out = x

        input_32x = [feats_backbone[-1], x]
        x = self.mlff32x(input_32x,
                         paddle.shape(feats_backbone[-1])[-2:])  # 32x

        input_16x = [feats_backbone[-2], x, dpp_out]
        x = self.mlff16x(input_16x,
                         paddle.shape(feats_backbone[-2])[-2:])  # 16x

        input_8x = [feats_backbone[-3], x, dpp_out]
        x = self.mlff8x(input_8x, paddle.shape(feats_backbone[-3])[-2:])  # 8x
        mlff8x_output = x

        input_4x = [feats_backbone[-4], x]
        input_4x.append(
            F.interpolate(img, feats_backbone[-4].shape[2:], mode='area'))
        x = self.mlff4x(input_4x, paddle.shape(feats_backbone[-4])[-2:])  # 4x

        input_2x = [feats_backbone[-5], x]
        input_2x.append(
            F.interpolate(img, feats_backbone[-5].shape[2:], mode='area'))
        x = self.mlff2x(input_2x, paddle.shape(feats_backbone[-5])[-2:])  # 2x

        x = F.interpolate(
            x, input_shape[-2:], mode='bilinear', align_corners=False)
        x = paddle.concat([x, img], axis=1)
        alpha = self.matting_head_mlff2x(x)

        if self.training:
            logit_dict = {}
            logit_dict['alpha'] = alpha
            logit_dict['alpha_8x'] = self.matting_head_mlff8x(mlff8x_output)
            loss_dict = self.loss(logit_dict, inputs)
            return logit_dict, loss_dict
        else:
            return alpha

    def loss(self, logit_dict, label_dict, loss_func_dict=None):
        if loss_func_dict is None:
            if self.loss_func_dict is None:
                self.loss_func_dict = defaultdict(list)
                self.loss_func_dict['alpha'].append(MRSD())
                self.loss_func_dict['alpha'].append(GradientLoss())
                self.loss_func_dict['alpha_8x'].append(MRSD())
                self.loss_func_dict['alpha_8x'].append(GradientLoss())
        else:
            self.loss_func_dict = loss_func_dict

        loss = {}

        alpha_8x_label = F.interpolate(
            label_dict['alpha'],
            size=logit_dict['alpha_8x'].shape[-2:],
            mode='area',
            align_corners=False)
        loss['alpha_8x_mrsd'] = self.loss_func_dict['alpha_8x'][0](
            logit_dict['alpha_8x'], alpha_8x_label)
        loss['alpha_8x_grad'] = self.loss_func_dict['alpha_8x'][1](
            logit_dict['alpha_8x'], alpha_8x_label)
        loss['alpha_8x'] = loss['alpha_8x_mrsd'] + loss['alpha_8x_grad']

        transition_mask = label_dict['trimap'] == 128
        loss['alpha_mrsd'] = self.loss_func_dict['alpha'][0](
            logit_dict['alpha'],
            label_dict['alpha']) + 2 * self.loss_func_dict['alpha'][0](
                logit_dict['alpha'], label_dict['alpha'], transition_mask)
        loss['alpha_grad'] = self.loss_func_dict['alpha'][1](
            logit_dict['alpha'],
            label_dict['alpha']) + 2 * self.loss_func_dict['alpha'][1](
                logit_dict['alpha'], label_dict['alpha'], transition_mask)
        loss['alpha'] = loss['alpha_mrsd'] + loss['alpha_grad']

        loss['all'] = loss['alpha'] + loss['alpha_8x']
        return loss

    def init_weight(self):
        if self.pretrained is not None:
            utils.load_entire_model(self, self.pretrained)


class MattingHead(nn.Layer):
    def __init__(self, in_chan, mid_chan, mid_num=1, out_channels=1):
        super().__init__()
        self.conv = layers.ConvBNReLU(
            in_chan, mid_chan, kernel_size=3, stride=1, padding=1,
            bias_attr=False)
        self.mid_conv = nn.LayerList([
            layers.ConvBNReLU(
                mid_chan, mid_chan, kernel_size=3, stride=1, padding=1,
                bias_attr=False) for i in range(mid_num - 1)
        ])
        self.conv_out = nn.Conv2D(
            mid_chan, out_channels, kernel_size=1, bias_attr=False)

    def forward(self, x):
        x = self.conv(x)
        for mid_conv in self.mid_conv:
            x = mid_conv(x)
        x = self.conv_out(x)
        x = F.sigmoid(x)
        return x
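MattingHead is a plain conv stack whose final sigmoid keeps the predicted alpha in [0, 1]. A sketch with assumed feature sizes (MattingHead as defined above in this file):

import paddle

head = MattingHead(in_chan=32, mid_chan=32, mid_num=2)
alpha = head(paddle.rand([1, 32, 64, 64]))
print(alpha.shape)  # [1, 1, 64, 64]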
class DoublePyramidPoolModule(nn.Layer):
    """
    Extract global information through a double pyramid pool structure, with
    attention computed by transformer blocks.

    Args:
        stride(int): The stride for the inputs.
        input_channel(int): The total channels of input features.
        mid_channel(int, optional): The output channels of the first pyramid pool. Default: 256.
        output_channel(int, optional): The output channels. Default: 512.
        len_trans(int, optional): The depth of the transformer block. Default: 1.
        bin_sizes(list, optional): The output sizes of the second pyramid pool. Default: (2, 4, 6).
        mlp_ratios(int, optional): The expansion ratio of the MLP. Default: 2.
        attn_ratio(int, optional): The expansion ratio of the attention. Default: 2.
        merge_type(str, optional): How to merge the outputs of the second pyramid pool, one of ('concat', 'add'). Default: 'concat'.
        align_corners(bool, optional): Whether to use `align_corners` when interpolating. Default: False.
    """

    def __init__(self,
                 stride,
                 input_channel,
                 mid_channel=256,
                 output_channel=512,
                 len_trans=1,
                 bin_sizes=(2, 4, 6),
                 mlp_ratios=2,
                 attn_ratio=2,
                 merge_type='concat',
                 align_corners=False):
        super().__init__()
        self.mid_channel = mid_channel
        self.align_corners = align_corners
        self.mlp_ratios = mlp_ratios
        self.attn_ratio = attn_ratio

        if isinstance(len_trans, int):
            self.len_trans = [len_trans] * len(bin_sizes)
        elif isinstance(len_trans, (list, tuple)):
            self.len_trans = len_trans
            if len(len_trans) != len(bin_sizes):
                raise ValueError(
                    'If `len_trans` is a list or tuple, its length should be the same as `bin_sizes`'
                )
        else:
            raise ValueError(
                '`len_trans` only supports int, list and tuple types')

        if merge_type not in ['add', 'concat']:
            raise ValueError("`merge_type` only supports 'add' or 'concat'.")
        self.merge_type = merge_type

        self.pp1 = PyramidPoolAgg(stride=stride)
        self.conv_mid = layers.ConvBN(input_channel, mid_channel, 1)
        self.pp2 = nn.LayerList([
            self._make_stage(
                embedding_channels=mid_channel, size=size, block_num=block_num)
            for size, block_num in zip(bin_sizes, self.len_trans)
        ])
        if self.merge_type == 'concat':
            in_chan = mid_channel + mid_channel * len(bin_sizes)
        else:
            in_chan = mid_channel
        self.conv_out = layers.ConvBNReLU(
            in_chan, output_channel, kernel_size=1)

    def _make_stage(self, embedding_channels, size, block_num):
        prior = nn.AdaptiveAvgPool2D(output_size=size)
        if size == 1:
            trans = layers.ConvBNReLU(
                in_channels=embedding_channels,
                out_channels=embedding_channels,
                kernel_size=1)
        else:
            trans = BasicLayer(
                block_num=block_num,
                embedding_dim=embedding_channels,
                key_dim=16,
                num_heads=8,
                mlp_ratios=self.mlp_ratios,
                attn_ratio=self.attn_ratio,
                drop=0,
                attn_drop=0,
                drop_path=0,
                act_layer=nn.ReLU6,
                lr_mult=1.0)
        return nn.Sequential(prior, trans)

    def forward(self, inputs):
        x = self.pp1(inputs)
        pp2_input = self.conv_mid(x)

        cat_layers = []
        for stage in self.pp2:
            x = stage(pp2_input)
            x = F.interpolate(
                x,
                paddle.shape(pp2_input)[2:],
                mode='bilinear',
                align_corners=self.align_corners)
            cat_layers.append(x)
        cat_layers = [pp2_input] + cat_layers[::-1]
        if self.merge_type == 'concat':
            cat = paddle.concat(cat_layers, axis=1)
        else:
            cat = sum(cat_layers)
        out = self.conv_out(cat)

        return out
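# Usage sketch (shapes are illustrative, not from a shipped config). Four
# backbone features whose channels sum to `input_channel` are aggregated into
# a single global-context map:
#
#   dpp = DoublePyramidPoolModule(stride=2, input_channel=960)
#   feats = [paddle.rand([1, c, s, s])
#            for c, s in [(64, 128), (128, 64), (256, 32), (512, 16)]]
#   out = dpp(feats)  # [1, 512, 8, 8] with the defaults above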
class Conv2DBN(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 ks=1,
                 stride=1,
                 pad=0,
                 dilation=1,
                 groups=1,
                 bn_weight_init=1,
                 lr_mult=1.0):
        super().__init__()
        conv_weight_attr = paddle.ParamAttr(learning_rate=lr_mult)
        self.c = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=ks,
            stride=stride,
            padding=pad,
            dilation=dilation,
            groups=groups,
            weight_attr=conv_weight_attr,
            bias_attr=False)
        bn_weight_attr = paddle.ParamAttr(
            initializer=nn.initializer.Constant(bn_weight_init),
            learning_rate=lr_mult)
        bn_bias_attr = paddle.ParamAttr(
            initializer=nn.initializer.Constant(0), learning_rate=lr_mult)
        self.bn = nn.BatchNorm2D(
            out_channels, weight_attr=bn_weight_attr, bias_attr=bn_bias_attr)

    def forward(self, inputs):
        out = self.c(inputs)
        out = self.bn(out)
        return out
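# Conv2DBN fuses a bias-free convolution with BatchNorm2D: the BN shift makes a
# conv bias redundant, and `bn_weight_init=0` lets a residual branch start out
# as an identity mapping (used by `Attention.proj` below). `lr_mult` scales the
# learning rate of every parameter in the block via ParamAttr.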
class MLP(nn.Layer):
    def __init__(self,
                 in_features,
                 hidden_features=None,
                 out_features=None,
                 act_layer=nn.ReLU,
                 drop=0.,
                 lr_mult=1.0):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = Conv2DBN(in_features, hidden_features, lr_mult=lr_mult)
        param_attr = paddle.ParamAttr(learning_rate=lr_mult)
        self.dwconv = nn.Conv2D(
            hidden_features,
            hidden_features,
            3,
            1,
            1,
            groups=hidden_features,
            weight_attr=param_attr,
            bias_attr=param_attr)
        self.act = act_layer()
        self.fc2 = Conv2DBN(hidden_features, out_features, lr_mult=lr_mult)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.dwconv(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x
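# Unlike the plain two-layer transformer MLP, a 3x3 depthwise convolution is
# applied between the two 1x1 projections, mixing in local spatial context
# while the features sit in the expanded hidden dimension.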
class Attention(nn.Layer):
    def __init__(self,
                 dim,
                 key_dim,
                 num_heads,
                 attn_ratio=4,
                 activation=None,
                 lr_mult=1.0):
        super().__init__()
        self.num_heads = num_heads
        self.scale = key_dim**-0.5
        self.key_dim = key_dim
        self.nh_kd = nh_kd = key_dim * num_heads
        self.d = int(attn_ratio * key_dim)
        self.dh = int(attn_ratio * key_dim) * num_heads
        self.attn_ratio = attn_ratio

        self.to_q = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
        self.to_k = Conv2DBN(dim, nh_kd, 1, lr_mult=lr_mult)
        self.to_v = Conv2DBN(dim, self.dh, 1, lr_mult=lr_mult)

        self.proj = nn.Sequential(
            activation(),
            Conv2DBN(
                self.dh, dim, bn_weight_init=0, lr_mult=lr_mult))

    def forward(self, x):
        x_shape = paddle.shape(x)
        H, W = x_shape[2], x_shape[3]

        qq = self.to_q(x).reshape(
            [0, self.num_heads, self.key_dim, -1]).transpose([0, 1, 3, 2])
        kk = self.to_k(x).reshape([0, self.num_heads, self.key_dim, -1])
        vv = self.to_v(x).reshape([0, self.num_heads, self.d, -1]).transpose(
            [0, 1, 3, 2])

        attn = paddle.matmul(qq, kk)
        attn = F.softmax(attn, axis=-1)

        xx = paddle.matmul(attn, vv)

        xx = xx.transpose([0, 1, 3, 2]).reshape([0, self.dh, H, W])
        xx = self.proj(xx)
        return xx
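# Shape walk-through for Attention.forward, with N = H * W tokens:
#   qq: [B, num_heads, N, key_dim]
#   kk: [B, num_heads, key_dim, N]
#   vv: [B, num_heads, N, d]
#   attn = qq @ kk  -> [B, num_heads, N, N], softmax over the last axis
#   out = attn @ vv -> [B, num_heads, N, d], reshaped back to [B, dh, H, W]
# Note that `self.scale` is computed in __init__ but never applied before the
# softmax in this implementation.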
class Block(nn.Layer):
    def __init__(self,
                 dim,
                 key_dim,
                 num_heads,
                 mlp_ratios=4.,
                 attn_ratio=2.,
                 drop=0.,
                 drop_path=0.,
                 act_layer=nn.ReLU,
                 lr_mult=1.0):
        super().__init__()
        self.dim = dim
        self.num_heads = num_heads
        self.mlp_ratios = mlp_ratios

        self.attn = Attention(
            dim,
            key_dim=key_dim,
            num_heads=num_heads,
            attn_ratio=attn_ratio,
            activation=act_layer,
            lr_mult=lr_mult)

        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(
            drop_path) if drop_path > 0. else Identity()
        mlp_hidden_dim = int(dim * mlp_ratios)
        self.mlp = MLP(in_features=dim,
                       hidden_features=mlp_hidden_dim,
                       act_layer=act_layer,
                       drop=drop,
                       lr_mult=lr_mult)

    def forward(self, x):
        h = x
        x = self.attn(x)
        x = self.drop_path(x)
        x = h + x

        h = x
        x = self.mlp(x)
        x = self.drop_path(x)
        x = x + h
        return x
class BasicLayer(nn.Layer):
    def __init__(self,
                 block_num,
                 embedding_dim,
                 key_dim,
                 num_heads,
                 mlp_ratios=4.,
                 attn_ratio=2.,
                 drop=0.,
                 attn_drop=0.,
                 drop_path=0.,
                 act_layer=None,
                 lr_mult=1.0):
        super().__init__()
        self.block_num = block_num

        self.transformer_blocks = nn.LayerList()
        for i in range(self.block_num):
            self.transformer_blocks.append(
                Block(
                    embedding_dim,
                    key_dim=key_dim,
                    num_heads=num_heads,
                    mlp_ratios=mlp_ratios,
                    attn_ratio=attn_ratio,
                    drop=drop,
                    drop_path=drop_path[i]
                    if isinstance(drop_path, list) else drop_path,
                    act_layer=act_layer,
                    lr_mult=lr_mult))

    def forward(self, x):
        # token * N
        for i in range(self.block_num):
            x = self.transformer_blocks[i](x)
        return x
class PyramidPoolAgg(nn.Layer):
    def __init__(self, stride):
        super().__init__()
        self.stride = stride
        self.tmp = Identity()  # avoid the error of paddle.flops

    def forward(self, inputs):
        '''
        # The F.adaptive_avg_pool2d does not support the (H, W) be Tensor,
        # so exporting the inference model will raise error.
        _, _, H, W = inputs[-1].shape
        H = (H - 1) // self.stride + 1
        W = (W - 1) // self.stride + 1
        return paddle.concat(
            [F.adaptive_avg_pool2d(inp, (H, W)) for inp in inputs], axis=1)
        '''
        out = []
        ks = 2**len(inputs)
        stride = self.stride**len(inputs)
        for x in inputs:
            x = F.avg_pool2d(x, int(ks), int(stride))
            ks /= 2
            stride /= 2
            out.append(x)
        out = paddle.concat(out, axis=1)
        return out
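# The pooling loop above is an export-friendly stand-in for
# F.adaptive_avg_pool2d. With stride=2 and four input levels it uses
# (kernel, stride) pairs (16, 16), (8, 8), (4, 4), (2, 2) from the finest to
# the coarsest feature map, so every level lands on the same spatial size
# before the channel-wise concat.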
Matting/ppmatting/transforms/__init__.py
0 → 100644
from .transforms import *
Matting/ppmatting/transforms/transforms.py
0 → 100644
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import random
import string

import cv2
import numpy as np
from paddleseg.transforms import functional
from paddleseg.cvlibs import manager
from paddleseg.utils import seg_env
from PIL import Image
@manager.TRANSFORMS.add_component
class Compose:
    """
    Do transformation on input data with corresponding pre-processing and augmentation operations.
    The shape of input data to all operations is [height, width, channels].
    """

    def __init__(self, transforms, to_rgb=True):
        if not isinstance(transforms, list):
            raise TypeError('The transforms must be a list!')
        self.transforms = transforms
        self.to_rgb = to_rgb

    def __call__(self, data):
        """
        Args:
            data (dict): The data to transform.

        Returns:
            dict: Data after transformation
        """
        if 'trans_info' not in data:
            data['trans_info'] = []
        for op in self.transforms:
            data = op(data)
            if data is None:
                return None

        data['img'] = np.transpose(data['img'], (2, 0, 1))
        for key in data.get('gt_fields', []):
            if len(data[key].shape) == 2:
                continue
            data[key] = np.transpose(data[key], (2, 0, 1))

        return data
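# Usage sketch (transform classes from this file; the image path is a
# placeholder):
#
#   transforms = Compose([
#       LoadImages(),
#       RandomResize(size=(512, 512), scale=(0.5, 1.5)),
#       RandomHorizontalFlip(),
#       Normalize(),
#   ])
#   data = transforms({'img': 'example.jpg'})
#   # data['img'] is now a CHW float32 array, ready for paddle.to_tensor.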
@manager.TRANSFORMS.add_component
class LoadImages:
    def __init__(self, to_rgb=True):
        self.to_rgb = to_rgb

    def __call__(self, data):
        if isinstance(data['img'], str):
            data['img'] = cv2.imread(data['img'])
        for key in data.get('gt_fields', []):
            if isinstance(data[key], str):
                data[key] = cv2.imread(data[key], cv2.IMREAD_UNCHANGED)
            # If alpha or trimap has 3 channels, keep only one.
            if key in ['alpha', 'trimap']:
                if len(data[key].shape) > 2:
                    data[key] = data[key][:, :, 0]

        if self.to_rgb:
            data['img'] = cv2.cvtColor(data['img'], cv2.COLOR_BGR2RGB)
            for key in data.get('gt_fields', []):
                if len(data[key].shape) == 2:
                    continue
                data[key] = cv2.cvtColor(data[key], cv2.COLOR_BGR2RGB)

        return data
@manager.TRANSFORMS.add_component
class Resize:
    def __init__(self, target_size=(512, 512), random_interp=False):
        if isinstance(target_size, list) or isinstance(target_size, tuple):
            if len(target_size) != 2:
                raise ValueError(
                    '`target_size` should include 2 elements, but it is {}'.
                    format(target_size))
        else:
            raise TypeError(
                "Type of `target_size` is invalid. It should be list or tuple, but it is {}"
                .format(type(target_size)))

        self.target_size = target_size
        self.random_interp = random_interp
        self.interps = [cv2.INTER_NEAREST, cv2.INTER_LINEAR, cv2.INTER_CUBIC]

    def __call__(self, data):
        if self.random_interp:
            interp = np.random.choice(self.interps)
        else:
            interp = cv2.INTER_LINEAR
        data['trans_info'].append(('resize', data['img'].shape[0:2]))
        data['img'] = functional.resize(data['img'], self.target_size, interp)
        for key in data.get('gt_fields', []):
            if key == 'trimap':
                data[key] = functional.resize(data[key], self.target_size,
                                              cv2.INTER_NEAREST)
            else:
                data[key] = functional.resize(data[key], self.target_size,
                                              interp)
        return data
@manager.TRANSFORMS.add_component
class RandomResize:
    """
    Resize an image to a size determined by `scale` and `size`.

    Args:
        size(tuple|list): The reference size to resize. A tuple or list with length 2.
        scale(tuple|list, optional): A range of scale based on `size`. A tuple or list with length 2. Default: None.
    """

    def __init__(self, size=None, scale=None):
        if isinstance(size, list) or isinstance(size, tuple):
            if len(size) != 2:
                raise ValueError(
                    '`size` should include 2 elements, but it is {}'.format(
                        size))
        elif size is not None:
            raise TypeError(
                "Type of `size` is invalid. It should be list or tuple, but it is {}"
                .format(type(size)))

        if scale is not None:
            if isinstance(scale, list) or isinstance(scale, tuple):
                if len(scale) != 2:
                    raise ValueError(
                        '`scale` should include 2 elements, but it is {}'.
                        format(scale))
            else:
                raise TypeError(
                    "Type of `scale` is invalid. It should be list or tuple, but it is {}"
                    .format(type(scale)))
        self.size = size
        self.scale = scale

    def __call__(self, data):
        h, w = data['img'].shape[:2]
        if self.scale is not None:
            scale = np.random.uniform(self.scale[0], self.scale[1])
        else:
            scale = 1.
        if self.size is not None:
            scale_factor = max(self.size[0] / w, self.size[1] / h)
        else:
            scale_factor = 1
        scale = scale * scale_factor

        w = int(round(w * scale))
        h = int(round(h * scale))
        data['img'] = functional.resize(data['img'], (w, h))
        for key in data.get('gt_fields', []):
            if key == 'trimap':
                data[key] = functional.resize(data[key], (w, h),
                                              cv2.INTER_NEAREST)
            else:
                data[key] = functional.resize(data[key], (w, h))

        return data
@manager.TRANSFORMS.add_component
class ResizeByLong:
    """
    Resize the long side of an image to a given size, and then scale the other side proportionally.

    Args:
        long_size (int): The target size of the long side.
    """

    def __init__(self, long_size):
        self.long_size = long_size

    def __call__(self, data):
        data['trans_info'].append(('resize', data['img'].shape[0:2]))
        data['img'] = functional.resize_long(data['img'], self.long_size)
        for key in data.get('gt_fields', []):
            if key == 'trimap':
                data[key] = functional.resize_long(data[key], self.long_size,
                                                   cv2.INTER_NEAREST)
            else:
                data[key] = functional.resize_long(data[key], self.long_size)

        return data
@manager.TRANSFORMS.add_component
class ResizeByShort:
    """
    Resize the short side of an image to a given size, and then scale the other side proportionally.

    Args:
        short_size (int): The target size of the short side.
    """

    def __init__(self, short_size):
        self.short_size = short_size

    def __call__(self, data):
        data['trans_info'].append(('resize', data['img'].shape[0:2]))
        data['img'] = functional.resize_short(data['img'], self.short_size)
        for key in data.get('gt_fields', []):
            if key == 'trimap':
                data[key] = functional.resize_short(data[key], self.short_size,
                                                    cv2.INTER_NEAREST)
            else:
                data[key] = functional.resize_short(data[key], self.short_size)

        return data
@manager.TRANSFORMS.add_component
class ResizeToIntMult:
    """
    Resize an image so that both sides are an integer multiple of `mult_int`, e.g. 32.
    """

    def __init__(self, mult_int=32):
        self.mult_int = mult_int

    def __call__(self, data):
        data['trans_info'].append(('resize', data['img'].shape[0:2]))

        h, w = data['img'].shape[0:2]
        rw = w - w % self.mult_int
        rh = h - h % self.mult_int
        data['img'] = functional.resize(data['img'], (rw, rh))
        for key in data.get('gt_fields', []):
            if key == 'trimap':
                data[key] = functional.resize(data[key], (rw, rh),
                                              cv2.INTER_NEAREST)
            else:
                data[key] = functional.resize(data[key], (rw, rh))

        return data
@manager.TRANSFORMS.add_component
class Normalize:
    """
    Normalize an image.

    Args:
        mean (list, optional): The mean value of a data set. Default: [0.5, 0.5, 0.5].
        std (list, optional): The standard deviation of a data set. Default: [0.5, 0.5, 0.5].

    Raises:
        ValueError: When mean/std is not list or any value in std is 0.
    """

    def __init__(self, mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
        self.mean = mean
        self.std = std
        if not (isinstance(self.mean, (list, tuple)) and
                isinstance(self.std, (list, tuple))):
            raise ValueError(
                "{}: input type is invalid. It should be list or tuple".format(
                    self))
        from functools import reduce
        if reduce(lambda x, y: x * y, self.std) == 0:
            raise ValueError('{}: std is invalid!'.format(self))

    def __call__(self, data):
        mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
        std = np.array(self.std)[np.newaxis, np.newaxis, :]
        data['img'] = functional.normalize(data['img'], mean, std)
        if 'fg' in data.get('gt_fields', []):
            data['fg'] = functional.normalize(data['fg'], mean, std)
        if 'bg' in data.get('gt_fields', []):
            data['bg'] = functional.normalize(data['bg'], mean, std)

        return data
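# With the default mean/std of 0.5, paddleseg's functional.normalize first
# scales pixels from [0, 255] to [0, 1] and then applies mean/std, mapping the
# image to roughly [-1, 1]: out = (img / 255 - 0.5) / 0.5.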
@manager.TRANSFORMS.add_component
class RandomCropByAlpha:
    """
    Randomly crop while centering on the uncertain area with a certain probability.

    Args:
        crop_size (tuple|list): The sizes you want to crop from the image.
        prob (float, optional): The probability of centering the crop on the uncertain area. Default: 0.5.
    """

    def __init__(self,
                 crop_size=((320, 320), (480, 480), (640, 640)),
                 prob=0.5):
        self.crop_size = crop_size
        self.prob = prob

    def __call__(self, data):
        idx = np.random.randint(low=0, high=len(self.crop_size))
        crop_w, crop_h = self.crop_size[idx]

        img_h = data['img'].shape[0]
        img_w = data['img'].shape[1]
        if np.random.rand() < self.prob:
            crop_center = np.where((data['alpha'] > 0) & (data['alpha'] < 255))
            center_h_array, center_w_array = crop_center
            if len(center_h_array) == 0:
                return data
            rand_ind = np.random.randint(len(center_h_array))
            center_h = center_h_array[rand_ind]
            center_w = center_w_array[rand_ind]
            delta_h = crop_h // 2
            delta_w = crop_w // 2
            start_h = max(0, center_h - delta_h)
            start_w = max(0, center_w - delta_w)
        else:
            start_h = 0
            start_w = 0
            if img_h > crop_h:
                start_h = np.random.randint(img_h - crop_h + 1)
            if img_w > crop_w:
                start_w = np.random.randint(img_w - crop_w + 1)

        end_h = min(img_h, start_h + crop_h)
        end_w = min(img_w, start_w + crop_w)

        data['img'] = data['img'][start_h:end_h, start_w:end_w]
        for key in data.get('gt_fields', []):
            data[key] = data[key][start_h:end_h, start_w:end_w]

        return data
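# With probability `prob` the crop window is centered on a randomly chosen
# transition pixel (0 < alpha < 255), so most training crops contain the hard,
# semi-transparent band; otherwise the window position is sampled uniformly.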
@manager.TRANSFORMS.add_component
class RandomCrop:
    """
    Randomly crop an image.

    Args:
        crop_size (tuple|list): The sizes you want to crop from the image.
    """

    def __init__(self, crop_size=((320, 320), (480, 480), (640, 640))):
        if not isinstance(crop_size[0], (list, tuple)):
            crop_size = [crop_size]
        self.crop_size = crop_size

    def __call__(self, data):
        idx = np.random.randint(low=0, high=len(self.crop_size))
        crop_w, crop_h = self.crop_size[idx]
        img_h, img_w = data['img'].shape[0:2]

        start_h = 0
        start_w = 0
        if img_h > crop_h:
            start_h = np.random.randint(img_h - crop_h + 1)
        if img_w > crop_w:
            start_w = np.random.randint(img_w - crop_w + 1)

        end_h = min(img_h, start_h + crop_h)
        end_w = min(img_w, start_w + crop_w)

        data['img'] = data['img'][start_h:end_h, start_w:end_w]
        for key in data.get('gt_fields', []):
            data[key] = data[key][start_h:end_h, start_w:end_w]

        return data
@manager.TRANSFORMS.add_component
class LimitLong:
    """
    Limit the long edge of an image.

    If the long edge is larger than max_long, resize the long edge
    to max_long, while scaling the short edge proportionally.

    If the long edge is smaller than min_long, resize the long edge
    to min_long, while scaling the short edge proportionally.

    Args:
        max_long (int, optional): If the long edge of the image is larger than max_long,
            it will be resized to max_long. Default: None.
        min_long (int, optional): If the long edge of the image is smaller than min_long,
            it will be resized to min_long. Default: None.
    """

    def __init__(self, max_long=None, min_long=None):
        if max_long is not None:
            if not isinstance(max_long, int):
                raise TypeError(
                    "Type of `max_long` is invalid. It should be int, but it is {}"
                    .format(type(max_long)))
        if min_long is not None:
            if not isinstance(min_long, int):
                raise TypeError(
                    "Type of `min_long` is invalid. It should be int, but it is {}"
                    .format(type(min_long)))
        if (max_long is not None) and (min_long is not None):
            if min_long > max_long:
                raise ValueError(
                    '`max_long` should not be smaller than `min_long`, but they are {} and {}'
                    .format(max_long, min_long))
        self.max_long = max_long
        self.min_long = min_long

    def __call__(self, data):
        h, w = data['img'].shape[:2]
        long_edge = max(h, w)
        target = long_edge
        if (self.max_long is not None) and (long_edge > self.max_long):
            target = self.max_long
        elif (self.min_long is not None) and (long_edge < self.min_long):
            target = self.min_long

        data['trans_info'].append(('resize', data['img'].shape[0:2]))
        if target != long_edge:
            data['img'] = functional.resize_long(data['img'], target)
            for key in data.get('gt_fields', []):
                if key == 'trimap':
                    data[key] = functional.resize_long(data[key], target,
                                                       cv2.INTER_NEAREST)
                else:
                    data[key] = functional.resize_long(data[key], target)

        return data
@manager.TRANSFORMS.add_component
class LimitShort:
    """
    Limit the short edge of an image.

    If the short edge is larger than max_short, resize the short edge
    to max_short, while scaling the long edge proportionally.

    If the short edge is smaller than min_short, resize the short edge
    to min_short, while scaling the long edge proportionally.

    Args:
        max_short (int, optional): If the short edge of the image is larger than max_short,
            it will be resized to max_short. Default: None.
        min_short (int, optional): If the short edge of the image is smaller than min_short,
            it will be resized to min_short. Default: None.
    """

    def __init__(self, max_short=None, min_short=None):
        if max_short is not None:
            if not isinstance(max_short, int):
                raise TypeError(
                    "Type of `max_short` is invalid. It should be int, but it is {}"
                    .format(type(max_short)))
        if min_short is not None:
            if not isinstance(min_short, int):
                raise TypeError(
                    "Type of `min_short` is invalid. It should be int, but it is {}"
                    .format(type(min_short)))
        if (max_short is not None) and (min_short is not None):
            if min_short > max_short:
                raise ValueError(
                    '`max_short` should not be smaller than `min_short`, but they are {} and {}'
                    .format(max_short, min_short))
        self.max_short = max_short
        self.min_short = min_short

    def __call__(self, data):
        h, w = data['img'].shape[:2]
        short_edge = min(h, w)
        target = short_edge
        if (self.max_short is not None) and (short_edge > self.max_short):
            target = self.max_short
        elif (self.min_short is not None) and (short_edge < self.min_short):
            target = self.min_short

        data['trans_info'].append(('resize', data['img'].shape[0:2]))
        if target != short_edge:
            data['img'] = functional.resize_short(data['img'], target)
            for key in data.get('gt_fields', []):
                if key == 'trimap':
                    data[key] = functional.resize_short(data[key], target,
                                                        cv2.INTER_NEAREST)
                else:
                    data[key] = functional.resize_short(data[key], target)

        return data
@manager.TRANSFORMS.add_component
class RandomHorizontalFlip:
    """
    Flip an image horizontally with a certain probability.

    Args:
        prob (float, optional): A probability of horizontally flipping. Default: 0.5.
    """

    def __init__(self, prob=0.5):
        self.prob = prob

    def __call__(self, data):
        if random.random() < self.prob:
            data['img'] = functional.horizontal_flip(data['img'])
            for key in data.get('gt_fields', []):
                data[key] = functional.horizontal_flip(data[key])

        return data
@manager.TRANSFORMS.add_component
class RandomBlur:
    """
    Blur an image with a Gaussian filter with a certain probability.

    Args:
        prob (float, optional): A probability of blurring an image. Default: 0.1.
    """

    def __init__(self, prob=0.1):
        self.prob = prob

    def __call__(self, data):
        if self.prob <= 0:
            n = 0
        elif self.prob >= 1:
            n = 1
        else:
            n = int(1.0 / self.prob)
        if n > 0:
            if np.random.randint(0, n) == 0:
                radius = np.random.randint(3, 10)
                if radius % 2 != 1:
                    radius = radius + 1
                if radius > 9:
                    radius = 9
                data['img'] = cv2.GaussianBlur(data['img'], (radius, radius),
                                               0, 0)
                for key in data.get('gt_fields', []):
                    if key == 'trimap':
                        continue
                    data[key] = cv2.GaussianBlur(data[key], (radius, radius),
                                                 0, 0)

        return data
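# The blur fires with probability roughly `prob`: with n = int(1 / prob),
# np.random.randint(0, n) == 0 happens about once every n draws. The kernel
# size is then forced to an odd value in [3, 9], since cv2.GaussianBlur
# requires odd kernel sizes.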
@manager.TRANSFORMS.add_component
class RandomDistort:
    """
    Distort an image with random configurations.

    Args:
        brightness_range (float, optional): A range of brightness. Default: 0.5.
        brightness_prob (float, optional): A probability of adjusting brightness. Default: 0.5.
        contrast_range (float, optional): A range of contrast. Default: 0.5.
        contrast_prob (float, optional): A probability of adjusting contrast. Default: 0.5.
        saturation_range (float, optional): A range of saturation. Default: 0.5.
        saturation_prob (float, optional): A probability of adjusting saturation. Default: 0.5.
        hue_range (int, optional): A range of hue. Default: 18.
        hue_prob (float, optional): A probability of adjusting hue. Default: 0.5.
    """

    def __init__(self,
                 brightness_range=0.5,
                 brightness_prob=0.5,
                 contrast_range=0.5,
                 contrast_prob=0.5,
                 saturation_range=0.5,
                 saturation_prob=0.5,
                 hue_range=18,
                 hue_prob=0.5):
        self.brightness_range = brightness_range
        self.brightness_prob = brightness_prob
        self.contrast_range = contrast_range
        self.contrast_prob = contrast_prob
        self.saturation_range = saturation_range
        self.saturation_prob = saturation_prob
        self.hue_range = hue_range
        self.hue_prob = hue_prob

    def __call__(self, data):
        brightness_lower = 1 - self.brightness_range
        brightness_upper = 1 + self.brightness_range
        contrast_lower = 1 - self.contrast_range
        contrast_upper = 1 + self.contrast_range
        saturation_lower = 1 - self.saturation_range
        saturation_upper = 1 + self.saturation_range
        hue_lower = -self.hue_range
        hue_upper = self.hue_range
        ops = [
            functional.brightness, functional.contrast, functional.saturation,
            functional.hue
        ]
        random.shuffle(ops)
        params_dict = {
            'brightness': {
                'brightness_lower': brightness_lower,
                'brightness_upper': brightness_upper
            },
            'contrast': {
                'contrast_lower': contrast_lower,
                'contrast_upper': contrast_upper
            },
            'saturation': {
                'saturation_lower': saturation_lower,
                'saturation_upper': saturation_upper
            },
            'hue': {
                'hue_lower': hue_lower,
                'hue_upper': hue_upper
            }
        }
        prob_dict = {
            'brightness': self.brightness_prob,
            'contrast': self.contrast_prob,
            'saturation': self.saturation_prob,
            'hue': self.hue_prob
        }

        im = data['img'].astype('uint8')
        im = Image.fromarray(im)
        for id in range(len(ops)):
            params = params_dict[ops[id].__name__]
            params['im'] = im
            prob = prob_dict[ops[id].__name__]
            if np.random.uniform(0, 1) < prob:
                im = ops[id](**params)
        data['img'] = np.asarray(im)

        for key in data.get('gt_fields', []):
            if key in ['alpha', 'trimap']:
                continue
            else:
                im = data[key].astype('uint8')
                im = Image.fromarray(im)
                for id in range(len(ops)):
                    params = params_dict[ops[id].__name__]
                    params['im'] = im
                    prob = prob_dict[ops[id].__name__]
                    if np.random.uniform(0, 1) < prob:
                        im = ops[id](**params)
                data[key] = np.asarray(im)

        return data
@manager.TRANSFORMS.add_component
class Padding:
    """
    Add bottom-right padding to a raw image or annotation image.

    Args:
        target_size (list|tuple): The target size after padding.
        im_padding_value (list, optional): The padding value of the raw image.
            Default: (127.5, 127.5, 127.5).

    Raises:
        TypeError: When target_size is neither list nor tuple.
        ValueError: When the length of target_size is not 2.
    """

    def __init__(self, target_size, im_padding_value=(127.5, 127.5, 127.5)):
        if isinstance(target_size, list) or isinstance(target_size, tuple):
            if len(target_size) != 2:
                raise ValueError(
                    '`target_size` should include 2 elements, but it is {}'.
                    format(target_size))
        else:
            raise TypeError(
                "Type of target_size is invalid. It should be list or tuple, now is {}"
                .format(type(target_size)))

        self.target_size = target_size
        self.im_padding_value = im_padding_value

    def __call__(self, data):
        im_height, im_width = data['img'].shape[0], data['img'].shape[1]
        target_height = self.target_size[1]
        target_width = self.target_size[0]
        pad_height = max(0, target_height - im_height)
        pad_width = max(0, target_width - im_width)
        data['trans_info'].append(('padding', data['img'].shape[0:2]))
        if (pad_height == 0) and (pad_width == 0):
            return data
        else:
            data['img'] = cv2.copyMakeBorder(
                data['img'],
                0,
                pad_height,
                0,
                pad_width,
                cv2.BORDER_CONSTANT,
                value=self.im_padding_value)
            for key in data.get('gt_fields', []):
                if key in ['trimap', 'alpha']:
                    value = 0
                else:
                    value = self.im_padding_value
                data[key] = cv2.copyMakeBorder(
                    data[key],
                    0,
                    pad_height,
                    0,
                    pad_width,
                    cv2.BORDER_CONSTANT,
                    value=value)

        return data
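# Padding records the pre-pad shape in data['trans_info'] so predictions can
# be cropped back to the original size after inference. Only the bottom and
# right edges are padded, which keeps existing pixel coordinates unchanged;
# alpha/trimap fields are padded with 0 (background) rather than the image
# fill value.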
@manager.TRANSFORMS.add_component
class RandomSharpen:
    def __init__(self, prob=0.1):
        if prob < 0:
            self.prob = 0
        elif prob > 1:
            self.prob = 1
        else:
            self.prob = prob

    def __call__(self, data):
        if np.random.rand() > self.prob:
            return data

        radius = np.random.choice([0, 3, 5, 7, 9])
        w = np.random.uniform(0.1, 0.5)
        blur_img = cv2.GaussianBlur(data['img'], (radius, radius), 5)
        data['img'] = cv2.addWeighted(data['img'], 1 + w, blur_img, -w, 0)
        for key in data.get('gt_fields', []):
            if key == 'trimap' or key == 'alpha':
                continue
            blur_img = cv2.GaussianBlur(data[key], (0, 0), 5)
            data[key] = cv2.addWeighted(data[key], 1.5, blur_img, -0.5, 0)

        return data
@manager.TRANSFORMS.add_component
class RandomNoise:
    def __init__(self, prob=0.1):
        if prob < 0:
            self.prob = 0
        elif prob > 1:
            self.prob = 1
        else:
            self.prob = prob

    def __call__(self, data):
        if np.random.rand() > self.prob:
            return data
        mean = np.random.uniform(0, 0.04)
        var = np.random.uniform(0, 0.001)
        noise = np.random.normal(mean, var**0.5, data['img'].shape) * 255
        data['img'] = data['img'] + noise
        data['img'] = np.clip(data['img'], 0, 255)

        return data
@manager.TRANSFORMS.add_component
class RandomReJpeg:
    def __init__(self, prob=0.1):
        if prob < 0:
            self.prob = 0
        elif prob > 1:
            self.prob = 1
        else:
            self.prob = prob

    def __call__(self, data):
        if np.random.rand() > self.prob:
            return data
        q = np.random.randint(70, 95)
        img = data['img'].astype('uint8')

        # Ensure no conflicts between processes
        tmp_name = str(os.getpid()) + '.jpg'
        tmp_name = os.path.join(seg_env.TMP_HOME, tmp_name)
        cv2.imwrite(tmp_name, img, [int(cv2.IMWRITE_JPEG_QUALITY), q])
        data['img'] = cv2.imread(tmp_name)

        return data
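# RandomReJpeg simulates compression artifacts by round-tripping the image
# through a temporary JPEG at quality 70-95. The PID-based filename keeps
# parallel dataloader workers from overwriting each other's temporary files.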
Matting/ppmatting/utils/__init__.py
0 → 100644
from .estimate_foreground_ml import estimate_foreground_ml
from .utils import get_files, get_image_list, mkdir, load_pretrained_model
Matting/ppmatting/utils/estimate_foreground_ml.py
0 → 100644
import numpy as np
from numba import njit, prange

# The foreground estimation refers to pymatting [https://github.com/pymatting/pymatting/blob/master/pymatting/foreground/estimate_foreground_ml.py]


@njit(
    "void(f4[:, :, :], f4[:, :, :])", cache=True, nogil=True, parallel=True)
def _resize_nearest_multichannel(dst, src):
    """
    Internal method.

    Resize image src to dst using nearest neighbors filtering.
    Images must have multiple color channels, i.e. :code:`len(shape) == 3`.

    Parameters
    ----------
    dst: numpy.ndarray of type np.float32
        output image
    src: numpy.ndarray of type np.float32
        input image
    """
    h_src, w_src, depth = src.shape
    h_dst, w_dst, depth = dst.shape

    for y_dst in prange(h_dst):
        for x_dst in range(w_dst):
            x_src = max(0, min(w_src - 1, x_dst * w_src // w_dst))
            y_src = max(0, min(h_src - 1, y_dst * h_src // h_dst))

            for c in range(depth):
                dst[y_dst, x_dst, c] = src[y_src, x_src, c]
@njit("void(f4[:, :], f4[:, :])", cache=True, nogil=True, parallel=True)
def _resize_nearest(dst, src):
    """
    Internal method.

    Resize image src to dst using nearest neighbors filtering.
    Images must be grayscale, i.e. :code:`len(shape) == 2`.

    Parameters
    ----------
    dst: numpy.ndarray of type np.float32
        output image
    src: numpy.ndarray of type np.float32
        input image
    """
    h_src, w_src = src.shape
    h_dst, w_dst = dst.shape

    for y_dst in prange(h_dst):
        for x_dst in range(w_dst):
            x_src = max(0, min(w_src - 1, x_dst * w_src // w_dst))
            y_src = max(0, min(h_src - 1, y_dst * h_src // h_dst))

            dst[y_dst, x_dst] = src[y_src, x_src]
# TODO
# There should be an option to switch @njit(parallel=True) on or off.
# parallel=True would be faster, but might cause race conditions.
# User should have the option to turn it on or off.
@njit(
    "Tuple((f4[:, :, :], f4[:, :, :]))(f4[:, :, :], f4[:, :], f4, i4, i4, i4, f4)",
    cache=True,
    nogil=True)
def _estimate_fb_ml(
        input_image,
        input_alpha,
        regularization,
        n_small_iterations,
        n_big_iterations,
        small_size,
        gradient_weight, ):
    h0, w0, depth = input_image.shape

    dtype = np.float32

    w_prev = 1
    h_prev = 1

    F_prev = np.empty((h_prev, w_prev, depth), dtype=dtype)
    B_prev = np.empty((h_prev, w_prev, depth), dtype=dtype)

    n_levels = int(np.ceil(np.log2(max(w0, h0))))

    for i_level in range(n_levels + 1):
        w = round(w0**(i_level / n_levels))
        h = round(h0**(i_level / n_levels))

        image = np.empty((h, w, depth), dtype=dtype)
        alpha = np.empty((h, w), dtype=dtype)

        _resize_nearest_multichannel(image, input_image)
        _resize_nearest(alpha, input_alpha)

        F = np.empty((h, w, depth), dtype=dtype)
        B = np.empty((h, w, depth), dtype=dtype)

        _resize_nearest_multichannel(F, F_prev)
        _resize_nearest_multichannel(B, B_prev)

        if w <= small_size and h <= small_size:
            n_iter = n_small_iterations
        else:
            n_iter = n_big_iterations

        b = np.zeros((2, depth), dtype=dtype)

        dx = [-1, 1, 0, 0]
        dy = [0, 0, -1, 1]

        for i_iter in range(n_iter):
            for y in prange(h):
                for x in range(w):
                    a0 = alpha[y, x]
                    a1 = 1.0 - a0

                    a00 = a0 * a0
                    a01 = a0 * a1
                    # a10 = a01 can be omitted due to symmetry of matrix
                    a11 = a1 * a1

                    for c in range(depth):
                        b[0, c] = a0 * image[y, x, c]
                        b[1, c] = a1 * image[y, x, c]

                    for d in range(4):
                        x2 = max(0, min(w - 1, x + dx[d]))
                        y2 = max(0, min(h - 1, y + dy[d]))

                        gradient = abs(a0 - alpha[y2, x2])

                        da = regularization + gradient_weight * gradient

                        a00 += da
                        a11 += da

                        for c in range(depth):
                            b[0, c] += da * F[y2, x2, c]
                            b[1, c] += da * B[y2, x2, c]

                    determinant = a00 * a11 - a01 * a01

                    inv_det = 1.0 / determinant

                    b00 = inv_det * a11
                    b01 = inv_det * -a01
                    b11 = inv_det * a00

                    for c in range(depth):
                        F_c = b00 * b[0, c] + b01 * b[1, c]
                        B_c = b01 * b[0, c] + b11 * b[1, c]

                        F_c = max(0.0, min(1.0, F_c))
                        B_c = max(0.0, min(1.0, B_c))

                        F[y, x, c] = F_c
                        B[y, x, c] = B_c

        F_prev = F
        B_prev = B

        w_prev = w
        h_prev = h

    return F, B
def estimate_foreground_ml(image,
                           alpha,
                           regularization=1e-5,
                           n_small_iterations=10,
                           n_big_iterations=2,
                           small_size=32,
                           return_background=False,
                           gradient_weight=1.0, ):
    """Estimates the foreground of an image given its alpha matte.

    See :cite:`germer2020multilevel` for reference.

    Parameters
    ----------
    image: numpy.ndarray
        Input image with shape :math:`h \\times w \\times d`
    alpha: numpy.ndarray
        Input alpha matte with shape :math:`h \\times w`
    regularization: float
        Regularization strength :math:`\\epsilon`, defaults to :math:`10^{-5}`.
        Higher regularization results in smoother colors.
    n_small_iterations: int
        Number of iterations performed on small scale, defaults to :math:`10`
    n_big_iterations: int
        Number of iterations performed on large scale, defaults to :math:`2`
    small_size: int
        Threshold that determines at which size `n_small_iterations` should be used
    return_background: bool
        Whether to return the estimated background in addition to the foreground
    gradient_weight: float
        Larger values enforce smoother foregrounds, defaults to :math:`1`

    Returns
    -------
    F: numpy.ndarray
        Extracted foreground
    B: numpy.ndarray
        Extracted background

    Example
    -------
    >>> from pymatting import *
    >>> image = load_image("data/lemur/lemur.png", "RGB")
    >>> alpha = load_image("data/lemur/lemur_alpha.png", "GRAY")
    >>> F = estimate_foreground_ml(image, alpha, return_background=False)
    >>> F, B = estimate_foreground_ml(image, alpha, return_background=True)

    See Also
    --------
    stack_images: This function can be used to place the foreground on a new background.
    """
    foreground, background = _estimate_fb_ml(
        image.astype(np.float32),
        alpha.astype(np.float32),
        regularization,
        n_small_iterations,
        n_big_iterations,
        small_size,
        gradient_weight, )

    if return_background:
        return foreground, background

    return foreground
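# Compositing sketch using this module directly (file names are placeholders):
#
#   import cv2
#   image = cv2.imread('input.png').astype(np.float32) / 255.0
#   alpha = cv2.imread('alpha.png', cv2.IMREAD_GRAYSCALE).astype(np.float32) / 255.0
#   fg = estimate_foreground_ml(image, alpha)
#   comp = fg * alpha[:, :, None]  # paste onto a black background
#   cv2.imwrite('comp.png', (comp * 255).astype('uint8'))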