wangsen / paddle_dbnet — commit aad3093a
Authored Oct 13, 2020 by WenmuZhou
Parent commit: 10f7e519

Commit message: dygraph first commit

Changes: 147 files in the commit; the view below shows 20 changed files with 740 additions and 2955 deletions (+740 / -2955).
ppocr/modeling/backbones/det_resnet_vd_sast.py   +0    -274
ppocr/modeling/backbones/rec_mobilenet_v3.py     +82   -201
ppocr/modeling/backbones/rec_resnet_fpn.py       +0    -246
ppocr/modeling/backbones/rec_resnet_vd.py        +244  -203
ppocr/modeling/common_functions.py               +0    -95
ppocr/modeling/heads/__init__.py                 +17   -0
ppocr/modeling/heads/det_db_head.py              +101  -178
ppocr/modeling/heads/det_east_head.py            +0    -117
ppocr/modeling/heads/det_sast_head.py            +0    -228
ppocr/modeling/heads/rec_attention_head.py       +0    -237
ppocr/modeling/heads/rec_ctc_head.py             +38   -39
ppocr/modeling/heads/rec_seq_encoder.py          +0    -100
ppocr/modeling/heads/rec_srn_all_head.py         +0    -230
ppocr/modeling/heads/self_attention/__init__.py  +0    -0
ppocr/modeling/heads/self_attention/model.py     +0    -485
ppocr/modeling/losses/__init__.py                +19   -0
ppocr/modeling/losses/det_basic_loss.py          +193  -103
ppocr/modeling/losses/det_db_loss.py             +46   -43
ppocr/modeling/losses/det_east_loss.py           +0    -61
ppocr/modeling/losses/det_sast_loss.py           +0    -115
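The pattern repeated across these files is a port from the static-graph fluid API (free functions calling fluid.layers.conv2d / batch_norm) to dygraph nn.Layer modules with a forward() method. A minimal sketch of that conversion, assuming current paddle 2.x layer names (nn.Conv2D / nn.BatchNorm2D); the classes in this repository (e.g. ConvBNLayer in det_mobilenet_v3.py) use slightly different dev-era spellings and extra naming arguments:

# Illustrative sketch only, not code from this commit.
import paddle
import paddle.nn as nn


class ConvBN(nn.Layer):
    """conv2d + batch_norm as a reusable sublayer, replacing the old
    fluid.layers.conv2d / fluid.layers.batch_norm call pairs."""

    def __init__(self, in_channels, out_channels, kernel_size, stride=1, act=None):
        super().__init__()
        self.conv = nn.Conv2D(in_channels, out_channels, kernel_size,
                              stride=stride, padding=(kernel_size - 1) // 2,
                              bias_attr=False)
        self.bn = nn.BatchNorm2D(out_channels)
        self.act = nn.ReLU() if act == 'relu' else None

    def forward(self, x):
        x = self.bn(self.conv(x))
        return self.act(x) if self.act is not None else x


if __name__ == '__main__':
    layer = ConvBN(3, 32, 3, stride=2, act='relu')
    y = layer(paddle.zeros((1, 3, 32, 320)))
    print(y.shape)  # [1, 32, 16, 160]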
ppocr/modeling/backbones/det_resnet_vd_sast.py — deleted (file mode 100644 → 0)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr

__all__ = ["ResNet"]


class ResNet(object):
    def __init__(self, params):
        """
        the Resnet backbone network for detection module.
        Args:
            params(dict): the super parameters for network build
        """
        self.layers = params['layers']
        supported_layers = [18, 34, 50, 101, 152]
        assert self.layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, self.layers)
        self.is_3x3 = True

    def __call__(self, input):
        layers = self.layers
        is_3x3 = self.is_3x3
        # if layers == 18:
        #     depth = [2, 2, 2, 2]
        # elif layers == 34 or layers == 50:
        #     depth = [3, 4, 6, 3]
        # elif layers == 101:
        #     depth = [3, 4, 23, 3]
        # elif layers == 152:
        #     depth = [3, 8, 36, 3]
        # elif layers == 200:
        #     depth = [3, 12, 48, 3]
        # num_filters = [64, 128, 256, 512]
        # outs = []
        if layers == 18:
            depth = [2, 2, 2, 2]  #, 3, 3]
        elif layers == 34 or layers == 50:
            #depth = [3, 4, 6, 3]#, 3, 3]
            depth = [3, 4, 6, 3, 3]  #, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]  #, 3, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]  #, 3, 3]
        num_filters = [64, 128, 256, 512, 512]  #, 512]

        blocks = {}
        idx = 'block_0'
        blocks[idx] = input
        if is_3x3 == False:
            conv = self.conv_bn_layer(
                input=input, num_filters=64, filter_size=7, stride=2, act='relu')
        else:
            conv = self.conv_bn_layer(
                input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1')
            conv = self.conv_bn_layer(
                input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2')
            conv = self.conv_bn_layer(
                input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3')
        idx = 'block_1'
        blocks[idx] = conv
        conv = fluid.layers.pool2d(
            input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
        if layers >= 50:
            for block in range(len(depth)):
                for i in range(depth[block]):
                    if layers in [101, 152, 200] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    conv = self.bottleneck_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        if_first=block == i == 0,
                        name=conv_name)
                    # outs.append(conv)
                idx = 'block_' + str(block + 2)
                blocks[idx] = conv
        else:
            for block in range(len(depth)):
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    conv = self.basic_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=2 if i == 0 and block != 0 else 1,
                        if_first=block == i == 0,
                        name=conv_name)
                    # outs.append(conv)
                idx = 'block_' + str(block + 2)
                blocks[idx] = conv
        # return outs
        return blocks

    def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1,
                      act=None, name=None):
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def conv_bn_layer_new(self, input, num_filters, filter_size, stride=1, groups=1,
                          act=None, name=None):
        pool = fluid.layers.pool2d(
            input=input, pool_size=2, pool_stride=2, pool_padding=0,
            pool_type='avg', ceil_mode=True)
        conv = fluid.layers.conv2d(
            input=pool,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=1,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def shortcut(self, input, ch_out, stride, name, if_first=False):
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1:
            if if_first:
                return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
            else:
                return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name)
        elif if_first:
            return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
        else:
            return input

    def bottleneck_block(self, input, num_filters, stride, name, if_first):
        conv0 = self.conv_bn_layer(
            input=input, num_filters=num_filters, filter_size=1, act='relu',
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0, num_filters=num_filters, filter_size=3, stride=stride,
            act='relu', name=name + "_branch2b")
        conv2 = self.conv_bn_layer(
            input=conv1, num_filters=num_filters * 4, filter_size=1, act=None,
            name=name + "_branch2c")
        short = self.shortcut(
            input, num_filters * 4, stride, if_first=if_first, name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')

    def basic_block(self, input, num_filters, stride, name, if_first):
        conv0 = self.conv_bn_layer(
            input=input, num_filters=num_filters, filter_size=3, act='relu',
            stride=stride, name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0, num_filters=num_filters, filter_size=3, act=None,
            name=name + "_branch2b")
        short = self.shortcut(
            input, num_filters, stride, if_first=if_first, name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
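For reference, the conv_name scheme used above ("res" + str(block + 2) + chr(97 + i), with an "a"/"b<i>" variant for the third stage of the deepest nets) produces the standard ResNet stage/branch names; a small standalone check (illustrative only, the helper below is hypothetical):

def conv_name(block, i, layers):
    # Same naming rule as in the backbone loops above.
    if layers in [101, 152, 200] and block == 2:
        return "res" + str(block + 2) + ("a" if i == 0 else "b" + str(i))
    return "res" + str(block + 2) + chr(97 + i)

print([conv_name(0, i, 50) for i in range(3)])   # ['res2a', 'res2b', 'res2c']
print([conv_name(2, i, 101) for i in range(3)])  # ['res4a', 'res4b1', 'res4b2']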
ppocr/modeling/backbones/rec_mobilenet_v3.py — modified (file mode 100755 → 100644)
New file contents (dygraph implementation):

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# (Apache License, Version 2.0 header, identical to the one in det_resnet_vd_sast.py above)

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import nn

from ppocr.modeling.backbones.det_mobilenet_v3 import ResidualUnit, ConvBNLayer, make_divisible

__all__ = ['MobileNetV3']


class MobileNetV3(nn.Layer):
    def __init__(self,
                 in_channels=3,
                 model_name='small',
                 scale=0.5,
                 large_stride=None,
                 small_stride=None,
                 **kwargs):
        super(MobileNetV3, self).__init__()
        if small_stride is None:
            small_stride = [2, 2, 2, 2]
        if large_stride is None:
            large_stride = [1, 2, 2, 2]

        assert isinstance(large_stride, list), "large_stride type must " \
            "be list but got {}".format(type(large_stride))
        assert isinstance(small_stride, list), "small_stride type must " \
            "be list but got {}".format(type(small_stride))
        assert len(large_stride) == 4, "large_stride length must be " \
            "4 but got {}".format(len(large_stride))
        assert len(small_stride) == 4, "small_stride length must be " \
            "4 but got {}".format(len(small_stride))

        if model_name == "large":
            cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, 'relu', large_stride[0]],
                [3, 64, 24, False, 'relu', (large_stride[1], 1)],
                ...
                [5, 960, 160, True, 'hard_swish', 1],
                [5, 960, 160, True, 'hard_swish', 1],
            ]
            cls_ch_squeeze = 960
        elif model_name == "small":
            cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, 'relu', (small_stride[0], 1)],
                [3, 72, 24, False, 'relu', (small_stride[1], 1)],
                ...
                [5, 576, 96, True, 'hard_swish', 1],
                [5, 576, 96, True, 'hard_swish', 1],
            ]
            cls_ch_squeeze = 576
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert scale in supported_scale, \
            "supported scales are {} but input scale is {}".format(supported_scale, scale)

        inplanes = 16
        # conv1
        self.conv1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=make_divisible(inplanes * scale),
            kernel_size=3,
            stride=2,
            padding=1,
            groups=1,
            if_act=True,
            act='hard_swish',
            name='conv1')
        i = 0
        block_list = []
        inplanes = make_divisible(inplanes * scale)
        for (k, exp, c, se, nl, s) in cfg:
            block_list.append(
                ResidualUnit(
                    in_channels=inplanes,
                    mid_channels=make_divisible(scale * exp),
                    out_channels=make_divisible(scale * c),
                    kernel_size=k,
                    stride=s,
                    use_se=se,
                    act=nl,
                    name='conv' + str(i + 2)))
            inplanes = make_divisible(scale * c)
            i += 1
        self.blocks = nn.Sequential(*block_list)
        self.conv2 = ConvBNLayer(
            in_channels=inplanes,
            out_channels=make_divisible(scale * cls_ch_squeeze),
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            if_act=True,
            act='hard_swish',
            name='conv_last')
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.out_channels = make_divisible(scale * cls_ch_squeeze)

    def forward(self, x):
        x = self.conv1(x)
        x = self.blocks(x)
        x = self.conv2(x)
        x = self.pool(x)
        return x


if __name__ == '__main__':
    import paddle
    paddle.disable_static()
    x = paddle.zeros((1, 3, 32, 320))
    x = paddle.to_variable(x)
    net = MobileNetV3(model_name='small', small_stride=[1, 2, 2, 2])
    y = net(x)
    print(y.shape)

Removed (old static-graph implementation based on paddle.fluid):

import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr

__all__ = [
    'MobileNetV3', 'MobileNetV3_small_x0_35', 'MobileNetV3_small_x0_5',
    'MobileNetV3_small_x0_75', 'MobileNetV3_small_x1_0', 'MobileNetV3_small_x1_25',
    'MobileNetV3_large_x0_35', 'MobileNetV3_large_x0_5', 'MobileNetV3_large_x0_75',
    'MobileNetV3_large_x1_0', 'MobileNetV3_large_x1_25'
]


class MobileNetV3():
    def __init__(self, params):
        self.scale = params.get("scale", 0.5)
        model_name = params.get("model_name", "small")
        large_stride = params.get("large_stride", [1, 2, 2, 2])
        small_stride = params.get("small_stride", [2, 2, 2, 2])
        self.inplanes = 16
        if model_name == "large":
            self.cfg = [...]  # same k/exp/c/se/nl/s table as above
            self.cls_ch_squeeze = 960
            self.cls_ch_expand = 1280
        elif model_name == "small":
            self.cfg = [...]  # same k/exp/c/se/nl/s table as above
            self.cls_ch_squeeze = 576
            self.cls_ch_expand = 1280
        ...
        assert self.scale in supported_scale, \
            "supported scales are {} but input scale is {}".format(
                supported_scale, self.scale)

    def __call__(self, input):
        scale = self.scale
        inplanes = self.inplanes
        cfg = self.cfg
        cls_ch_squeeze = self.cls_ch_squeeze
        cls_ch_expand = self.cls_ch_expand
        #conv1
        conv = self.conv_bn_layer(
            input,
            filter_size=3,
            num_filters=self.make_divisible(inplanes * scale),
            ...)
        i = 0
        inplanes = self.make_divisible(inplanes * scale)
        for layer_cfg in cfg:
            conv = self.residual_unit(
                input=conv,
                num_in_filter=inplanes,
                num_mid_filter=self.make_divisible(scale * layer_cfg[1]),
                num_out_filter=self.make_divisible(scale * layer_cfg[2]),
                act=layer_cfg[4],
                stride=layer_cfg[5],
                filter_size=layer_cfg[0],
                use_se=layer_cfg[3],
                name='conv' + str(i + 2))
            inplanes = self.make_divisible(scale * layer_cfg[2])
            i += 1
        conv = self.conv_bn_layer(
            input=conv,
            filter_size=1,
            num_filters=self.make_divisible(scale * cls_ch_squeeze),
            num_groups=1,
            ...)
        conv = fluid.layers.pool2d(
            input=conv, pool_size=2, pool_stride=2, pool_padding=0, pool_type='max')
        return conv

    def conv_bn_layer(self, input, filter_size, num_filters, stride, padding,
                      num_groups=1, if_act=True, act=None, name=None,
                      use_cudnn=True, res_last_bn_init=False):
        conv = fluid.layers.conv2d(
            input=input, num_filters=num_filters, filter_size=filter_size,
            stride=stride, padding=padding, groups=num_groups, act=None,
            use_cudnn=use_cudnn, param_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        bn_name = name + '_bn'
        bn = fluid.layers.batch_norm(
            input=conv,
            param_attr=ParamAttr(
                name=bn_name + "_scale",
                regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0)),
            bias_attr=ParamAttr(
                name=bn_name + "_offset",
                regularizer=fluid.regularizer.L2DecayRegularizer(regularization_coeff=0.0)),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        if if_act:
            if act == 'relu':
                bn = fluid.layers.relu(bn)
            elif act == 'hard_swish':
                bn = fluid.layers.hard_swish(bn)
        return bn

    def make_divisible(self, v, divisor=8, min_value=None):
        if min_value is None:
            min_value = divisor
        new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
        if new_v < 0.9 * v:
            new_v += divisor
        return new_v

    def se_block(self, input, num_out_filter, ratio=4, name=None):
        num_mid_filter = num_out_filter // ratio
        pool = fluid.layers.pool2d(
            input=input, pool_type='avg', global_pooling=True, use_cudnn=False)
        conv1 = fluid.layers.conv2d(
            input=pool, filter_size=1, num_filters=num_mid_filter, act='relu',
            param_attr=ParamAttr(name=name + '_1_weights'),
            bias_attr=ParamAttr(name=name + '_1_offset'))
        conv2 = fluid.layers.conv2d(
            input=conv1, filter_size=1, num_filters=num_out_filter, act='hard_sigmoid',
            param_attr=ParamAttr(name=name + '_2_weights'),
            bias_attr=ParamAttr(name=name + '_2_offset'))
        scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
        return scale

    def residual_unit(self, input, num_in_filter, num_mid_filter, num_out_filter,
                      stride, filter_size, act=None, use_se=False, name=None):
        conv0 = self.conv_bn_layer(
            input=input, filter_size=1, num_filters=num_mid_filter, stride=1,
            padding=0, if_act=True, act=act, name=name + '_expand')
        conv1 = self.conv_bn_layer(
            input=conv0, filter_size=filter_size, num_filters=num_mid_filter,
            stride=stride, padding=int((filter_size - 1) // 2), if_act=True,
            act=act, num_groups=num_mid_filter, use_cudnn=False,
            name=name + '_depthwise')
        if use_se:
            conv1 = self.se_block(
                input=conv1, num_out_filter=num_mid_filter, name=name + '_se')
        conv2 = self.conv_bn_layer(
            input=conv1, filter_size=1, num_filters=num_out_filter, stride=1,
            padding=0, if_act=False, name=name + '_linear', res_last_bn_init=True)
        if num_in_filter != num_out_filter or stride != 1:
            return conv2
        else:
            return fluid.layers.elementwise_add(x=input, y=conv2, act=None)
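Both the old and the new MobileNetV3 code round channel counts with make_divisible; a quick standalone check of what it produces for the 0.5x small model (an illustrative sketch, not part of the diff):

def make_divisible(v, divisor=8, min_value=None):
    # Same rounding rule as the backbone above: round to the nearest multiple
    # of `divisor`, never dropping more than 10% below the requested value.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

scale = 0.5
print(make_divisible(16 * scale))   # 8   -> conv1 output channels
print(make_divisible(96 * scale))   # 48  -> last residual block channels
print(make_divisible(576 * scale))  # 288 -> conv_last output channels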
ppocr/modeling/backbones/rec_resnet_fpn.py — deleted (file mode 100755 → 0)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# (Apache License, Version 2.0 header, identical to the one in det_resnet_vd_sast.py above)

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr

__all__ = ["ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"]

Trainable = True
w_nolr = fluid.ParamAttr(trainable=Trainable)
train_parameters = {
    "input_size": [3, 224, 224],
    "input_mean": [0.485, 0.456, 0.406],
    "input_std": [0.229, 0.224, 0.225],
    "learning_strategy": {
        "name": "piecewise_decay",
        "batch_size": 256,
        "epochs": [30, 60, 90],
        "steps": [0.1, 0.01, 0.001, 0.0001]
    }
}


class ResNet():
    def __init__(self, params):
        self.layers = params['layers']
        self.params = train_parameters

    def __call__(self, input):
        layers = self.layers
        supported_layers = [18, 34, 50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)

        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        stride_list = [(2, 2), (2, 2), (1, 1), (1, 1)]
        num_filters = [64, 128, 256, 512]

        conv = self.conv_bn_layer(
            input=input, num_filters=64, filter_size=7, stride=2, act='relu', name="conv1")
        F = []
        if layers >= 50:
            for block in range(len(depth)):
                for i in range(depth[block]):
                    if layers in [101, 152] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    conv = self.bottleneck_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=stride_list[block] if i == 0 else 1,
                        name=conv_name)
                F.append(conv)
        else:
            for block in range(len(depth)):
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    if i == 0 and block != 0:
                        stride = (2, 1)
                    else:
                        stride = (1, 1)
                    conv = self.basic_block(
                        input=conv,
                        num_filters=num_filters[block],
                        stride=stride,
                        if_first=block == i == 0,
                        name=conv_name)
                F.append(conv)
        base = F[-1]
        for i in [-2, -3]:
            b, c, w, h = F[i].shape
            if (w, h) == base.shape[2:]:
                base = base
            else:
                base = fluid.layers.conv2d_transpose(
                    input=base, num_filters=c, filter_size=4, stride=2,
                    padding=1, act=None, param_attr=w_nolr, bias_attr=w_nolr)
                base = fluid.layers.batch_norm(
                    base, act="relu", param_attr=w_nolr, bias_attr=w_nolr)
            base = fluid.layers.concat([base, F[i]], axis=1)
            base = fluid.layers.conv2d(
                base, num_filters=c, filter_size=1, param_attr=w_nolr, bias_attr=w_nolr)
            base = fluid.layers.conv2d(
                base, num_filters=c, filter_size=3, padding=1,
                param_attr=w_nolr, bias_attr=w_nolr)
            base = fluid.layers.batch_norm(
                base, act="relu", param_attr=w_nolr, bias_attr=w_nolr)
        base = fluid.layers.conv2d(
            base, num_filters=512, filter_size=1, bias_attr=w_nolr, param_attr=w_nolr)
        return base

    def conv_bn_layer(self, input, num_filters, filter_size, stride=1, groups=1,
                      act=None, name=None):
        conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=2 if stride == (1, 1) else filter_size,
            dilation=2 if stride == (1, 1) else 1,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights", trainable=Trainable),
            bias_attr=False,
            name=name + '.conv2d.output.1')
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        return fluid.layers.batch_norm(
            input=conv,
            act=act,
            name=bn_name + '.output.1',
            param_attr=ParamAttr(name=bn_name + '_scale', trainable=Trainable),
            bias_attr=ParamAttr(bn_name + '_offset', trainable=Trainable),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance', )

    def shortcut(self, input, ch_out, stride, is_first, name):
        ch_in = input.shape[1]
        if ch_in != ch_out or stride != 1 or is_first == True:
            if stride == (1, 1):
                return self.conv_bn_layer(input, ch_out, 1, 1, name=name)
            else:  #stride == (2,2)
                return self.conv_bn_layer(input, ch_out, 1, stride, name=name)
        else:
            return input

    def bottleneck_block(self, input, num_filters, stride, name):
        conv0 = self.conv_bn_layer(
            input=input, num_filters=num_filters, filter_size=1, act='relu',
            name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0, num_filters=num_filters, filter_size=3, stride=stride,
            act='relu', name=name + "_branch2b")
        conv2 = self.conv_bn_layer(
            input=conv1, num_filters=num_filters * 4, filter_size=1, act=None,
            name=name + "_branch2c")
        short = self.shortcut(
            input, num_filters * 4, stride, is_first=False, name=name + "_branch1")
        return fluid.layers.elementwise_add(
            x=short, y=conv2, act='relu', name=name + ".add.output.5")

    def basic_block(self, input, num_filters, stride, is_first, name):
        conv0 = self.conv_bn_layer(
            input=input, num_filters=num_filters, filter_size=3, act='relu',
            stride=stride, name=name + "_branch2a")
        conv1 = self.conv_bn_layer(
            input=conv0, num_filters=num_filters, filter_size=3, act=None,
            name=name + "_branch2b")
        short = self.shortcut(input, num_filters, stride, is_first, name=name + "_branch1")
        return fluid.layers.elementwise_add(x=short, y=conv1, act='relu')
ppocr/modeling/backbones/rec_resnet_vd.py — modified (file mode 100755 → 100644)
New file contents (dygraph implementation):

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# (Apache License, Version 2.0 header, identical to the one in det_resnet_vd_sast.py above)

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
from paddle import nn, ParamAttr
from paddle.nn import functional as F

__all__ = ["ResNet"]


class ResNet(nn.Layer):
    def __init__(self, in_channels=3, layers=34):
        super(ResNet, self).__init__()
        supported_layers = {
            18: {'depth': [2, 2, 2, 2], 'block_class': BasicBlock},
            34: {'depth': [3, 4, 6, 3], 'block_class': BasicBlock},
            50: {'depth': [3, 4, 6, 3], 'block_class': BottleneckBlock},
            101: {'depth': [3, 4, 23, 3], 'block_class': BottleneckBlock},
            152: {'depth': [3, 8, 36, 3], 'block_class': BottleneckBlock},
            200: {'depth': [3, 12, 48, 3], 'block_class': BottleneckBlock}
        }
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers.keys(), layers)
        is_3x3 = True

        num_filters = [64, 128, 256, 512]
        depth = supported_layers[layers]['depth']
        block_class = supported_layers[layers]['block_class']

        conv = []
        if is_3x3 == False:
            conv.append(ConvBNLayer(
                in_channels=in_channels, out_channels=64, kernel_size=7,
                stride=1, act='relu'))
        else:
            conv.append(ConvBNLayer(
                in_channels=in_channels, out_channels=32, kernel_size=3,
                stride=1, act='relu', name='conv1_1'))
            conv.append(ConvBNLayer(
                in_channels=32, out_channels=32, kernel_size=3,
                stride=1, act='relu', name='conv1_2'))
            conv.append(ConvBNLayer(
                in_channels=32, out_channels=64, kernel_size=3,
                stride=1, act='relu', name='conv1_3'))
        self.conv1 = nn.Sequential(*conv)
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, )

        block_list = []
        in_ch = 64
        for block_index in range(len(depth)):
            for i in range(depth[block_index]):
                if layers >= 50:
                    if layers in [101, 152, 200] and block_index == 2:
                        if i == 0:
                            conv_name = "res" + str(block_index + 2) + "a"
                        else:
                            conv_name = "res" + str(block_index + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block_index + 2) + chr(97 + i)
                else:
                    conv_name = "res" + str(block_index + 2) + chr(97 + i)
                if i == 0 and block_index != 0:
                    stride = (2, 1)
                else:
                    stride = (1, 1)
                block_list.append(block_class(
                    in_channels=in_ch,
                    out_channels=num_filters[block_index],
                    stride=stride,
                    if_first=block_index == i == 0,
                    name=conv_name))
                in_ch = block_list[-1].out_channels
        self.block_list = nn.Sequential(*block_list)
        self.add_sublayer(sublayer=self.block_list, name="block_list")
        self.pool_out = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.out_channels = in_ch

    def forward(self, x):
        x = self.conv1(x)
        x = self.pool(x)
        x = self.block_list(x)
        x = self.pool_out(x)
        return x


class ConvBNLayer(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 groups=1, act=None, name=None):
        super(ConvBNLayer, self).__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + "_mean",
            moving_variance_name=bn_name + "_variance")

    def __call__(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class ConvBNLayerNew(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 groups=1, act=None, name=None):
        super(ConvBNLayerNew, self).__init__()
        self.pool = nn.AvgPool2d(
            kernel_size=stride, stride=stride, padding=0, ceil_mode=True)
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + "_mean",
            moving_variance_name=bn_name + "_variance")

    def __call__(self, x):
        x = self.pool(x)
        x = self.conv(x)
        x = self.bn(x)
        return x


class ShortCut(nn.Layer):
    def __init__(self, in_channels, out_channels, stride, name, if_first=False):
        super(ShortCut, self).__init__()
        self.use_conv = True
        if in_channels != out_channels or stride[0] != 1:
            if if_first:
                self.conv = ConvBNLayer(in_channels, out_channels, 1, stride, name=name)
            else:
                self.conv = ConvBNLayerNew(in_channels, out_channels, 1, stride, name=name)
        elif if_first:
            self.conv = ConvBNLayer(in_channels, out_channels, 1, stride, name=name)
        else:
            self.use_conv = False

    def forward(self, x):
        if self.use_conv:
            x = self.conv(x)
        return x


class BottleneckBlock(nn.Layer):
    def __init__(self, in_channels, out_channels, stride, name, if_first):
        super(BottleneckBlock, self).__init__()
        self.conv0 = ConvBNLayer(
            in_channels=in_channels, out_channels=out_channels, kernel_size=1,
            act='relu', name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels, out_channels=out_channels, kernel_size=3,
            stride=stride, act='relu', name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels, out_channels=out_channels * 4, kernel_size=1,
            act=None, name=name + "_branch2c")
        self.short = ShortCut(
            in_channels=in_channels, out_channels=out_channels * 4, stride=stride,
            if_first=if_first, name=name + "_branch1")
        self.out_channels = out_channels * 4

    def forward(self, x):
        y = self.conv0(x)
        y = self.conv1(y)
        y = self.conv2(y)
        y = y + self.short(x)
        y = F.relu(y)
        return y


class BasicBlock(nn.Layer):
    def __init__(self, in_channels, out_channels, stride, name, if_first):
        super(BasicBlock, self).__init__()
        self.conv0 = ConvBNLayer(
            in_channels=in_channels, out_channels=out_channels, kernel_size=3,
            act='relu', stride=stride, name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels, out_channels=out_channels, kernel_size=3,
            act=None, name=name + "_branch2b")
        self.short = ShortCut(
            in_channels=in_channels, out_channels=out_channels, stride=stride,
            if_first=if_first, name=name + "_branch1")
        self.out_channels = out_channels

    def forward(self, x):
        y = self.conv0(x)
        y = self.conv1(y)
        y = y + self.short(x)
        return F.relu(y)

Removed (old static-graph implementation based on paddle.fluid):

import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr

__all__ = ["ResNet", "ResNet18_vd", "ResNet34_vd", "ResNet50_vd",
           "ResNet101_vd", "ResNet152_vd", "ResNet200_vd"]


class ResNet():
    def __init__(self, params):
        self.layers = params['layers']
        self.is_3x3 = True
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert self.layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, self.layers)

    def __call__(self, input):
        is_3x3 = self.is_3x3
        layers = self.layers
        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]
        num_filters = [64, 128, 256, 512]

        if is_3x3 == False:
            conv = self.conv_bn_layer(
                input=input, num_filters=64, filter_size=7, stride=1, act='relu')
        else:
            conv = self.conv_bn_layer(
                input=input, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_1')
            conv = self.conv_bn_layer(
                input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2')
            conv = self.conv_bn_layer(
                input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3')
        conv = fluid.layers.pool2d(
            input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')

        if layers >= 50:
            for block in range(len(depth)):
                for i in range(depth[block]):
                    if layers in [101, 152, 200] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    if i == 0 and block != 0:
                        stride = (2, 1)
                    else:
                        stride = (1, 1)
                    conv = self.bottleneck_block(
                        input=conv, num_filters=num_filters[block], stride=stride,
                        if_first=block == i == 0, name=conv_name)
        else:
            for block in range(len(depth)):
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    if i == 0 and block != 0:
                        stride = (2, 1)
                    else:
                        stride = (1, 1)
                    conv = self.basic_block(
                        input=conv, num_filters=num_filters[block], ...)
        conv = fluid.layers.pool2d(
            input=conv, pool_size=2, pool_stride=2, pool_padding=0, pool_type='max')
        return conv

    # The removed fluid helper methods conv_bn_layer, conv_bn_layer_new, shortcut,
    # bottleneck_block and basic_block had the same bodies as the corresponding
    # methods in det_resnet_vd_sast.py above, except that this recognition variant
    # used tuple strides (shortcut tests stride[0] != 1) and conv_bn_layer_new
    # pooled with pool_size=stride / pool_stride=stride before the 1x1 convolution.
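The "vd" downsampling trick visible in ConvBNLayerNew above (average-pool first, then a stride-1 1x1 convolution on the shortcut path) can be checked in isolation. A minimal sketch assuming paddle 2.x layer names (nn.AvgPool2D / nn.Conv2D), which differ slightly from the dev-era spellings used in the diff:

import paddle
import paddle.nn as nn

# ResNet-vd shortcut: the avg-pool does the spatial downsampling, so the 1x1
# conv never skips pixels the way a plain strided 1x1 conv would.
shortcut = nn.Sequential(
    nn.AvgPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True),
    nn.Conv2D(64, 256, kernel_size=1, stride=1, bias_attr=False))

x = paddle.zeros((1, 64, 8, 80))
print(shortcut(x).shape)  # [1, 256, 4, 40]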
ppocr/modeling/common_functions.py — deleted (file mode 100755 → 0)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# (Apache License, Version 2.0 header, identical to the one in det_resnet_vd_sast.py above)

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import math


def get_para_bias_attr(l2_decay, k, name):
    regularizer = fluid.regularizer.L2Decay(l2_decay)
    stdv = 1.0 / math.sqrt(k * 1.0)
    initializer = fluid.initializer.Uniform(-stdv, stdv)
    para_attr = fluid.ParamAttr(
        regularizer=regularizer, initializer=initializer, name=name + "_w_attr")
    bias_attr = fluid.ParamAttr(
        regularizer=regularizer, initializer=initializer, name=name + "_b_attr")
    return [para_attr, bias_attr]


def conv_bn_layer(input, num_filters, filter_size, stride=1, groups=1, act=None, name=None):
    conv = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=(filter_size - 1) // 2,
        groups=groups,
        act=None,
        param_attr=ParamAttr(name=name + "_weights"),
        bias_attr=False,
        name=name + '.conv2d')
    bn_name = "bn_" + name
    return fluid.layers.batch_norm(
        input=conv,
        act=act,
        name=bn_name + '.output',
        param_attr=ParamAttr(name=bn_name + '_scale'),
        bias_attr=ParamAttr(bn_name + '_offset'),
        moving_mean_name=bn_name + '_mean',
        moving_variance_name=bn_name + '_variance')


def deconv_bn_layer(input, num_filters, filter_size=4, stride=2, act='relu', name=None):
    deconv = fluid.layers.conv2d_transpose(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=1,
        act=None,
        param_attr=ParamAttr(name=name + "_weights"),
        bias_attr=False,
        name=name + '.deconv2d')
    bn_name = "bn_" + name
    return fluid.layers.batch_norm(
        input=deconv,
        act=act,
        name=bn_name + '.output',
        param_attr=ParamAttr(name=bn_name + '_scale'),
        bias_attr=ParamAttr(bn_name + '_offset'),
        moving_mean_name=bn_name + '_mean',
        moving_variance_name=bn_name + '_variance')


def create_tmp_var(program, name, dtype, shape, lod_level=0):
    return program.current_block().create_var(
        name=name, dtype=dtype, shape=shape, lod_level=lod_level)
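get_para_bias_attr above draws both weights and biases from U(-stdv, stdv) with stdv = 1 / sqrt(k), i.e. the bound shrinks with the fan-in k; a quick illustrative check in plain Python (not part of the deleted file):

import math

def uniform_bound(k):
    # Same bound as get_para_bias_attr: 1 / sqrt(fan_in).
    return 1.0 / math.sqrt(k * 1.0)

print(round(uniform_bound(96), 4))   # 0.1021
print(round(uniform_bound(256), 4))  # 0.0625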
ppocr/modeling/heads/__init__.py — modified
@@ -11,3 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ['build_head']


def build_head(config):
    # det head
    from .det_db_head import DBHead

    # rec head
    from .rec_ctc_head import CTC
    support_dict = ['DBHead', 'CTC']

    module_name = config.pop('name')
    assert module_name in support_dict, Exception('head only support {}'.format(
        support_dict))
    module_class = eval(module_name)(**config)
    return module_class
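build_head resolves the head class from the config's 'name' key and passes the remaining keys as constructor arguments. A small standalone sketch of the same dispatch pattern, using a stub registry instead of eval() and a hypothetical config (the exact keys a real head accepts are defined by that head's __init__):

def build_head_sketch(config):
    # Same pattern as build_head above: pop 'name', validate, forward the rest.
    class DBHead:
        def __init__(self, in_channels, k=50, **kwargs):
            self.in_channels, self.k = in_channels, k

    support_dict = {'DBHead': DBHead}
    config = dict(config)
    module_name = config.pop('name')
    assert module_name in support_dict, 'head only support {}'.format(list(support_dict))
    return support_dict[module_name](**config)

head = build_head_sketch({'name': 'DBHead', 'in_channels': 256, 'k': 50})
print(head.in_channels, head.k)  # 256 50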
ppocr/modeling/heads/det_db_head.py — modified
New file contents (dygraph implementation):

# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
# (Apache License, Version 2.0 header, identical to the one in det_resnet_vd_sast.py above)

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


def get_bias_attr(k, name):
    stdv = 1.0 / math.sqrt(k * 1.0)
    initializer = paddle.nn.initializer.Uniform(-stdv, stdv)
    bias_attr = ParamAttr(initializer=initializer, name=name + "_b_attr")
    return bias_attr


class Head(nn.Layer):
    def __init__(self, in_channels, name_list):
        super(Head, self).__init__()
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=in_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(name=name_list[0] + '.w_0'),
            bias_attr=False)
        self.conv_bn1 = nn.BatchNorm(
            num_channels=in_channels // 4,
            param_attr=ParamAttr(
                name=name_list[1] + '.w_0',
                initializer=paddle.nn.initializer.Constant(value=1.0)),
            bias_attr=ParamAttr(
                name=name_list[1] + '.b_0',
                initializer=paddle.nn.initializer.Constant(value=1e-4)),
            moving_mean_name=name_list[1] + '.w_1',
            moving_variance_name=name_list[1] + '.w_2',
            act='relu')
        self.conv2 = nn.ConvTranspose2d(
            in_channels=in_channels // 4,
            out_channels=in_channels // 4,
            kernel_size=2,
            stride=2,
            weight_attr=ParamAttr(
                name=name_list[2] + '.w_0',
                initializer=paddle.nn.initializer.MSRA(uniform=False)),
            bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv2"))
        self.conv_bn2 = nn.BatchNorm(
            num_channels=in_channels // 4,
            param_attr=ParamAttr(
                name=name_list[3] + '.w_0',
                initializer=paddle.nn.initializer.Constant(value=1.0)),
            bias_attr=ParamAttr(
                name=name_list[3] + '.b_0',
                initializer=paddle.nn.initializer.Constant(value=1e-4)),
            moving_mean_name=name_list[3] + '.w_1',
            moving_variance_name=name_list[3] + '.w_2',
            act="relu")
        self.conv3 = nn.ConvTranspose2d(
            in_channels=in_channels // 4,
            out_channels=1,
            kernel_size=2,
            stride=2,
            weight_attr=ParamAttr(
                name=name_list[4] + '.w_0',
                initializer=paddle.nn.initializer.MSRA(uniform=False)),
            bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv3"), )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv_bn1(x)
        x = self.conv2(x)
        x = self.conv_bn2(x)
        x = self.conv3(x)
        x = F.sigmoid(x)
        return x


class DBHead(nn.Layer):
    """
    Differentiable Binarization (DB) for text detection:
        see https://arxiv.org/abs/1911.08947
    args:
        params(dict): super parameters for build DB network
    """

    def __init__(self, in_channels, k=50, **kwargs):
        super(DBHead, self).__init__()
        self.k = k
        binarize_name_list = [
            'conv2d_56', 'batch_norm_47', 'conv2d_transpose_0', 'batch_norm_48',
            'conv2d_transpose_1', 'binarize'
        ]
        thresh_name_list = [
            'conv2d_57', 'batch_norm_49', 'conv2d_transpose_2', 'batch_norm_50',
            'conv2d_transpose_3', 'thresh'
        ]
        self.binarize = Head(in_channels, binarize_name_list)
        self.thresh = Head(in_channels, thresh_name_list)

    def step_function(self, x, y):
        return paddle.reciprocal(1 + paddle.exp(-self.k * (x - y)))

    def forward(self, x):
        shrink_maps = self.binarize(x)
        if not self.training:
            return shrink_maps
        threshold_maps = self.thresh(x)
        binary_maps = self.step_function(shrink_maps, threshold_maps)
        y = paddle.concat([shrink_maps, threshold_maps, binary_maps], axis=1)
        return y

Removed (old static-graph implementation based on paddle.fluid):

import paddle.fluid as fluid


class DBHead(object):
    """
    Differentiable Binarization (DB) for text detection:
        see https://arxiv.org/abs/1911.08947
    args:
        params(dict): super parameters for build DB network
    """

    def __init__(self, params):
        self.k = params['k']
        self.inner_channels = params['inner_channels']
        self.C, self.H, self.W = params['image_shape']
        print(self.C, self.H, self.W)

    def binarize(self, x):
        conv1 = fluid.layers.conv2d(
            input=x, num_filters=self.inner_channels // 4, filter_size=3, padding=1,
            param_attr=fluid.initializer.MSRAInitializer(uniform=False), bias_attr=False)
        conv_bn1 = fluid.layers.batch_norm(
            input=conv1,
            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
            act="relu")
        conv2 = fluid.layers.conv2d_transpose(
            input=conv_bn1, num_filters=self.inner_channels // 4, filter_size=2, stride=2,
            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
            bias_attr=self._get_bias_attr(0.0004, conv_bn1.shape[1], "conv2"),
            act=None)
        conv_bn2 = fluid.layers.batch_norm(
            input=conv2,
            param_attr=fluid.initializer.ConstantInitializer(value=1.0),
            bias_attr=fluid.initializer.ConstantInitializer(value=1e-4),
            act="relu")
        conv3 = fluid.layers.conv2d_transpose(
            input=conv_bn2, num_filters=1, filter_size=2, stride=2,
            param_attr=fluid.initializer.MSRAInitializer(uniform=False),
            bias_attr=self._get_bias_attr(0.0004, conv_bn2.shape[1], "conv3"),
            act=None)
        out = fluid.layers.sigmoid(conv3)
        return out

    def thresh(self, x):
        # body identical to binarize() above (conv1 / conv_bn1 / conv2 / conv_bn2 /
        # conv3 / sigmoid), built from the same fluid layers
        ...

    def _get_bias_attr(self, l2_decay, k, name, gradient_clip=None):
        regularizer = fluid.regularizer.L2Decay(l2_decay)
        stdv = 1.0 / math.sqrt(k * 1.0)
        initializer = fluid.initializer.Uniform(-stdv, stdv)
        bias_attr = fluid.ParamAttr(
            regularizer=regularizer, initializer=initializer, name=name + "_b_attr")
        return bias_attr

    def step_function(self, x, y):
        return fluid.layers.reciprocal(1 + fluid.layers.exp(-self.k * (x - y)))

    def __call__(self, conv_features, mode="train"):
        c2, c3, c4, c5 = conv_features
        param_attr = fluid.initializer.MSRAInitializer(uniform=False)
        in5 = fluid.layers.conv2d(
            input=c5, num_filters=self.inner_channels, filter_size=1,
            param_attr=param_attr, bias_attr=False)
        in4 = fluid.layers.conv2d(
            input=c4, num_filters=self.inner_channels, filter_size=1,
            param_attr=param_attr, bias_attr=False)
        in3 = fluid.layers.conv2d(
            input=c3, num_filters=self.inner_channels, filter_size=1,
            param_attr=param_attr, bias_attr=False)
        in2 = fluid.layers.conv2d(
            input=c2, num_filters=self.inner_channels, filter_size=1,
            param_attr=param_attr, bias_attr=False)
        out4 = fluid.layers.elementwise_add(
            x=fluid.layers.resize_nearest(input=in5, scale=2), y=in4)  # 1/16
        out3 = fluid.layers.elementwise_add(
            x=fluid.layers.resize_nearest(input=out4, scale=2), y=in3)  # 1/8
        out2 = fluid.layers.elementwise_add(
            x=fluid.layers.resize_nearest(input=out3, scale=2), y=in2)  # 1/4
        p5 = fluid.layers.conv2d(
            input=in5, num_filters=self.inner_channels // 4, filter_size=3,
            padding=1, param_attr=param_attr, bias_attr=False)
        p5 = fluid.layers.resize_nearest(input=p5, scale=8)
        p4 = fluid.layers.conv2d(
            input=out4, num_filters=self.inner_channels // 4, filter_size=3,
            padding=1, param_attr=param_attr, bias_attr=False)
        p4 = fluid.layers.resize_nearest(input=p4, scale=4)
        p3 = fluid.layers.conv2d(
            input=out3, num_filters=self.inner_channels // 4, filter_size=3,
            padding=1, param_attr=param_attr, bias_attr=False)
        p3 = fluid.layers.resize_nearest(input=p3, scale=2)
        p2 = fluid.layers.conv2d(
            input=out2, num_filters=self.inner_channels // 4, filter_size=3,
            padding=1, param_attr=param_attr, bias_attr=False)
        fuse = fluid.layers.concat(input=[p5, p4, p3, p2], axis=1)
        shrink_maps = self.binarize(fuse)
        if mode != "train":
            return {"maps": shrink_maps}
        threshold_maps = self.thresh(fuse)
        binary_maps = self.step_function(shrink_maps, threshold_maps)
        y = fluid.layers.concat(input=[shrink_maps, threshold_maps, binary_maps], axis=1)
        predicts = {}
        predicts['maps'] = y
        return predicts
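The step_function above is the differentiable binarization from the DB paper, y = 1 / (1 + exp(-k (P - T))) with k = 50; a small numeric sketch (plain Python, not part of the diff) showing how sharply it separates pixels whose shrink-map score exceeds the learned threshold:

import math

def step_function(x, y, k=50):
    # Differentiable binarization: a steep sigmoid of (shrink_map - threshold_map).
    return 1.0 / (1.0 + math.exp(-k * (x - y)))

print(round(step_function(0.60, 0.50), 4))  # 0.9933 -> well above threshold
print(round(step_function(0.50, 0.50), 4))  # 0.5    -> exactly at threshold
print(round(step_function(0.40, 0.50), 4))  # 0.0067 -> well below threshold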
ppocr/modeling/heads/det_east_head.py — deleted (file mode 100755 → 0)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# (Apache License, Version 2.0 header, identical to the one in det_resnet_vd_sast.py above)

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid
from ..common_functions import conv_bn_layer, deconv_bn_layer
from collections import OrderedDict


class EASTHead(object):
    """
    EAST: An Efficient and Accurate Scene Text Detector
        see arxiv: https://arxiv.org/abs/1704.03155
    args:
        params(dict): the super parameters for network build
    """

    def __init__(self, params):
        self.model_name = params['model_name']

    def unet_fusion(self, inputs):
        f = inputs[::-1]
        if self.model_name == "large":
            num_outputs = [128, 128, 128, 128]
        else:
            num_outputs = [64, 64, 64, 64]
        g = [None, None, None, None]
        h = [None, None, None, None]
        for i in range(4):
            if i == 0:
                h[i] = f[i]
            else:
                h[i] = fluid.layers.concat([g[i - 1], f[i]], axis=1)
            h[i] = conv_bn_layer(
                input=h[i], num_filters=num_outputs[i], filter_size=3,
                stride=1, act='relu', name="unet_h_%d" % (i))
            if i <= 2:
                #can be replaced with unpool
                g[i] = deconv_bn_layer(
                    input=h[i], num_filters=num_outputs[i], name="unet_g_%d" % (i))
            else:
                g[i] = conv_bn_layer(
                    input=h[i], num_filters=num_outputs[i], filter_size=3,
                    stride=1, act='relu', name="unet_g_%d" % (i))
        return g[3]

    def detector_header(self, f_common):
        if self.model_name == "large":
            num_outputs = [128, 64, 1, 8]
        else:
            num_outputs = [64, 32, 1, 8]
        f_det = conv_bn_layer(
            input=f_common, num_filters=num_outputs[0], filter_size=3,
            stride=1, act='relu', name="det_head1")
        f_det = conv_bn_layer(
            input=f_det, num_filters=num_outputs[1], filter_size=3,
            stride=1, act='relu', name="det_head2")
        #f_score
        f_score = conv_bn_layer(
            input=f_det, num_filters=num_outputs[2], filter_size=1,
            stride=1, act=None, name="f_score")
        f_score = fluid.layers.sigmoid(f_score)
        #f_geo
        f_geo = conv_bn_layer(
            input=f_det, num_filters=num_outputs[3], filter_size=1,
            stride=1, act=None, name="f_geo")
        f_geo = (fluid.layers.sigmoid(f_geo) - 0.5) * 2 * 800
        return f_score, f_geo

    def __call__(self, inputs):
        f_common = self.unet_fusion(inputs)
        f_score, f_geo = self.detector_header(f_common)
        predicts = OrderedDict()
        predicts['f_score'] = f_score
        predicts['f_geo'] = f_geo
        return predicts
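The geometry branch above maps its raw output through (sigmoid(x) - 0.5) * 2 * 800, i.e. into the open range (-800, 800) pixels; a quick check of the mapping (illustrative only):

import math

def geo_map(x, max_range=800):
    # Same affine-on-sigmoid mapping as f_geo in the EAST head above.
    return (1.0 / (1.0 + math.exp(-x)) - 0.5) * 2 * max_range

print(geo_map(0.0))             # 0.0
print(round(geo_map(4.0), 1))   # 771.2
print(round(geo_map(-4.0), 1))  # -771.2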
ppocr/modeling/heads/det_sast_head.py — deleted (file mode 100644 → 0)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid

from ..common_functions import conv_bn_layer, deconv_bn_layer
from collections import OrderedDict


class SASTHead(object):
    """
    SAST:
    see arxiv: https://arxiv.org/abs/1908.05498
    args:
        params(dict): the super parameters for network build
    """

    def __init__(self, params):
        self.model_name = params['model_name']
        self.with_cab = params['with_cab']

    def FPN_Up_Fusion(self, blocks):
        """
        blocks{}: contain block_2, block_3, block_4, block_5, block_6, block_7 with
        1/4, 1/8, 1/16, 1/32, 1/64, 1/128 resolution.
        """
        f = [blocks['block_6'], blocks['block_5'], blocks['block_4'],
             blocks['block_3'], blocks['block_2']]
        num_outputs = [256, 256, 192, 192, 128]
        g = [None, None, None, None, None]
        h = [None, None, None, None, None]
        for i in range(5):
            h[i] = conv_bn_layer(input=f[i], num_filters=num_outputs[i], filter_size=1,
                                 stride=1, act=None, name='fpn_up_h' + str(i))
        for i in range(4):
            if i == 0:
                g[i] = deconv_bn_layer(input=h[i], num_filters=num_outputs[i + 1],
                                       act=None, name='fpn_up_g0')
                # print("g[{}] shape: {}".format(i, g[i].shape))
            else:
                g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i])
                g[i] = fluid.layers.relu(g[i])
                # g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i],
                #                      filter_size=1, stride=1, act='relu')
                g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i], filter_size=3,
                                     stride=1, act='relu', name='fpn_up_g%d_1' % i)
                g[i] = deconv_bn_layer(input=g[i], num_filters=num_outputs[i + 1],
                                       act=None, name='fpn_up_g%d_2' % i)
                # print("g[{}] shape: {}".format(i, g[i].shape))
        g[4] = fluid.layers.elementwise_add(x=g[3], y=h[4])
        g[4] = fluid.layers.relu(g[4])
        g[4] = conv_bn_layer(input=g[4], num_filters=num_outputs[4], filter_size=3,
                             stride=1, act='relu', name='fpn_up_fusion_1')
        g[4] = conv_bn_layer(input=g[4], num_filters=num_outputs[4], filter_size=1,
                             stride=1, act=None, name='fpn_up_fusion_2')
        return g[4]

    def FPN_Down_Fusion(self, blocks):
        """
        blocks{}: contain block_2, block_3, block_4, block_5, block_6, block_7 with
        1/4, 1/8, 1/16, 1/32, 1/64, 1/128 resolution.
        """
        f = [blocks['block_0'], blocks['block_1'], blocks['block_2']]
        num_outputs = [32, 64, 128]
        g = [None, None, None]
        h = [None, None, None]
        for i in range(3):
            h[i] = conv_bn_layer(input=f[i], num_filters=num_outputs[i], filter_size=3,
                                 stride=1, act=None, name='fpn_down_h' + str(i))
        for i in range(2):
            if i == 0:
                g[i] = conv_bn_layer(input=h[i], num_filters=num_outputs[i + 1],
                                     filter_size=3, stride=2, act=None, name='fpn_down_g0')
            else:
                g[i] = fluid.layers.elementwise_add(x=g[i - 1], y=h[i])
                g[i] = fluid.layers.relu(g[i])
                g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i], filter_size=3,
                                     stride=1, act='relu', name='fpn_down_g%d_1' % i)
                g[i] = conv_bn_layer(input=g[i], num_filters=num_outputs[i + 1],
                                     filter_size=3, stride=2, act=None,
                                     name='fpn_down_g%d_2' % i)
            # print("g[{}] shape: {}".format(i, g[i].shape))
        g[2] = fluid.layers.elementwise_add(x=g[1], y=h[2])
        g[2] = fluid.layers.relu(g[2])
        g[2] = conv_bn_layer(input=g[2], num_filters=num_outputs[2], filter_size=3,
                             stride=1, act='relu', name='fpn_down_fusion_1')
        g[2] = conv_bn_layer(input=g[2], num_filters=num_outputs[2], filter_size=1,
                             stride=1, act=None, name='fpn_down_fusion_2')
        return g[2]

    def SAST_Header1(self, f_common):
        """Detector header."""
        # f_score
        f_score = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1,
                                act='relu', name='f_score1')
        f_score = conv_bn_layer(input=f_score, num_filters=64, filter_size=3, stride=1,
                                act='relu', name='f_score2')
        f_score = conv_bn_layer(input=f_score, num_filters=128, filter_size=1, stride=1,
                                act='relu', name='f_score3')
        f_score = conv_bn_layer(input=f_score, num_filters=1, filter_size=3, stride=1,
                                name='f_score4')
        f_score = fluid.layers.sigmoid(f_score)
        # print("f_score shape: {}".format(f_score.shape))
        # f_border
        f_border = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1,
                                 act='relu', name='f_border1')
        f_border = conv_bn_layer(input=f_border, num_filters=64, filter_size=3, stride=1,
                                 act='relu', name='f_border2')
        f_border = conv_bn_layer(input=f_border, num_filters=128, filter_size=1, stride=1,
                                 act='relu', name='f_border3')
        f_border = conv_bn_layer(input=f_border, num_filters=4, filter_size=3, stride=1,
                                 name='f_border4')
        # print("f_border shape: {}".format(f_border.shape))
        return f_score, f_border

    def SAST_Header2(self, f_common):
        """Detector header."""
        # f_tvo
        f_tvo = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1,
                              act='relu', name='f_tvo1')
        f_tvo = conv_bn_layer(input=f_tvo, num_filters=64, filter_size=3, stride=1,
                              act='relu', name='f_tvo2')
        f_tvo = conv_bn_layer(input=f_tvo, num_filters=128, filter_size=1, stride=1,
                              act='relu', name='f_tvo3')
        f_tvo = conv_bn_layer(input=f_tvo, num_filters=8, filter_size=3, stride=1,
                              name='f_tvo4')
        # print("f_tvo shape: {}".format(f_tvo.shape))
        # f_tco
        f_tco = conv_bn_layer(input=f_common, num_filters=64, filter_size=1, stride=1,
                              act='relu', name='f_tco1')
        f_tco = conv_bn_layer(input=f_tco, num_filters=64, filter_size=3, stride=1,
                              act='relu', name='f_tco2')
        f_tco = conv_bn_layer(input=f_tco, num_filters=128, filter_size=1, stride=1,
                              act='relu', name='f_tco3')
        f_tco = conv_bn_layer(input=f_tco, num_filters=2, filter_size=3, stride=1,
                              name='f_tco4')
        # print("f_tco shape: {}".format(f_tco.shape))
        return f_tvo, f_tco

    def cross_attention(self, f_common):
        """
        """
        f_shape = fluid.layers.shape(f_common)
        f_theta = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1,
                                act='relu', name='f_theta')
        f_phi = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1,
                              act='relu', name='f_phi')
        f_g = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1,
                            act='relu', name='f_g')
        ### horizon
        fh_theta = f_theta
        fh_phi = f_phi
        fh_g = f_g
        # flatten
        fh_theta = fluid.layers.transpose(fh_theta, [0, 2, 3, 1])
        fh_theta = fluid.layers.reshape(fh_theta, [f_shape[0] * f_shape[2], f_shape[3], 128])
        fh_phi = fluid.layers.transpose(fh_phi, [0, 2, 3, 1])
        fh_phi = fluid.layers.reshape(fh_phi, [f_shape[0] * f_shape[2], f_shape[3], 128])
        fh_g = fluid.layers.transpose(fh_g, [0, 2, 3, 1])
        fh_g = fluid.layers.reshape(fh_g, [f_shape[0] * f_shape[2], f_shape[3], 128])
        # correlation
        fh_attn = fluid.layers.matmul(fh_theta, fluid.layers.transpose(fh_phi, [0, 2, 1]))
        # scale
        fh_attn = fh_attn / (128**0.5)
        fh_attn = fluid.layers.softmax(fh_attn)
        # weighted sum
        fh_weight = fluid.layers.matmul(fh_attn, fh_g)
        fh_weight = fluid.layers.reshape(fh_weight, [f_shape[0], f_shape[2], f_shape[3], 128])
        # print("fh_weight: {}".format(fh_weight.shape))
        fh_weight = fluid.layers.transpose(fh_weight, [0, 3, 1, 2])
        fh_weight = conv_bn_layer(input=fh_weight, num_filters=128, filter_size=1, stride=1,
                                  name='fh_weight')
        # short cut
        fh_sc = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1,
                              name='fh_sc')
        f_h = fluid.layers.relu(fh_weight + fh_sc)
        ######
        # vertical
        fv_theta = fluid.layers.transpose(f_theta, [0, 1, 3, 2])
        fv_phi = fluid.layers.transpose(f_phi, [0, 1, 3, 2])
        fv_g = fluid.layers.transpose(f_g, [0, 1, 3, 2])
        # flatten
        fv_theta = fluid.layers.transpose(fv_theta, [0, 2, 3, 1])
        fv_theta = fluid.layers.reshape(fv_theta, [f_shape[0] * f_shape[3], f_shape[2], 128])
        fv_phi = fluid.layers.transpose(fv_phi, [0, 2, 3, 1])
        fv_phi = fluid.layers.reshape(fv_phi, [f_shape[0] * f_shape[3], f_shape[2], 128])
        fv_g = fluid.layers.transpose(fv_g, [0, 2, 3, 1])
        fv_g = fluid.layers.reshape(fv_g, [f_shape[0] * f_shape[3], f_shape[2], 128])
        # correlation
        fv_attn = fluid.layers.matmul(fv_theta, fluid.layers.transpose(fv_phi, [0, 2, 1]))
        # scale
        fv_attn = fv_attn / (128**0.5)
        fv_attn = fluid.layers.softmax(fv_attn)
        # weighted sum
        fv_weight = fluid.layers.matmul(fv_attn, fv_g)
        fv_weight = fluid.layers.reshape(fv_weight, [f_shape[0], f_shape[3], f_shape[2], 128])
        # print("fv_weight: {}".format(fv_weight.shape))
        fv_weight = fluid.layers.transpose(fv_weight, [0, 3, 2, 1])
        fv_weight = conv_bn_layer(input=fv_weight, num_filters=128, filter_size=1, stride=1,
                                  name='fv_weight')
        # short cut
        fv_sc = conv_bn_layer(input=f_common, num_filters=128, filter_size=1, stride=1,
                              name='fv_sc')
        f_v = fluid.layers.relu(fv_weight + fv_sc)
        ######
        f_attn = fluid.layers.concat([f_h, f_v], axis=1)
        f_attn = conv_bn_layer(input=f_attn, num_filters=128, filter_size=1, stride=1,
                               act='relu', name='f_attn')
        return f_attn

    def __call__(self, blocks, with_cab=False):
        # for k, v in blocks.items():
        #     print(k, v.shape)
        # down fpn
        f_down = self.FPN_Down_Fusion(blocks)
        # print("f_down shape: {}".format(f_down.shape))
        # up fpn
        f_up = self.FPN_Up_Fusion(blocks)
        # print("f_up shape: {}".format(f_up.shape))
        # fusion
        f_common = fluid.layers.elementwise_add(x=f_down, y=f_up)
        f_common = fluid.layers.relu(f_common)
        # print("f_common: {}".format(f_common.shape))
        if self.with_cab:
            # print('enhence f_common with CAB.')
            f_common = self.cross_attention(f_common)
        f_score, f_border = self.SAST_Header1(f_common)
        f_tvo, f_tco = self.SAST_Header2(f_common)
        predicts = OrderedDict()
        predicts['f_score'] = f_score
        predicts['f_border'] = f_border
        predicts['f_tvo'] = f_tvo
        predicts['f_tco'] = f_tco
        return predicts
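The cross_attention block above runs scaled dot-product attention independently along the rows (horizontal pass) and columns (vertical pass) of the feature map. A standalone NumPy sketch of one horizontal pass, not part of the repository, with made-up shapes:

import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

N, C, H, W = 2, 128, 8, 32                                  # hypothetical feature-map shape
f_theta = np.random.rand(N, C, H, W).astype('float32')
f_phi = np.random.rand(N, C, H, W).astype('float32')
f_g = np.random.rand(N, C, H, W).astype('float32')

# flatten each image row into a sequence: [N, C, H, W] -> [N*H, W, C]
to_seq = lambda t: t.transpose(0, 2, 3, 1).reshape(N * H, W, C)
q, k, v = to_seq(f_theta), to_seq(f_phi), to_seq(f_g)

attn = softmax(q @ k.transpose(0, 2, 1) / np.sqrt(C))      # [N*H, W, W], scale = sqrt(128)
out = (attn @ v).reshape(N, H, W, C).transpose(0, 3, 1, 2)  # back to [N, C, H, W]
print(out.shape)                                            # (2, 128, 8, 32)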
ppocr/modeling/heads/rec_attention_head.py deleted 100755 → 0
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from .rec_seq_encoder import SequenceEncoder
import numpy as np


class AttentionPredict(object):
    def __init__(self, params):
        super(AttentionPredict, self).__init__()
        self.char_num = params['char_num']
        self.encoder = SequenceEncoder(params)
        self.decoder_size = params['Attention']['decoder_size']
        self.word_vector_dim = params['Attention']['word_vector_dim']
        self.encoder_type = params['encoder_type']
        self.max_length = params['max_text_length']

    def simple_attention(self, encoder_vec, encoder_proj, decoder_state, decoder_size):
        decoder_state_proj = layers.fc(input=decoder_state, size=decoder_size,
                                       bias_attr=False, name="decoder_state_proj_fc")
        decoder_state_expand = layers.sequence_expand(x=decoder_state_proj, y=encoder_proj)
        concated = layers.elementwise_add(encoder_proj, decoder_state_expand)
        concated = layers.tanh(x=concated)
        attention_weights = layers.fc(input=concated, size=1, act=None, bias_attr=False,
                                      name="attention_weights_fc")
        attention_weights = layers.sequence_softmax(input=attention_weights)
        weigths_reshape = layers.reshape(x=attention_weights, shape=[-1])
        scaled = layers.elementwise_mul(x=encoder_vec, y=weigths_reshape, axis=0)
        context = layers.sequence_pool(input=scaled, pool_type='sum')
        return context

    def gru_decoder_with_attention(self, target_embedding, encoder_vec, encoder_proj,
                                   decoder_boot, decoder_size, char_num):
        rnn = layers.DynamicRNN()
        with rnn.block():
            current_word = rnn.step_input(target_embedding)
            encoder_vec = rnn.static_input(encoder_vec)
            encoder_proj = rnn.static_input(encoder_proj)
            hidden_mem = rnn.memory(init=decoder_boot, need_reorder=True)
            context = self.simple_attention(encoder_vec, encoder_proj, hidden_mem,
                                            decoder_size)
            fc_1 = layers.fc(input=context, size=decoder_size * 3, bias_attr=False,
                             name="rnn_fc1")
            fc_2 = layers.fc(input=current_word, size=decoder_size * 3, bias_attr=False,
                             name="rnn_fc2")
            decoder_inputs = fc_1 + fc_2
            h, _, _ = layers.gru_unit(input=decoder_inputs, hidden=hidden_mem,
                                      size=decoder_size * 3)
            rnn.update_memory(hidden_mem, h)
            out = layers.fc(input=h, size=char_num, bias_attr=True, act='softmax',
                            name="rnn_out_fc")
            rnn.output(out)
        return rnn()

    def gru_attention_infer(self, decoder_boot, max_length, char_num, word_vector_dim,
                            encoded_vector, encoded_proj, decoder_size):
        init_state = decoder_boot
        beam_size = 1
        array_len = layers.fill_constant(shape=[1], dtype='int64', value=max_length)
        counter = layers.zeros(shape=[1], dtype='int64', force_cpu=True)

        # fill the first element with init_state
        state_array = layers.create_array('float32')
        layers.array_write(init_state, array=state_array, i=counter)

        # ids, scores as memory
        ids_array = layers.create_array('int64')
        scores_array = layers.create_array('float32')
        rois_shape = layers.shape(init_state)
        batch_size = layers.slice(rois_shape, axes=[0], starts=[0], ends=[1]) + 1
        lod_level = layers.range(start=0, end=batch_size, step=1, dtype=batch_size.dtype)

        init_ids = layers.fill_constant_batch_size_like(input=init_state, shape=[-1, 1],
                                                        value=0, dtype='int64')
        init_ids = layers.lod_reset(init_ids, lod_level)
        init_ids = layers.lod_append(init_ids, lod_level)
        init_scores = layers.fill_constant_batch_size_like(input=init_state, shape=[-1, 1],
                                                           value=1, dtype='float32')
        init_scores = layers.lod_reset(init_scores, init_ids)
        layers.array_write(init_ids, array=ids_array, i=counter)
        layers.array_write(init_scores, array=scores_array, i=counter)

        full_ids = fluid.layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], dtype='int64', value=1)
        full_scores = fluid.layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], dtype='float32', value=1)

        cond = layers.less_than(x=counter, y=array_len)
        while_op = layers.While(cond=cond)
        with while_op.block():
            pre_ids = layers.array_read(array=ids_array, i=counter)
            pre_state = layers.array_read(array=state_array, i=counter)
            pre_score = layers.array_read(array=scores_array, i=counter)
            pre_ids_emb = layers.embedding(input=pre_ids, size=[char_num, word_vector_dim],
                                           dtype='float32')

            context = self.simple_attention(encoded_vector, encoded_proj, pre_state,
                                            decoder_size)

            # expand the recursive_sequence_lengths of pre_state
            # to be the same with pre_score
            pre_state_expanded = layers.sequence_expand(pre_state, pre_score)
            context_expanded = layers.sequence_expand(context, pre_score)

            fc_1 = layers.fc(input=context_expanded, size=decoder_size * 3,
                             bias_attr=False, name="rnn_fc1")
            fc_2 = layers.fc(input=pre_ids_emb, size=decoder_size * 3,
                             bias_attr=False, name="rnn_fc2")
            decoder_inputs = fc_1 + fc_2
            current_state, _, _ = layers.gru_unit(input=decoder_inputs,
                                                  hidden=pre_state_expanded,
                                                  size=decoder_size * 3)
            current_state_with_lod = layers.lod_reset(x=current_state, y=pre_score)

            # use score to do beam search
            current_score = layers.fc(input=current_state_with_lod, size=char_num,
                                      bias_attr=True, act='softmax', name="rnn_out_fc")
            topk_scores, topk_indices = layers.topk(current_score, k=beam_size)

            new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
            fluid.layers.assign(new_ids, full_ids)
            new_scores = fluid.layers.concat([full_scores, topk_scores], axis=1)
            fluid.layers.assign(new_scores, full_scores)

            layers.increment(x=counter, value=1, in_place=True)

            # update the memories
            layers.array_write(current_state, array=state_array, i=counter)
            layers.array_write(topk_indices, array=ids_array, i=counter)
            layers.array_write(topk_scores, array=scores_array, i=counter)

            # update the break condition:
            # up to the max length or all candidates of
            # source sentences have ended.
            length_cond = layers.less_than(x=counter, y=array_len)
            finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)
        return full_ids, full_scores

    def __call__(self, inputs, labels=None, mode=None):
        encoder_features = self.encoder(inputs)
        char_num = self.char_num
        word_vector_dim = self.word_vector_dim
        decoder_size = self.decoder_size

        if self.encoder_type == "reshape":
            encoder_input = encoder_features
            encoded_vector = encoder_features
        else:
            encoder_input = encoder_features[1]
            encoded_vector = layers.concat(encoder_features, axis=1)
        encoded_proj = layers.fc(input=encoded_vector, size=decoder_size,
                                 bias_attr=False, name="encoded_proj_fc")
        backward_first = layers.sequence_pool(input=encoder_input, pool_type='first')
        decoder_boot = layers.fc(input=backward_first, size=decoder_size, bias_attr=False,
                                 act="relu", name='decoder_boot')

        if mode == "train":
            label_in = labels['label_in']
            label_out = labels['label_out']
            label_in = layers.cast(x=label_in, dtype='int64')
            trg_embedding = layers.embedding(input=label_in,
                                             size=[char_num, word_vector_dim],
                                             dtype='float32')
            predict = self.gru_decoder_with_attention(trg_embedding, encoded_vector,
                                                      encoded_proj, decoder_boot,
                                                      decoder_size, char_num)
            _, decoded_out = layers.topk(input=predict, k=1)
            decoded_out = layers.lod_reset(decoded_out, y=label_out)
            predicts = {'predict': predict, 'decoded_out': decoded_out}
        else:
            ids, predict = self.gru_attention_infer(decoder_boot, self.max_length,
                                                    char_num, word_vector_dim,
                                                    encoded_vector, encoded_proj,
                                                    decoder_size)
            predicts = {'predict': predict, 'decoded_out': ids}
        return predicts
ppocr/modeling/heads/rec_ctc_head.py
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
...
...
@@ -19,34 +19,33 @@ from __future__ import print_function
# -- old static-graph code (removed/replaced in this commit) --
import math
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from .rec_seq_encoder import SequenceEncoder
from ..common_functions import get_para_bias_attr
import numpy as np


class CTCPredict(object):
    def __init__(self, params):
        super(CTCPredict, self).__init__()
        self.char_num = params['char_num']
        self.encoder = SequenceEncoder(params)
        self.encoder_type = params['encoder_type']
        self.fc_decay = params.get("fc_decay", 0.0004)

    def __call__(self, inputs, labels=None, mode=None):
        encoder_features = self.encoder(inputs)
        if self.encoder_type != "reshape":
            encoder_features = fluid.layers.concat(encoder_features, axis=1)
        name = "ctc_fc"
        para_attr, bias_attr = get_para_bias_attr(l2_decay=self.fc_decay,
                                                  k=encoder_features.shape[1], name=name)
        predict = fluid.layers.fc(input=encoder_features, size=self.char_num + 1,
                                  param_attr=para_attr, bias_attr=bias_attr, name=name)
        decoded_out = fluid.layers.ctc_greedy_decoder(input=predict, blank=self.char_num)
        predicts = {'predict': predict, 'decoded_out': decoded_out}
        # (the removed hunk ends here in the diff)


# -- new dygraph code (added in this commit) --
from paddle import ParamAttr, nn


def get_para_bias_attr(l2_decay, k, name):
    regularizer = paddle.fluid.regularizer.L2Decay(l2_decay)
    stdv = 1.0 / math.sqrt(k * 1.0)
    initializer = nn.initializer.Uniform(-stdv, stdv)
    weight_attr = ParamAttr(regularizer=regularizer, initializer=initializer,
                            name=name + "_w_attr")
    bias_attr = ParamAttr(regularizer=regularizer, initializer=initializer,
                          name=name + "_b_attr")
    return [weight_attr, bias_attr]


class CTC(nn.Layer):
    def __init__(self, in_channels, out_channels, fc_decay=1e-5, **kwargs):
        super(CTC, self).__init__()
        weight_attr, bias_attr = get_para_bias_attr(l2_decay=fc_decay, k=in_channels,
                                                    name='ctc_fc')
        self.fc = nn.Linear(in_channels, out_channels, weight_attr=weight_attr,
                            bias_attr=bias_attr, name='ctc_fc')
        self.out_channels = out_channels

    def forward(self, x, labels=None):
        predicts = self.fc(x)
        return predicts
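A hedged usage sketch for the new dygraph CTC head (not from the repository; the channel and class counts are made up, and it assumes paddle 2.x dygraph mode):

import paddle

head = CTC(in_channels=96, out_channels=6625)  # hypothetical: 96 channels, charset + blank
feats = paddle.rand([8, 25, 96])               # [batch, time steps, channels]
logits = head(feats)
print(logits.shape)                            # [8, 25, 6625]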
ppocr/modeling/heads/rec_seq_encoder.py deleted 100755 → 0
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle.fluid as fluid
import paddle.fluid.layers as layers


class EncoderWithReshape(object):
    def __init__(self, params):
        super(EncoderWithReshape, self).__init__()

    def __call__(self, inputs):
        sliced_feature = layers.im2sequence(
            input=inputs,
            stride=[1, 1],
            filter_size=[inputs.shape[2], 1],
            name="sliced_feature")
        return sliced_feature


class EncoderWithRNN(object):
    def __init__(self, params):
        super(EncoderWithRNN, self).__init__()
        self.rnn_hidden_size = params['SeqRNN']['hidden_size']

    def __call__(self, inputs):
        lstm_list = []
        name_prefix = "lstm"
        rnn_hidden_size = self.rnn_hidden_size
        for no in range(1, 3):
            if no == 1:
                is_reverse = False
            else:
                is_reverse = True
            name = "%s_st1_fc%d" % (name_prefix, no)
            fc = layers.fc(input=inputs, size=rnn_hidden_size * 4,
                           param_attr=fluid.ParamAttr(name=name + "_w"),
                           bias_attr=fluid.ParamAttr(name=name + "_b"), name=name)
            name = "%s_st1_out%d" % (name_prefix, no)
            lstm, _ = layers.dynamic_lstm(input=fc, size=rnn_hidden_size * 4,
                                          is_reverse=is_reverse,
                                          param_attr=fluid.ParamAttr(name=name + "_w"),
                                          bias_attr=fluid.ParamAttr(name=name + "_b"),
                                          use_peepholes=False)
            name = "%s_st2_fc%d" % (name_prefix, no)
            fc = layers.fc(input=lstm, size=rnn_hidden_size * 4,
                           param_attr=fluid.ParamAttr(name=name + "_w"),
                           bias_attr=fluid.ParamAttr(name=name + "_b"), name=name)
            name = "%s_st2_out%d" % (name_prefix, no)
            lstm, _ = layers.dynamic_lstm(input=fc, size=rnn_hidden_size * 4,
                                          is_reverse=is_reverse,
                                          param_attr=fluid.ParamAttr(name=name + "_w"),
                                          bias_attr=fluid.ParamAttr(name=name + "_b"),
                                          use_peepholes=False)
            lstm_list.append(lstm)
        return lstm_list


class SequenceEncoder(object):
    def __init__(self, params):
        super(SequenceEncoder, self).__init__()
        self.encoder_type = params['encoder_type']
        self.encoder_reshape = EncoderWithReshape(params)
        if self.encoder_type == "rnn":
            self.encoder_rnn = EncoderWithRNN(params)

    def __call__(self, inputs):
        if self.encoder_type == "reshape":
            encoder_features = self.encoder_reshape(inputs)
        elif self.encoder_type == "rnn":
            inputs = self.encoder_reshape(inputs)
            encoder_features = self.encoder_rnn(inputs)
        else:
            assert False, "Unsupport encoder_type:%s" % self.encoder_type
        return encoder_features
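EncoderWithReshape above calls im2sequence with a window of [feature-map height, 1] and stride 1, so every image column becomes one time step. A NumPy sketch of that column slicing (not from the repository; the shapes are made up, and the element order inside each vector may differ from the fluid op):

import numpy as np

N, C, H, W = 1, 4, 8, 25                        # hypothetical CNN feature map
feat = np.random.rand(N, C, H, W).astype('float32')

# one [H, 1] window per column -> a sequence of W vectors of length C * H
seq = feat.transpose(0, 3, 1, 2).reshape(N, W, C * H)
print(seq.shape)                                # (1, 25, 32)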
ppocr/modeling/heads/rec_srn_all_head.py deleted 100755 → 0
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import numpy as np
from .self_attention.model import wrap_encoder
from .self_attention.model import wrap_encoder_forFeature

gradient_clip = 10


class SRNPredict(object):
    def __init__(self, params):
        super(SRNPredict, self).__init__()
        self.char_num = params['char_num']
        self.max_length = params['max_text_length']
        self.num_heads = params['num_heads']
        self.num_encoder_TUs = params['num_encoder_TUs']
        self.num_decoder_TUs = params['num_decoder_TUs']
        self.hidden_dims = params['hidden_dims']

    def pvam(self, inputs, others):
        b, c, h, w = inputs.shape
        conv_features = fluid.layers.reshape(x=inputs, shape=[-1, c, h * w])
        conv_features = fluid.layers.transpose(x=conv_features, perm=[0, 2, 1])

        # ===== Transformer encoder =====
        b, t, c = conv_features.shape
        encoder_word_pos = others["encoder_word_pos"]
        gsrm_word_pos = others["gsrm_word_pos"]
        enc_inputs = [conv_features, encoder_word_pos, None]
        word_features = wrap_encoder_forFeature(
            src_vocab_size=-1, max_length=t, n_layer=self.num_encoder_TUs,
            n_head=self.num_heads, d_key=int(self.hidden_dims / self.num_heads),
            d_value=int(self.hidden_dims / self.num_heads), d_model=self.hidden_dims,
            d_inner_hid=self.hidden_dims, prepostprocess_dropout=0.1,
            attention_dropout=0.1, relu_dropout=0.1, preprocess_cmd="n",
            postprocess_cmd="da", weight_sharing=True, enc_inputs=enc_inputs, )
        fluid.clip.set_gradient_clip(fluid.clip.GradientClipByValue(gradient_clip))

        # ===== Parallel Visual Attention Module =====
        b, t, c = word_features.shape
        word_features = fluid.layers.fc(word_features, c, num_flatten_dims=2)
        word_features_ = fluid.layers.reshape(word_features, [-1, 1, t, c])
        word_features_ = fluid.layers.expand(word_features_, [1, self.max_length, 1, 1])
        word_pos_feature = fluid.layers.embedding(gsrm_word_pos, [self.max_length, c])
        word_pos_ = fluid.layers.reshape(word_pos_feature, [-1, self.max_length, 1, c])
        word_pos_ = fluid.layers.expand(word_pos_, [1, 1, t, 1])
        temp = fluid.layers.elementwise_add(word_features_, word_pos_, act='tanh')

        attention_weight = fluid.layers.fc(input=temp, size=1, num_flatten_dims=3,
                                           bias_attr=False)
        attention_weight = fluid.layers.reshape(x=attention_weight,
                                                shape=[-1, self.max_length, t])
        attention_weight = fluid.layers.softmax(input=attention_weight, axis=-1)
        pvam_features = fluid.layers.matmul(attention_weight,
                                            word_features)  # [b, max_length, c]
        return pvam_features

    def gsrm(self, pvam_features, others):
        # ===== GSRM Visual-to-semantic embedding block =====
        b, t, c = pvam_features.shape
        word_out = fluid.layers.fc(input=fluid.layers.reshape(pvam_features, [-1, c]),
                                   size=self.char_num, act="softmax")
        # word_out.stop_gradient = True
        word_ids = fluid.layers.argmax(word_out, axis=1)
        word_ids.stop_gradient = True
        word_ids = fluid.layers.reshape(x=word_ids, shape=[-1, t, 1])

        # ===== GSRM Semantic reasoning block =====
        """
        This module is achieved through bi-transformers,
        ngram_feature1 is the froward one, ngram_fetaure2 is the backward one
        """
        pad_idx = self.char_num
        gsrm_word_pos = others["gsrm_word_pos"]
        gsrm_slf_attn_bias1 = others["gsrm_slf_attn_bias1"]
        gsrm_slf_attn_bias2 = others["gsrm_slf_attn_bias2"]

        def prepare_bi(word_ids):
            """
            prepare bi for gsrm
            word1 for forward; word2 for backward
            """
            word1 = fluid.layers.cast(word_ids, "float32")
            word1 = fluid.layers.pad(word1, [0, 0, 1, 0, 0, 0], pad_value=1.0 * pad_idx)
            word1 = fluid.layers.cast(word1, "int64")
            word1 = word1[:, :-1, :]
            word2 = word_ids
            return word1, word2

        word1, word2 = prepare_bi(word_ids)
        word1.stop_gradient = True
        word2.stop_gradient = True
        enc_inputs_1 = [word1, gsrm_word_pos, gsrm_slf_attn_bias1]
        enc_inputs_2 = [word2, gsrm_word_pos, gsrm_slf_attn_bias2]

        gsrm_feature1 = wrap_encoder(
            src_vocab_size=self.char_num + 1, max_length=self.max_length,
            n_layer=self.num_decoder_TUs, n_head=self.num_heads,
            d_key=int(self.hidden_dims / self.num_heads),
            d_value=int(self.hidden_dims / self.num_heads), d_model=self.hidden_dims,
            d_inner_hid=self.hidden_dims, prepostprocess_dropout=0.1,
            attention_dropout=0.1, relu_dropout=0.1, preprocess_cmd="n",
            postprocess_cmd="da", weight_sharing=True, enc_inputs=enc_inputs_1, )
        gsrm_feature2 = wrap_encoder(
            src_vocab_size=self.char_num + 1, max_length=self.max_length,
            n_layer=self.num_decoder_TUs, n_head=self.num_heads,
            d_key=int(self.hidden_dims / self.num_heads),
            d_value=int(self.hidden_dims / self.num_heads), d_model=self.hidden_dims,
            d_inner_hid=self.hidden_dims, prepostprocess_dropout=0.1,
            attention_dropout=0.1, relu_dropout=0.1, preprocess_cmd="n",
            postprocess_cmd="da", weight_sharing=True, enc_inputs=enc_inputs_2, )
        gsrm_feature2 = fluid.layers.pad(gsrm_feature2, [0, 0, 0, 1, 0, 0], pad_value=0.)
        gsrm_feature2 = gsrm_feature2[:, 1:, ]
        gsrm_features = gsrm_feature1 + gsrm_feature2

        b, t, c = gsrm_features.shape
        gsrm_out = fluid.layers.matmul(
            x=gsrm_features,
            y=fluid.default_main_program().global_block().var("src_word_emb_table"),
            transpose_y=True)
        b, t, c = gsrm_out.shape
        gsrm_out = fluid.layers.softmax(input=fluid.layers.reshape(gsrm_out, [-1, c]))

        return gsrm_features, word_out, gsrm_out

    def vsfd(self, pvam_features, gsrm_features):
        # ===== Visual-Semantic Fusion Decoder Module =====
        b, t, c1 = pvam_features.shape
        b, t, c2 = gsrm_features.shape
        combine_features_ = fluid.layers.concat([pvam_features, gsrm_features], axis=2)
        img_comb_features_ = fluid.layers.reshape(x=combine_features_, shape=[-1, c1 + c2])
        img_comb_features_map = fluid.layers.fc(input=img_comb_features_, size=c1,
                                                act="sigmoid")
        img_comb_features_map = fluid.layers.reshape(x=img_comb_features_map,
                                                     shape=[-1, t, c1])
        combine_features = img_comb_features_map * pvam_features + \
            (1.0 - img_comb_features_map) * gsrm_features
        img_comb_features = fluid.layers.reshape(x=combine_features, shape=[-1, c1])
        fc_out = fluid.layers.fc(input=img_comb_features, size=self.char_num, act="softmax")
        return fc_out

    def __call__(self, inputs, others, mode=None):
        pvam_features = self.pvam(inputs, others)
        gsrm_features, word_out, gsrm_out = self.gsrm(pvam_features, others)
        final_out = self.vsfd(pvam_features, gsrm_features)
        _, decoded_out = fluid.layers.topk(input=final_out, k=1)
        predicts = {'predict': final_out, 'decoded_out': decoded_out,
                    'word_out': word_out, 'gsrm_out': gsrm_out}
        return predicts
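The vsfd module above fuses the visual (PVAM) and semantic (GSRM) features with a learned sigmoid gate. A NumPy sketch of the gating formula (not from the repository; the shapes and the gate values are random stand-ins for the sigmoid fc):

import numpy as np

b, t, c = 2, 25, 512                                      # hypothetical shapes
pvam = np.random.rand(b, t, c).astype('float32')
gsrm = np.random.rand(b, t, c).astype('float32')
gate = 1.0 / (1.0 + np.exp(-np.random.randn(b, t, c)))    # stand-in for the sigmoid fc

fused = gate * pvam + (1.0 - gate) * gsrm                 # same convex combination as vsfd
print(fused.shape)                                        # (2, 25, 512)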
ppocr/modeling/heads/self_attention/__init__.py deleted 100644 → 0
ppocr/modeling/heads/self_attention/model.py deleted 100644 → 0
from functools import partial

import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers

encoder_data_input_fields = (
    "src_word",
    "src_pos",
    "src_slf_attn_bias", )


def wrap_layer_with_block(layer, block_idx):
    """
    Make layer define support indicating block, by which we can add layers
    to other blocks within current block. This will make it easy to define
    cache among while loop.
    """

    class BlockGuard(object):
        """
        BlockGuard class.
        BlockGuard class is used to switch to the given block in a program by
        using the Python `with` keyword.
        """

        def __init__(self, block_idx=None, main_program=None):
            self.main_program = fluid.default_main_program(
            ) if main_program is None else main_program
            self.old_block_idx = self.main_program.current_block().idx
            self.new_block_idx = block_idx

        def __enter__(self):
            self.main_program.current_block_idx = self.new_block_idx

        def __exit__(self, exc_type, exc_val, exc_tb):
            self.main_program.current_block_idx = self.old_block_idx
            if exc_type is not None:
                return False  # re-raise exception
            return True

    def layer_wrapper(*args, **kwargs):
        with BlockGuard(block_idx):
            return layer(*args, **kwargs)

    return layer_wrapper


def multi_head_attention(queries, keys, values, attn_bias, d_key, d_value, d_model,
                         n_head=1, dropout_rate=0., cache=None, gather_idx=None,
                         static_kv=False):
    """
    Multi-Head Attention. Note that attn_bias is added to the logit before
    computing softmax activiation to mask certain selected positions so that
    they will not considered in attention weights.
    """
    keys = queries if keys is None else keys
    values = keys if values is None else values
    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
        raise ValueError(
            "Inputs: quries, keys and values should all be 3-D tensors.")

    def __compute_qkv(queries, keys, values, n_head, d_key, d_value):
        """
        Add linear projection to queries, keys, and values.
        """
        q = layers.fc(input=queries, size=d_key * n_head, bias_attr=False,
                      num_flatten_dims=2)
        # For encoder-decoder attention in inference, insert the ops and vars
        # into global block to use as cache among beam search.
        fc_layer = wrap_layer_with_block(
            layers.fc, fluid.default_main_program().current_block()
            .parent_idx) if cache is not None and static_kv else layers.fc
        k = fc_layer(input=keys, size=d_key * n_head, bias_attr=False,
                     num_flatten_dims=2)
        v = fc_layer(input=values, size=d_value * n_head, bias_attr=False,
                     num_flatten_dims=2)
        return q, k, v

    def __split_heads_qkv(queries, keys, values, n_head, d_key, d_value):
        """
        Reshape input tensors at the last dimension to split multi-heads
        and then transpose. Specifically, transform the input tensor with shape
        [bs, max_sequence_length, n_head * hidden_dim] to the output tensor
        with shape [bs, n_head, max_sequence_length, hidden_dim].
        """
        # The value 0 in shape attr means copying the corresponding dimension
        # size of the input as the output dimension size.
        reshaped_q = layers.reshape(x=queries, shape=[0, 0, n_head, d_key], inplace=True)
        # permuate the dimensions into:
        # [batch_size, n_head, max_sequence_len, hidden_size_per_head]
        q = layers.transpose(x=reshaped_q, perm=[0, 2, 1, 3])
        # For encoder-decoder attention in inference, insert the ops and vars
        # into global block to use as cache among beam search.
        reshape_layer = wrap_layer_with_block(
            layers.reshape, fluid.default_main_program().current_block()
            .parent_idx) if cache is not None and static_kv else layers.reshape
        transpose_layer = wrap_layer_with_block(
            layers.transpose, fluid.default_main_program().current_block()
            .parent_idx) if cache is not None and static_kv else layers.transpose
        reshaped_k = reshape_layer(x=keys, shape=[0, 0, n_head, d_key], inplace=True)
        k = transpose_layer(x=reshaped_k, perm=[0, 2, 1, 3])
        reshaped_v = reshape_layer(x=values, shape=[0, 0, n_head, d_value], inplace=True)
        v = transpose_layer(x=reshaped_v, perm=[0, 2, 1, 3])

        if cache is not None:  # only for faster inference
            if static_kv:  # For encoder-decoder attention in inference
                cache_k, cache_v = cache["static_k"], cache["static_v"]
                # To init the static_k and static_v in cache.
                # Maybe we can use condition_op(if_else) to do these at the first
                # step in while loop to replace these, however it might be less
                # efficient.
                static_cache_init = wrap_layer_with_block(
                    layers.assign,
                    fluid.default_main_program().current_block().parent_idx)
                static_cache_init(k, cache_k)
                static_cache_init(v, cache_v)
            else:  # For decoder self-attention in inference
                cache_k, cache_v = cache["k"], cache["v"]
            # gather cell states corresponding to selected parent
            select_k = layers.gather(cache_k, index=gather_idx)
            select_v = layers.gather(cache_v, index=gather_idx)
            if not static_kv:
                # For self attention in inference, use cache and concat time steps.
                select_k = layers.concat([select_k, k], axis=2)
                select_v = layers.concat([select_v, v], axis=2)
            # update cell states(caches) cached in global block
            layers.assign(select_k, cache_k)
            layers.assign(select_v, cache_v)
            return q, select_k, select_v
        return q, k, v

    def __combine_heads(x):
        """
        Transpose and then reshape the last two dimensions of inpunt tensor x
        so that it becomes one dimension, which is reverse to __split_heads.
        """
        if len(x.shape) != 4:
            raise ValueError("Input(x) should be a 4-D Tensor.")
        trans_x = layers.transpose(x, perm=[0, 2, 1, 3])
        # The value 0 in shape attr means copying the corresponding dimension
        # size of the input as the output dimension size.
        return layers.reshape(x=trans_x,
                              shape=[0, 0, trans_x.shape[2] * trans_x.shape[3]],
                              inplace=True)

    def scaled_dot_product_attention(q, k, v, attn_bias, d_key, dropout_rate):
        """
        Scaled Dot-Product Attention
        """
        # print(q)
        # print(k)
        product = layers.matmul(x=q, y=k, transpose_y=True, alpha=d_key**-0.5)
        if attn_bias:
            product += attn_bias
        weights = layers.softmax(product)
        if dropout_rate:
            weights = layers.dropout(weights, dropout_prob=dropout_rate, seed=None,
                                     is_test=False)
        out = layers.matmul(weights, v)
        return out

    q, k, v = __compute_qkv(queries, keys, values, n_head, d_key, d_value)
    q, k, v = __split_heads_qkv(q, k, v, n_head, d_key, d_value)

    ctx_multiheads = scaled_dot_product_attention(q, k, v, attn_bias, d_model,
                                                  dropout_rate)
    out = __combine_heads(ctx_multiheads)

    # Project back to the model size.
    proj_out = layers.fc(input=out, size=d_model, bias_attr=False, num_flatten_dims=2)
    return proj_out


def positionwise_feed_forward(x, d_inner_hid, d_hid, dropout_rate):
    """
    Position-wise Feed-Forward Networks.
    This module consists of two linear transformations with a ReLU activation
    in between, which is applied to each position separately and identically.
    """
    hidden = layers.fc(input=x, size=d_inner_hid, num_flatten_dims=2, act="relu")
    if dropout_rate:
        hidden = layers.dropout(hidden, dropout_prob=dropout_rate, seed=None,
                                is_test=False)
    out = layers.fc(input=hidden, size=d_hid, num_flatten_dims=2)
    return out


def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.):
    """
    Add residual connection, layer normalization and droput to the out tensor
    optionally according to the value of process_cmd.
    This will be used before or after multi-head attention and position-wise
    feed-forward networks.
    """
    for cmd in process_cmd:
        if cmd == "a":  # add residual connection
            out = out + prev_out if prev_out else out
        elif cmd == "n":  # add layer normalization
            out = layers.layer_norm(out, begin_norm_axis=len(out.shape) - 1,
                                    param_attr=fluid.initializer.Constant(1.),
                                    bias_attr=fluid.initializer.Constant(0.))
        elif cmd == "d":  # add dropout
            if dropout_rate:
                out = layers.dropout(out, dropout_prob=dropout_rate, seed=None,
                                     is_test=False)
    return out


pre_process_layer = partial(pre_post_process_layer, None)
post_process_layer = pre_post_process_layer


def prepare_encoder(src_word,  # [b,t,c]
                    src_pos, src_vocab_size, src_emb_dim, src_max_len, dropout_rate=0.,
                    bos_idx=0, word_emb_param_name=None, pos_enc_param_name=None):
    """Add word embeddings and position encodings.
    The output tensor has a shape of:
    [batch_size, max_src_length_in_batch, d_model].
    This module is used at the bottom of the encoder stacks.
    """
    src_word_emb = src_word
    src_word_emb = layers.cast(src_word_emb, 'float32')
    src_word_emb = layers.scale(x=src_word_emb, scale=src_emb_dim**0.5)
    src_pos_enc = layers.embedding(
        src_pos,
        size=[src_max_len, src_emb_dim],
        param_attr=fluid.ParamAttr(name=pos_enc_param_name, trainable=False))
    src_pos_enc.stop_gradient = True
    enc_input = src_word_emb + src_pos_enc
    return layers.dropout(enc_input, dropout_prob=dropout_rate, seed=None,
                          is_test=False) if dropout_rate else enc_input


def prepare_decoder(src_word, src_pos, src_vocab_size, src_emb_dim, src_max_len,
                    dropout_rate=0., bos_idx=0, word_emb_param_name=None,
                    pos_enc_param_name=None):
    """Add word embeddings and position encodings.
    The output tensor has a shape of:
    [batch_size, max_src_length_in_batch, d_model].
    This module is used at the bottom of the encoder stacks.
    """
    src_word_emb = layers.embedding(
        src_word,
        size=[src_vocab_size, src_emb_dim],
        padding_idx=bos_idx,  # set embedding of bos to 0
        param_attr=fluid.ParamAttr(
            name=word_emb_param_name,
            initializer=fluid.initializer.Normal(0., src_emb_dim**-0.5)))
    src_word_emb = layers.scale(x=src_word_emb, scale=src_emb_dim**0.5)
    src_pos_enc = layers.embedding(
        src_pos,
        size=[src_max_len, src_emb_dim],
        param_attr=fluid.ParamAttr(name=pos_enc_param_name, trainable=False))
    src_pos_enc.stop_gradient = True
    enc_input = src_word_emb + src_pos_enc
    return layers.dropout(enc_input, dropout_prob=dropout_rate, seed=None,
                          is_test=False) if dropout_rate else enc_input


def encoder_layer(enc_input, attn_bias, n_head, d_key, d_value, d_model, d_inner_hid,
                  prepostprocess_dropout, attention_dropout, relu_dropout,
                  preprocess_cmd="n", postprocess_cmd="da"):
    """The encoder layers that can be stacked to form a deep encoder.
    This module consits of a multi-head (self) attention followed by
    position-wise feed-forward networks and both the two components companied
    with the post_process_layer to add residual connection, layer normalization
    and droput.
    """
    attn_output = multi_head_attention(
        pre_process_layer(enc_input, preprocess_cmd, prepostprocess_dropout), None,
        None, attn_bias, d_key, d_value, d_model, n_head, attention_dropout)
    attn_output = post_process_layer(enc_input, attn_output, postprocess_cmd,
                                     prepostprocess_dropout)
    ffd_output = positionwise_feed_forward(
        pre_process_layer(attn_output, preprocess_cmd, prepostprocess_dropout),
        d_inner_hid, d_model, relu_dropout)
    return post_process_layer(attn_output, ffd_output, postprocess_cmd,
                              prepostprocess_dropout)


def encoder(enc_input, attn_bias, n_layer, n_head, d_key, d_value, d_model, d_inner_hid,
            prepostprocess_dropout, attention_dropout, relu_dropout,
            preprocess_cmd="n", postprocess_cmd="da"):
    """
    The encoder is composed of a stack of identical layers returned by calling
    encoder_layer.
    """
    for i in range(n_layer):
        enc_output = encoder_layer(enc_input, attn_bias, n_head, d_key, d_value,
                                   d_model, d_inner_hid, prepostprocess_dropout,
                                   attention_dropout, relu_dropout, preprocess_cmd,
                                   postprocess_cmd, )
        enc_input = enc_output
    enc_output = pre_process_layer(enc_output, preprocess_cmd, prepostprocess_dropout)
    return enc_output


def wrap_encoder_forFeature(src_vocab_size, max_length, n_layer, n_head, d_key, d_value,
                            d_model, d_inner_hid, prepostprocess_dropout,
                            attention_dropout, relu_dropout, preprocess_cmd,
                            postprocess_cmd, weight_sharing, enc_inputs=None,
                            bos_idx=0):
    """
    The wrapper assembles together all needed layers for the encoder.
    img, src_pos, src_slf_attn_bias = enc_inputs
    img
    """
    conv_features, src_pos, src_slf_attn_bias = enc_inputs  #
    b, t, c = conv_features.shape

    enc_input = prepare_encoder(conv_features, src_pos, src_vocab_size, d_model,
                                max_length, prepostprocess_dropout, bos_idx=bos_idx,
                                word_emb_param_name="src_word_emb_table")

    enc_output = encoder(enc_input, src_slf_attn_bias, n_layer, n_head, d_key, d_value,
                         d_model, d_inner_hid, prepostprocess_dropout,
                         attention_dropout, relu_dropout, preprocess_cmd,
                         postprocess_cmd, )
    return enc_output


def wrap_encoder(src_vocab_size, max_length, n_layer, n_head, d_key, d_value, d_model,
                 d_inner_hid, prepostprocess_dropout, attention_dropout, relu_dropout,
                 preprocess_cmd, postprocess_cmd, weight_sharing, enc_inputs=None,
                 bos_idx=0):
    """
    The wrapper assembles together all needed layers for the encoder.
    img, src_pos, src_slf_attn_bias = enc_inputs
    img
    """
    src_word, src_pos, src_slf_attn_bias = enc_inputs  #
    enc_input = prepare_decoder(src_word, src_pos, src_vocab_size, d_model, max_length,
                                prepostprocess_dropout, bos_idx=bos_idx,
                                word_emb_param_name="src_word_emb_table")
    enc_output = encoder(enc_input, src_slf_attn_bias, n_layer, n_head, d_key, d_value,
                         d_model, d_inner_hid, prepostprocess_dropout,
                         attention_dropout, relu_dropout, preprocess_cmd,
                         postprocess_cmd, )
    return enc_output
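The "n"/"da" process commands used throughout this encoder spell out the pre-norm Transformer ordering: normalize before each sub-layer, then apply dropout and add the residual afterwards. A NumPy sketch of that sequence of steps (not from the repository; the sub-layer is a random stand-in and dropout is omitted):

import numpy as np

def layer_norm(x, eps=1e-5):
    mu, var = x.mean(-1, keepdims=True), x.var(-1, keepdims=True)
    return (x - mu) / np.sqrt(var + eps)

def sub_layer(x):  # stand-in for attention / feed-forward
    return x @ np.random.rand(x.shape[-1], x.shape[-1]).astype('float32')

x = np.random.rand(2, 5, 8).astype('float32')
y = sub_layer(layer_norm(x))   # preprocess_cmd = "n"
out = x + y                    # postprocess_cmd = "da" (dropout, then residual add)
print(out.shape)               # (2, 5, 8)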
ppocr/modeling/losses/__init__.py
...
...
@@ -11,3 +11,22 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy


def build_loss(config):
    # det loss
    from .det_db_loss import DBLoss

    # rec loss
    from .rec_ctc_loss import CTCLoss

    support_dict = ['DBLoss', 'CTCLoss']

    config = copy.deepcopy(config)
    module_name = config.pop('name')
    assert module_name in support_dict, Exception(
        'loss only support {}'.format(support_dict))
    module_class = eval(module_name)(**config)
    return module_class
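A hedged usage sketch for build_loss: 'name' selects the class from support_dict and the remaining keys are forwarded as keyword arguments to its constructor. The extra keys shown here are assumptions about DBLoss's signature, not taken from this commit:

# hypothetical config: 'name' picks the class, the rest becomes constructor kwargs
loss_config = {'name': 'DBLoss', 'balance_loss': True, 'main_loss_type': 'DiceLoss'}
loss_fn = build_loss(loss_config)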
ppocr/modeling/losses/det_basic_loss.py
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
...
...
@@ -18,99 +18,189 @@ from __future__ import print_function
import
numpy
as
np
import
paddle.fluid
as
fluid
def
BalanceLoss
(
pred
,
gt
,
mask
,
balance_loss
=
True
,
main_loss_type
=
"DiceLoss"
,
negative_ratio
=
3
,
return_origin
=
False
,
eps
=
1e-6
):
"""
The BalanceLoss for Differentiable Binarization text detection
args:
pred (variable): predicted feature maps.
gt (variable): ground truth feature maps.
mask (variable): masked maps.
balance_loss (bool): whether balance loss or not, default is True
main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
'Euclidean','BCELoss', 'MaskL1Loss'], default is 'DiceLoss'.
negative_ratio (int|float): float, default is 3.
return_origin (bool): whether return unbalanced loss or not, default is False.
eps (float): default is 1e-6.
return: (variable) balanced loss
"""
positive
=
gt
*
mask
negative
=
(
1
-
gt
)
*
mask
positive_count
=
fluid
.
layers
.
reduce_sum
(
positive
)
positive_count_int
=
fluid
.
layers
.
cast
(
positive_count
,
dtype
=
np
.
int32
)
negative_count
=
min
(
fluid
.
layers
.
reduce_sum
(
negative
),
positive_count
*
negative_ratio
)
negative_count_int
=
fluid
.
layers
.
cast
(
negative_count
,
dtype
=
np
.
int32
)
if
main_loss_type
==
"CrossEntropy"
:
loss
=
fluid
.
layers
.
cross_entropy
(
input
=
pred
,
label
=
gt
,
soft_label
=
True
)
loss
=
fluid
.
layers
.
reduce_mean
(
loss
)
elif
main_loss_type
==
"Euclidean"
:
loss
=
fluid
.
layers
.
square
(
pred
-
gt
)
loss
=
fluid
.
layers
.
reduce_mean
(
loss
)
elif
main_loss_type
==
"DiceLoss"
:
loss
=
DiceLoss
(
pred
,
gt
,
mask
)
elif
main_loss_type
==
"BCELoss"
:
loss
=
fluid
.
layers
.
sigmoid_cross_entropy_with_logits
(
pred
,
label
=
gt
)
elif
main_loss_type
==
"MaskL1Loss"
:
loss
=
MaskL1Loss
(
pred
,
gt
,
mask
)
else
:
loss_type
=
[
'CrossEntropy'
,
'DiceLoss'
,
'Euclidean'
,
'BCELoss'
,
'MaskL1Loss'
]
raise
Exception
(
"main_loss_type in BalanceLoss() can only be one of {}"
.
format
(
loss_type
))
if
not
balance_loss
:
import
paddle
from
paddle
import
nn
import
paddle.nn.functional
as
F
class
BalanceLoss
(
nn
.
Layer
):
def
__init__
(
self
,
balance_loss
=
True
,
main_loss_type
=
'DiceLoss'
,
negative_ratio
=
3
,
return_origin
=
False
,
eps
=
1e-6
,
**
kwargs
):
"""
The BalanceLoss for Differentiable Binarization text detection
args:
balance_loss (bool): whether balance loss or not, default is True
main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
'Euclidean','BCELoss', 'MaskL1Loss'], default is 'DiceLoss'.
negative_ratio (int|float): float, default is 3.
return_origin (bool): whether return unbalanced loss or not, default is False.
eps (float): default is 1e-6.
"""
super
(
BalanceLoss
,
self
).
__init__
()
self
.
balance_loss
=
balance_loss
self
.
main_loss_type
=
main_loss_type
self
.
negative_ratio
=
negative_ratio
self
.
main_loss_type
=
main_loss_type
self
.
return_origin
=
return_origin
self
.
eps
=
eps
if
self
.
main_loss_type
==
"CrossEntropy"
:
self
.
loss
=
nn
.
CrossEntropyLoss
()
elif
self
.
main_loss_type
==
"Euclidean"
:
self
.
loss
=
nn
.
MSELoss
()
elif
self
.
main_loss_type
==
"DiceLoss"
:
self
.
loss
=
DiceLoss
(
self
.
eps
)
elif
self
.
main_loss_type
==
"BCELoss"
:
self
.
loss
=
BCELoss
(
reduction
=
'none'
)
elif
self
.
main_loss_type
==
"MaskL1Loss"
:
self
.
loss
=
MaskL1Loss
(
self
.
eps
)
else
:
loss_type
=
[
'CrossEntropy'
,
'DiceLoss'
,
'Euclidean'
,
'BCELoss'
,
'MaskL1Loss'
]
raise
Exception
(
"main_loss_type in BalanceLoss() can only be one of {}"
.
format
(
loss_type
))
def
forward
(
self
,
pred
,
gt
,
mask
=
None
):
"""
The BalanceLoss for Differentiable Binarization text detection
args:
pred (variable): predicted feature maps.
gt (variable): ground truth feature maps.
mask (variable): masked maps.
return: (variable) balanced loss
"""
# if self.main_loss_type in ['DiceLoss']:
# # For the loss that returns to scalar value, perform ohem on the mask
# mask = ohem_batch(pred, gt, mask, self.negative_ratio)
# loss = self.loss(pred, gt, mask)
# return loss
positive
=
gt
*
mask
negative
=
(
1
-
gt
)
*
mask
positive_count
=
int
(
positive
.
sum
())
negative_count
=
int
(
min
(
negative
.
sum
(),
positive_count
*
self
.
negative_ratio
))
loss
=
self
.
loss
(
pred
,
gt
,
mask
=
mask
)
if
not
self
.
balance_loss
:
return
loss
positive_loss
=
positive
*
loss
negative_loss
=
negative
*
loss
negative_loss
=
paddle
.
reshape
(
negative_loss
,
shape
=
[
-
1
])
if
negative_count
>
0
:
sort_loss
=
negative_loss
.
sort
(
descending
=
True
)
negative_loss
=
sort_loss
[:
negative_count
]
# negative_loss, _ = paddle.topk(negative_loss, k=negative_count_int)
balance_loss
=
(
positive_loss
.
sum
()
+
negative_loss
.
sum
())
/
(
positive_count
+
negative_count
+
self
.
eps
)
else
:
balance_loss
=
positive_loss
.
sum
()
/
(
positive_count
+
self
.
eps
)
if
self
.
return_origin
:
return
balance_loss
,
loss
return
balance_loss
class
DiceLoss
(
nn
.
Layer
):
def
__init__
(
self
,
eps
=
1e-6
):
super
(
DiceLoss
,
self
).
__init__
()
self
.
eps
=
eps
def
forward
(
self
,
pred
,
gt
,
mask
,
weights
=
None
):
"""
DiceLoss function.
"""
assert
pred
.
shape
==
gt
.
shape
assert
pred
.
shape
==
mask
.
shape
if
weights
is
not
None
:
assert
weights
.
shape
==
mask
.
shape
mask
=
weights
*
mask
intersection
=
paddle
.
sum
(
pred
*
gt
*
mask
)
union
=
paddle
.
sum
(
pred
*
mask
)
+
paddle
.
sum
(
gt
*
mask
)
+
self
.
eps
loss
=
1
-
2.0
*
intersection
/
union
assert
loss
<=
1
return
loss
        positive_loss = positive * loss
        negative_loss = negative * loss
        negative_loss = fluid.layers.reshape(negative_loss, shape=[-1])
        negative_loss, _ = fluid.layers.topk(negative_loss, k=negative_count_int)
        balance_loss = (fluid.layers.reduce_sum(positive_loss) +
                        fluid.layers.reduce_sum(negative_loss)) / (
                            positive_count + negative_count + eps)
        if return_origin:
            return balance_loss, loss
        return balance_loss


def DiceLoss(pred, gt, mask, weights=None, eps=1e-6):
    """
    DiceLoss function.
    """
    assert pred.shape == gt.shape
    assert pred.shape == mask.shape
    if weights is not None:
        assert weights.shape == mask.shape
        mask = weights * mask
    intersection = fluid.layers.reduce_sum(pred * gt * mask)
    union = fluid.layers.reduce_sum(pred * mask) + fluid.layers.reduce_sum(
        gt * mask) + eps
    loss = 1 - 2.0 * intersection / union
    assert loss <= 1
    return loss


def MaskL1Loss(pred, gt, mask, eps=1e-6):
    """
    Mask L1 Loss
    """
    loss = fluid.layers.reduce_sum((fluid.layers.abs(pred - gt) * mask)) / (
        fluid.layers.reduce_sum(mask) + eps)
    loss = fluid.layers.reduce_mean(loss)
    return loss
class MaskL1Loss(nn.Layer):
    def __init__(self, eps=1e-6):
        super(MaskL1Loss, self).__init__()
        self.eps = eps

    def forward(self, pred, gt, mask):
        """
        Mask L1 Loss
        """
        loss = (paddle.abs(pred - gt) * mask).sum() / (mask.sum() + self.eps)
        loss = paddle.mean(loss)
        return loss
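A minimal check of the masked L1 term above, assuming the class is importable; the toy tensors are made up and only the unmasked element contributes:

import paddle
from ppocr.modeling.losses.det_basic_loss import MaskL1Loss

pred = paddle.to_tensor([[0.2, 0.8]])
gt = paddle.to_tensor([[0.0, 1.0]])
mask = paddle.to_tensor([[1.0, 0.0]])
# only the first element counts: |0.2 - 0.0| / (1 + eps) ≈ 0.2
print(MaskL1Loss()(pred, gt, mask))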
class BCELoss(nn.Layer):
    def __init__(self, reduction='mean'):
        super(BCELoss, self).__init__()
        self.reduction = reduction

    def forward(self, input, label, mask=None, weight=None, name=None):
        loss = F.binary_cross_entropy(input, label, reduction=self.reduction)
        return loss
def ohem_single(score, gt_text, training_mask, ohem_ratio):
    pos_num = (int)(np.sum(gt_text > 0.5)) - (int)(
        np.sum((gt_text > 0.5) & (training_mask <= 0.5)))

    if pos_num == 0:
        # selected_mask = gt_text.copy() * 0 # may be not good
        selected_mask = training_mask
        selected_mask = selected_mask.reshape(
            1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
        return selected_mask

    neg_num = (int)(np.sum(gt_text <= 0.5))
    neg_num = (int)(min(pos_num * ohem_ratio, neg_num))

    if neg_num == 0:
        selected_mask = training_mask
        selected_mask = selected_mask.reshape(
            1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
        return selected_mask

    neg_score = score[gt_text <= 0.5]
    # sort negative-sample scores from high to low
    neg_score_sorted = np.sort(-neg_score)
    threshold = -neg_score_sorted[neg_num - 1]
    # select the mask of high-scoring negatives and all positives
    selected_mask = ((score >= threshold) |
                     (gt_text > 0.5)) & (training_mask > 0.5)
    selected_mask = selected_mask.reshape(
        1, selected_mask.shape[0], selected_mask.shape[1]).astype('float32')
    return selected_mask


def ohem_batch(scores, gt_texts, training_masks, ohem_ratio):
    scores = scores.numpy()
    gt_texts = gt_texts.numpy()
    training_masks = training_masks.numpy()
    selected_masks = []
    for i in range(scores.shape[0]):
        selected_masks.append(
            ohem_single(scores[i, :, :], gt_texts[i, :, :],
                        training_masks[i, :, :], ohem_ratio))

    selected_masks = np.concatenate(selected_masks, 0)
    selected_masks = paddle.to_variable(selected_masks)
    return selected_masks
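ohem_single keeps every positive pixel and only the highest-scoring negatives, capped at ohem_ratio times the positive count. A tiny numpy-only check, assuming the function is importable from this module; the arrays are made up:

import numpy as np
from ppocr.modeling.losses.det_basic_loss import ohem_single

score = np.array([[0.9, 0.8, 0.2, 0.7]], dtype='float32')
gt_text = np.array([[1.0, 0.0, 0.0, 0.0]], dtype='float32')
training_mask = np.ones_like(gt_text)
# 1 positive, ohem_ratio=2 -> at most 2 negatives survive (scores 0.8 and 0.7)
print(ohem_single(score, gt_text, training_mask, ohem_ratio=2))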
ppocr/modeling/losses/det_db_loss.py
View file @
aad3093a
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import nn

from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss


class DBLoss(object):
class DBLoss(nn.Layer):
    """
    Differentiable Binarization (DB) Loss Function
    args:
        param (dict): the super paramter for DB Loss
    """

    def __init__(self, params):
    def __init__(self,
                 balance_loss=True,
                 main_loss_type='DiceLoss',
                 alpha=5,
                 beta=10,
                 ohem_ratio=3,
                 eps=1e-6,
                 **kwargs):
        super(DBLoss, self).__init__()
        self.balance_loss = params['balance_loss']
        self.main_loss_type = params['main_loss_type']
        self.alpha = params['alpha']
        self.beta = params['beta']
        self.ohem_ratio = params['ohem_ratio']
        self.alpha = alpha
        self.beta = beta
        self.dice_loss = DiceLoss(eps=eps)
        self.l1_loss = MaskL1Loss(eps=eps)
        self.bce_loss = BalanceLoss(
            balance_loss=balance_loss,
            main_loss_type=main_loss_type,
            negative_ratio=ohem_ratio)

    def __call__(self, predicts, labels):
        label_shrink_map = labels['shrink_map']
        label_shrink_mask = labels['shrink_mask']
        label_threshold_map = labels['threshold_map']
        label_threshold_mask = labels['threshold_mask']
        pred = predicts['maps']
        shrink_maps = pred[:, 0, :, :]
        threshold_maps = pred[:, 1, :, :]
        binary_maps = pred[:, 2, :, :]

    def forward(self, predicts, labels):
        label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = labels[1:]
        shrink_maps = predicts[:, 0, :, :]
        threshold_maps = predicts[:, 1, :, :]
        binary_maps = predicts[:, 2, :, :]

        loss_shrink_maps = BalanceLoss(
            shrink_maps,
            label_shrink_map,
            label_shrink_mask,
            balance_loss=self.balance_loss,
            main_loss_type=self.main_loss_type,
            negative_ratio=self.ohem_ratio)
        loss_threshold_maps = MaskL1Loss(threshold_maps, label_threshold_map,
                                         label_threshold_mask)
        loss_binary_maps = DiceLoss(binary_maps, label_shrink_map,
                                    label_shrink_mask)
        loss_shrink_maps = self.bce_loss(shrink_maps, label_shrink_map,
                                         label_shrink_mask)
        loss_threshold_maps = self.l1_loss(threshold_maps, label_threshold_map,
                                           label_threshold_mask)
        loss_binary_maps = self.dice_loss(binary_maps, label_shrink_map,
                                          label_shrink_mask)
        loss_shrink_maps = self.alpha * loss_shrink_maps
        loss_threshold_maps = self.beta * loss_threshold_maps

        loss_all = loss_shrink_maps + loss_threshold_maps \
            + loss_binary_maps
        losses = {'total_loss': loss_all, \
                  "loss_shrink_maps": loss_shrink_maps, \
                  "loss_threshold_maps": loss_threshold_maps, \
                  "loss_binary_maps": loss_binary_maps}
        loss_all = loss_shrink_maps + loss_threshold_maps \
            + loss_binary_maps
        losses = {'loss': loss_all, \
                  "loss_shrink_maps": loss_shrink_maps, \
                  "loss_threshold_maps": loss_threshold_maps, \
                  "loss_binary_maps": loss_binary_maps}
        return losses
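The new dygraph DBLoss combines alpha times the balanced shrink-map loss, beta times the masked L1 threshold-map loss, and the dice loss on the binary map. A minimal sketch of driving it, assuming this commit's repository is on the import path; the shapes and random maps are illustrative only:

import paddle
from ppocr.modeling.losses.det_db_loss import DBLoss

db_loss = DBLoss(balance_loss=True, main_loss_type='DiceLoss',
                 alpha=5, beta=10, ohem_ratio=3)
predicts = paddle.rand([2, 3, 160, 160])          # shrink / threshold / binary maps
threshold_map = paddle.rand([2, 160, 160])
threshold_mask = paddle.ones([2, 160, 160])
shrink_map = (paddle.rand([2, 160, 160]) > 0.9).astype('float32')
shrink_mask = paddle.ones([2, 160, 160])
# labels[1:] must unpack to threshold_map, threshold_mask, shrink_map, shrink_mask
labels = [None, threshold_map, threshold_mask, shrink_map, shrink_mask]
print(db_loss(predicts, labels)['loss'])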
ppocr/modeling/losses/det_east_loss.py
deleted
100755 → 0
View file @
10f7e519
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid


class EASTLoss(object):
    """
    EAST Loss function
    """

    def __init__(self, params=None):
        super(EASTLoss, self).__init__()

    def __call__(self, predicts, labels):
        f_score = predicts['f_score']
        f_geo = predicts['f_geo']
        l_score = labels['score']
        l_geo = labels['geo']
        l_mask = labels['mask']
        # dice_loss
        intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
        union = fluid.layers.reduce_sum(f_score * l_mask) \
            + fluid.layers.reduce_sum(l_score * l_mask)
        dice_loss = 1 - 2 * intersection / (union + 1e-5)
        # smooth_l1_loss
        channels = 8
        l_geo_split = fluid.layers.split(
            l_geo, num_or_sections=channels + 1, dim=1)
        f_geo_split = fluid.layers.split(f_geo, num_or_sections=channels, dim=1)
        smooth_l1 = 0
        for i in range(0, channels):
            geo_diff = l_geo_split[i] - f_geo_split[i]
            abs_geo_diff = fluid.layers.abs(geo_diff)
            smooth_l1_sign = fluid.layers.less_than(abs_geo_diff, l_score)
            smooth_l1_sign = fluid.layers.cast(smooth_l1_sign, dtype='float32')
            in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \
                (abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign)
            out_loss = l_geo_split[-1] / channels * in_loss * l_score
            smooth_l1 += out_loss
        smooth_l1_loss = fluid.layers.reduce_mean(smooth_l1 * l_score)
        dice_loss = dice_loss * 0.01
        total_loss = dice_loss + smooth_l1_loss
        losses = {'total_loss': total_loss, "dice_loss": dice_loss, \
            "smooth_l1_loss": smooth_l1_loss}
        return losses
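The per-channel geometry term above follows a smooth-L1 pattern: quadratic where the absolute difference is below the score value, and linear with a 0.5 offset otherwise. A numpy-only sketch of that piecewise term, assuming a threshold of 1 in place of the score map and made-up differences:

import numpy as np

def geo_term(d):
    # quadratic branch for |d| < 1, linear branch with a 0.5 offset otherwise
    sign = (np.abs(d) < 1.0).astype('float32')
    return np.abs(d) ** 2 * sign + (np.abs(d) - 0.5) * (1.0 - sign)

print(geo_term(np.float32(0.3)))  # ≈ 0.09, quadratic branch
print(geo_term(np.float32(2.0)))  # 1.5, linear branch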
ppocr/modeling/losses/det_sast_loss.py
deleted
100644 → 0
View file @
10f7e519
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle.fluid as fluid


class SASTLoss(object):
    """
    SAST Loss function
    """

    def __init__(self, params=None):
        super(SASTLoss, self).__init__()

    def __call__(self, predicts, labels):
        """
        tcl_pos: N x 128 x 3
        tcl_mask: N x 128 x 1
        tcl_label: N x X list or LoDTensor
        """
        f_score = predicts['f_score']
        f_border = predicts['f_border']
        f_tvo = predicts['f_tvo']
        f_tco = predicts['f_tco']

        l_score = labels['input_score']
        l_border = labels['input_border']
        l_mask = labels['input_mask']
        l_tvo = labels['input_tvo']
        l_tco = labels['input_tco']

        # score_loss
        intersection = fluid.layers.reduce_sum(f_score * l_score * l_mask)
        union = fluid.layers.reduce_sum(f_score * l_mask) + \
            fluid.layers.reduce_sum(l_score * l_mask)
        score_loss = 1.0 - 2 * intersection / (union + 1e-5)

        # border loss
        l_border_split, l_border_norm = fluid.layers.split(
            l_border, num_or_sections=[4, 1], dim=1)
        f_border_split = f_border
        l_border_norm_split = fluid.layers.expand(
            x=l_border_norm, expand_times=[1, 4, 1, 1])
        l_border_score = fluid.layers.expand(x=l_score, expand_times=[1, 4, 1, 1])
        l_border_mask = fluid.layers.expand(x=l_mask, expand_times=[1, 4, 1, 1])
        border_diff = l_border_split - f_border_split
        abs_border_diff = fluid.layers.abs(border_diff)
        border_sign = abs_border_diff < 1.0
        border_sign = fluid.layers.cast(border_sign, dtype='float32')
        border_sign.stop_gradient = True
        border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \
            (abs_border_diff - 0.5) * (1.0 - border_sign)
        border_out_loss = l_border_norm_split * border_in_loss
        border_loss = fluid.layers.reduce_sum(border_out_loss * l_border_score * l_border_mask) / \
            (fluid.layers.reduce_sum(l_border_score * l_border_mask) + 1e-5)

        # tvo_loss
        l_tvo_split, l_tvo_norm = fluid.layers.split(
            l_tvo, num_or_sections=[8, 1], dim=1)
        f_tvo_split = f_tvo
        l_tvo_norm_split = fluid.layers.expand(x=l_tvo_norm, expand_times=[1, 8, 1, 1])
        l_tvo_score = fluid.layers.expand(x=l_score, expand_times=[1, 8, 1, 1])
        l_tvo_mask = fluid.layers.expand(x=l_mask, expand_times=[1, 8, 1, 1])
        #
        tvo_geo_diff = l_tvo_split - f_tvo_split
        abs_tvo_geo_diff = fluid.layers.abs(tvo_geo_diff)
        tvo_sign = abs_tvo_geo_diff < 1.0
        tvo_sign = fluid.layers.cast(tvo_sign, dtype='float32')
        tvo_sign.stop_gradient = True
        tvo_in_loss = 0.5 * abs_tvo_geo_diff * abs_tvo_geo_diff * tvo_sign + \
            (abs_tvo_geo_diff - 0.5) * (1.0 - tvo_sign)
        tvo_out_loss = l_tvo_norm_split * tvo_in_loss
        tvo_loss = fluid.layers.reduce_sum(tvo_out_loss * l_tvo_score * l_tvo_mask) / \
            (fluid.layers.reduce_sum(l_tvo_score * l_tvo_mask) + 1e-5)

        # tco_loss
        l_tco_split, l_tco_norm = fluid.layers.split(
            l_tco, num_or_sections=[2, 1], dim=1)
        f_tco_split = f_tco
        l_tco_norm_split = fluid.layers.expand(x=l_tco_norm, expand_times=[1, 2, 1, 1])
        l_tco_score = fluid.layers.expand(x=l_score, expand_times=[1, 2, 1, 1])
        l_tco_mask = fluid.layers.expand(x=l_mask, expand_times=[1, 2, 1, 1])
        #
        tco_geo_diff = l_tco_split - f_tco_split
        abs_tco_geo_diff = fluid.layers.abs(tco_geo_diff)
        tco_sign = abs_tco_geo_diff < 1.0
        tco_sign = fluid.layers.cast(tco_sign, dtype='float32')
        tco_sign.stop_gradient = True
        tco_in_loss = 0.5 * abs_tco_geo_diff * abs_tco_geo_diff * tco_sign + \
            (abs_tco_geo_diff - 0.5) * (1.0 - tco_sign)
        tco_out_loss = l_tco_norm_split * tco_in_loss
        tco_loss = fluid.layers.reduce_sum(tco_out_loss * l_tco_score * l_tco_mask) / \
            (fluid.layers.reduce_sum(l_tco_score * l_tco_mask) + 1e-5)

        # total loss
        tvo_lw, tco_lw = 1.5, 1.5
        score_lw, border_lw = 1.0, 1.0
        total_loss = score_loss * score_lw + border_loss * border_lw + \
            tvo_loss * tvo_lw + tco_loss * tco_lw

        losses = {'total_loss': total_loss, "score_loss": score_loss, \
            "border_loss": border_loss, 'tvo_loss': tvo_loss, 'tco_loss': tco_loss}
        return losses
\ No newline at end of file
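The SAST total loss simply sums the four terms with the hard-coded weights above: 1.0 for the score and border losses, 1.5 for the tvo and tco losses. A toy recombination with made-up loss values, purely for illustration:

score_loss, border_loss, tvo_loss, tco_loss = 0.4, 0.2, 0.3, 0.1
total_loss = score_loss * 1.0 + border_loss * 1.0 + tvo_loss * 1.5 + tco_loss * 1.5
print(total_loss)  # ≈ 1.2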