GitLab — wangsen / paddle_dbnet — Commits

Commit 021c1132 — "add east & sast"
Authored Dec 09, 2020 by MissPenguin
Parent: 8a5566c9
Changes: 36 — this page shows 16 changed files with 1798 additions and 20 deletions (+1798 −20), page 1 of 2.
Changed files on this page:

  ppocr/losses/det_sast_loss.py                    +121  −0
  ppocr/modeling/backbones/__init__.py             +1    −0
  ppocr/modeling/backbones/det_resnet_vd_sast.py   +285  −0
  ppocr/modeling/heads/__init__.py                 +3    −1
  ppocr/modeling/heads/det_east_head.py            +121  −0
  ppocr/modeling/heads/det_sast_head.py            +128  −0
  ppocr/modeling/necks/__init__.py                 +3    −1
  ppocr/modeling/necks/east_fpn.py                 +188  −0
  ppocr/modeling/necks/sast_fpn.py                 +284  −0
  ppocr/postprocess/__init__.py                    +3    −1
  ppocr/postprocess/east_postprocess.py            +141  −0
  ppocr/postprocess/locality_aware_nms.py          +199  −0
  ppocr/postprocess/rec_postprocess.py             +5    −3
  ppocr/postprocess/sast_postprocess.py            +295  −0
  setup.py                                         +1    −1
  tools/infer/predict_system.py                    +20   −13
ppocr/losses/det_sast_loss.py  (new file, mode 100644)

# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
from .det_basic_loss import DiceLoss
import paddle.fluid as fluid
import numpy as np


class SASTLoss(nn.Layer):
    """
    """

    def __init__(self, eps=1e-6, **kwargs):
        super(SASTLoss, self).__init__()
        self.dice_loss = DiceLoss(eps=eps)

    def forward(self, predicts, labels):
        """
        tcl_pos: N x 128 x 3
        tcl_mask: N x 128 x 1
        tcl_label: N x X list or LoDTensor
        """
        f_score = predicts['f_score']
        f_border = predicts['f_border']
        f_tvo = predicts['f_tvo']
        f_tco = predicts['f_tco']

        l_score, l_border, l_mask, l_tvo, l_tco = labels[1:]

        # score_loss
        intersection = paddle.sum(f_score * l_score * l_mask)
        union = paddle.sum(f_score * l_mask) + paddle.sum(l_score * l_mask)
        score_loss = 1.0 - 2 * intersection / (union + 1e-5)

        # border loss
        l_border_split, l_border_norm = paddle.split(l_border, num_or_sections=[4, 1], axis=1)
        f_border_split = f_border
        border_ex_shape = l_border_norm.shape * np.array([1, 4, 1, 1])
        l_border_norm_split = paddle.expand(x=l_border_norm, shape=border_ex_shape)
        l_border_score = paddle.expand(x=l_score, shape=border_ex_shape)
        l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape)

        border_diff = l_border_split - f_border_split
        abs_border_diff = paddle.abs(border_diff)
        border_sign = abs_border_diff < 1.0
        border_sign = paddle.cast(border_sign, dtype='float32')
        border_sign.stop_gradient = True
        border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \
            (abs_border_diff - 0.5) * (1.0 - border_sign)
        border_out_loss = l_border_norm_split * border_in_loss
        border_loss = paddle.sum(border_out_loss * l_border_score * l_border_mask) / \
            (paddle.sum(l_border_score * l_border_mask) + 1e-5)

        # tvo_loss
        l_tvo_split, l_tvo_norm = paddle.split(l_tvo, num_or_sections=[8, 1], axis=1)
        f_tvo_split = f_tvo
        tvo_ex_shape = l_tvo_norm.shape * np.array([1, 8, 1, 1])
        l_tvo_norm_split = paddle.expand(x=l_tvo_norm, shape=tvo_ex_shape)
        l_tvo_score = paddle.expand(x=l_score, shape=tvo_ex_shape)
        l_tvo_mask = paddle.expand(x=l_mask, shape=tvo_ex_shape)
        #
        tvo_geo_diff = l_tvo_split - f_tvo_split
        abs_tvo_geo_diff = paddle.abs(tvo_geo_diff)
        tvo_sign = abs_tvo_geo_diff < 1.0
        tvo_sign = paddle.cast(tvo_sign, dtype='float32')
        tvo_sign.stop_gradient = True
        tvo_in_loss = 0.5 * abs_tvo_geo_diff * abs_tvo_geo_diff * tvo_sign + \
            (abs_tvo_geo_diff - 0.5) * (1.0 - tvo_sign)
        tvo_out_loss = l_tvo_norm_split * tvo_in_loss
        tvo_loss = paddle.sum(tvo_out_loss * l_tvo_score * l_tvo_mask) / \
            (paddle.sum(l_tvo_score * l_tvo_mask) + 1e-5)

        # tco_loss
        l_tco_split, l_tco_norm = paddle.split(l_tco, num_or_sections=[2, 1], axis=1)
        f_tco_split = f_tco
        tco_ex_shape = l_tco_norm.shape * np.array([1, 2, 1, 1])
        l_tco_norm_split = paddle.expand(x=l_tco_norm, shape=tco_ex_shape)
        l_tco_score = paddle.expand(x=l_score, shape=tco_ex_shape)
        l_tco_mask = paddle.expand(x=l_mask, shape=tco_ex_shape)

        tco_geo_diff = l_tco_split - f_tco_split
        abs_tco_geo_diff = paddle.abs(tco_geo_diff)
        tco_sign = abs_tco_geo_diff < 1.0
        tco_sign = paddle.cast(tco_sign, dtype='float32')
        tco_sign.stop_gradient = True
        tco_in_loss = 0.5 * abs_tco_geo_diff * abs_tco_geo_diff * tco_sign + \
            (abs_tco_geo_diff - 0.5) * (1.0 - tco_sign)
        tco_out_loss = l_tco_norm_split * tco_in_loss
        tco_loss = paddle.sum(tco_out_loss * l_tco_score * l_tco_mask) / \
            (paddle.sum(l_tco_score * l_tco_mask) + 1e-5)

        # total loss
        tvo_lw, tco_lw = 1.5, 1.5
        score_lw, border_lw = 1.0, 1.0
        total_loss = score_loss * score_lw + border_loss * border_lw + \
            tvo_loss * tvo_lw + tco_loss * tco_lw

        losses = {'loss': total_loss, "score_loss": score_loss, \
                  "border_loss": border_loss, 'tvo_loss': tvo_loss, 'tco_loss': tco_loss}
        return losses
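[Editor's note — not part of the diff] A minimal smoke-test sketch of SASTLoss. The tensor shapes are assumptions inferred from the field names above (the commit itself does not document them): each "split" label carries its offset channels plus one norm channel.

    # illustrative only; shapes are assumptions, not part of the commit
    import paddle
    from ppocr.losses.det_sast_loss import SASTLoss

    N, H, W = 2, 128, 128
    predicts = {
        'f_score':  paddle.rand([N, 1, H, W]),
        'f_border': paddle.rand([N, 4, H, W]),
        'f_tvo':    paddle.rand([N, 8, H, W]),
        'f_tco':    paddle.rand([N, 2, H, W]),
    }
    # labels[0] is ignored by forward(); the rest are score/border/mask/tvo/tco maps.
    labels = [
        None,
        paddle.rand([N, 1, H, W]),   # l_score
        paddle.rand([N, 5, H, W]),   # l_border: 4 offset channels + 1 norm channel
        paddle.rand([N, 1, H, W]),   # l_mask
        paddle.rand([N, 9, H, W]),   # l_tvo: 8 offset channels + 1 norm channel
        paddle.rand([N, 3, H, W]),   # l_tco: 2 offset channels + 1 norm channel
    ]
    print(SASTLoss()(predicts, labels)['loss'])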
ppocr/modeling/backbones/__init__.py

@@ -19,6 +19,7 @@ def build_backbone(config, model_type):
     if model_type == 'det':
         from .det_mobilenet_v3 import MobileNetV3
         from .det_resnet_vd import ResNet
+        from .det_resnet_vd_sast import ResNet_SAST
         support_dict = ['MobileNetV3', 'ResNet', 'ResNet_SAST']
     elif model_type == 'rec' or model_type == 'cls':
         from .rec_mobilenet_v3 import MobileNetV3
ppocr/modeling/backbones/det_resnet_vd_sast.py  (new file, mode 100644)

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# Licensed under the Apache License, Version 2.0 (the "License");
# http://www.apache.org/licenses/LICENSE-2.0

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F

__all__ = ["ResNet_SAST"]


class ConvBNLayer(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 groups=1, is_vd_mode=False, act=None, name=None, ):
        super(ConvBNLayer, self).__init__()

        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self._conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self._batch_norm = nn.BatchNorm(
            out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def forward(self, inputs):
        if self.is_vd_mode:
            inputs = self._pool2d_avg(inputs)
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y


class BottleneckBlock(nn.Layer):
    def __init__(self, in_channels, out_channels, stride,
                 shortcut=True, if_first=False, name=None):
        super(BottleneckBlock, self).__init__()

        self.conv0 = ConvBNLayer(
            in_channels=in_channels, out_channels=out_channels,
            kernel_size=1, act='relu', name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels, out_channels=out_channels,
            kernel_size=3, stride=stride, act='relu', name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels, out_channels=out_channels * 4,
            kernel_size=1, act=None, name=name + "_branch2c")

        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=in_channels, out_channels=out_channels * 4,
                kernel_size=1, stride=1,
                is_vd_mode=False if if_first else True,
                name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv2)
        y = F.relu(y)
        return y


class BasicBlock(nn.Layer):
    def __init__(self, in_channels, out_channels, stride,
                 shortcut=True, if_first=False, name=None):
        super(BasicBlock, self).__init__()
        self.stride = stride
        self.conv0 = ConvBNLayer(
            in_channels=in_channels, out_channels=out_channels,
            kernel_size=3, stride=stride, act='relu', name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels, out_channels=out_channels,
            kernel_size=3, act=None, name=name + "_branch2b")

        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=in_channels, out_channels=out_channels,
                kernel_size=1, stride=1,
                is_vd_mode=False if if_first else True,
                name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv1)
        y = F.relu(y)
        return y


class ResNet_SAST(nn.Layer):
    def __init__(self, in_channels=3, layers=50, **kwargs):
        super(ResNet_SAST, self).__init__()

        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            # depth = [3, 4, 6, 3]
            depth = [3, 4, 6, 3, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]
        # num_channels = [64, 256, 512,
        #                 1024] if layers >= 50 else [64, 64, 128, 256]
        # num_filters = [64, 128, 256, 512]
        num_channels = [64, 256, 512, 1024, 2048] if layers >= 50 else [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512, 512]

        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels, out_channels=32,
            kernel_size=3, stride=2, act='relu', name="conv1_1")
        self.conv1_2 = ConvBNLayer(
            in_channels=32, out_channels=32,
            kernel_size=3, stride=1, act='relu', name="conv1_2")
        self.conv1_3 = ConvBNLayer(
            in_channels=32, out_channels=64,
            kernel_size=3, stride=1, act='relu', name="conv1_3")
        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

        self.stages = []
        self.out_channels = [3, 64]
        if layers >= 50:
            for block in range(len(depth)):
                block_list = []
                shortcut = False
                for i in range(depth[block]):
                    if layers in [101, 152] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    bottleneck_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BottleneckBlock(
                            in_channels=num_channels[block]
                            if i == 0 else num_filters[block] * 4,
                            out_channels=num_filters[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            name=conv_name))
                    shortcut = True
                    block_list.append(bottleneck_block)
                self.out_channels.append(num_filters[block] * 4)
                self.stages.append(nn.Sequential(*block_list))
        else:
            for block in range(len(depth)):
                block_list = []
                shortcut = False
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    basic_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BasicBlock(
                            in_channels=num_channels[block]
                            if i == 0 else num_filters[block],
                            out_channels=num_filters[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            name=conv_name))
                    shortcut = True
                    block_list.append(basic_block)
                self.out_channels.append(num_filters[block])
                self.stages.append(nn.Sequential(*block_list))

    def forward(self, inputs):
        out = [inputs]
        y = self.conv1_1(inputs)
        y = self.conv1_2(y)
        y = self.conv1_3(y)
        out.append(y)
        y = self.pool2d_max(y)
        for block in self.stages:
            y = block(y)
            out.append(y)
        return out
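[Editor's note — not part of the diff] A quick sanity sketch of what this backbone returns. For layers=50 the depth list [3, 4, 6, 3, 3] adds a fifth residual stage on top of the standard ResNet-50 stages, so the forward pass yields seven feature maps (input included). The 640x640 input size below is an assumption for illustration.

    import paddle
    from ppocr.modeling.backbones.det_resnet_vd_sast import ResNet_SAST

    model = ResNet_SAST(in_channels=3, layers=50)
    outs = model(paddle.rand([1, 3, 640, 640]))
    print([o.shape for o in outs])      # strides 1, 2, 4, 8, 16, 32, 64
    print(model.out_channels)           # [3, 64, 256, 512, 1024, 2048, 2048]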
ppocr/modeling/heads/__init__.py

@@ -18,13 +18,15 @@ __all__ = ['build_head']
 def build_head(config):
     # det head
     from .det_db_head import DBHead
+    from .det_east_head import EASTHead
+    from .det_sast_head import SASTHead

     # rec head
     from .rec_ctc_head import CTCHead

     # cls head
     from .cls_head import ClsHead
-    support_dict = ['DBHead', 'CTCHead', 'ClsHead']
+    support_dict = ['DBHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead']

     module_name = config.pop('name')
     assert module_name in support_dict, Exception('head only support {}'.format(
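[Editor's note — not part of the diff] A hypothetical config-driven construction, mirroring how build_head appears to consume its dict (the hunk is truncated here; the assumption is that 'name' selects the class and the remaining keys become constructor kwargs, as elsewhere in PaddleOCR):

    from ppocr.modeling.heads import build_head

    sast_head = build_head({'name': 'SASTHead', 'in_channels': 128})
    east_head = build_head({'name': 'EASTHead', 'in_channels': 128, 'model_name': 'large'})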
ppocr/modeling/heads/det_east_head.py  (new file, mode 100644)

# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
# Licensed under the Apache License, Version 2.0 (the "License");
# http://www.apache.org/licenses/LICENSE-2.0

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


class ConvBNLayer(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
                 groups=1, if_act=True, act=None, name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class EASTHead(nn.Layer):
    """
    """

    def __init__(self, in_channels, model_name, **kwargs):
        super(EASTHead, self).__init__()
        self.model_name = model_name
        if self.model_name == "large":
            num_outputs = [128, 64, 1, 8]
        else:
            num_outputs = [64, 32, 1, 8]

        self.det_conv1 = ConvBNLayer(
            in_channels=in_channels, out_channels=num_outputs[0],
            kernel_size=3, stride=1, padding=1,
            if_act=True, act='relu', name="det_head1")
        self.det_conv2 = ConvBNLayer(
            in_channels=num_outputs[0], out_channels=num_outputs[1],
            kernel_size=3, stride=1, padding=1,
            if_act=True, act='relu', name="det_head2")
        self.score_conv = ConvBNLayer(
            in_channels=num_outputs[1], out_channels=num_outputs[2],
            kernel_size=1, stride=1, padding=0,
            if_act=False, act=None, name="f_score")
        self.geo_conv = ConvBNLayer(
            in_channels=num_outputs[1], out_channels=num_outputs[3],
            kernel_size=1, stride=1, padding=0,
            if_act=False, act=None, name="f_geo")

    def forward(self, x):
        f_det = self.det_conv1(x)
        f_det = self.det_conv2(f_det)
        f_score = self.score_conv(f_det)
        f_score = F.sigmoid(f_score)
        f_geo = self.geo_conv(f_det)
        f_geo = (F.sigmoid(f_geo) - 0.5) * 2 * 800

        pred = {'f_score': f_score, 'f_geo': f_geo}
        return pred
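[Editor's note — not part of the diff] On the geometry activation above: since sigmoid(x) lies in (0, 1), the transform f_geo = (sigmoid(x) − 0.5) × 2 × 800 squashes each of the 8 geometry channels into (−800, 800), i.e. the predicted corner offsets are bounded to roughly ±800 pixels; the EAST post-processor below subtracts these offsets from the pixel coordinates to recover the quadrangle corners.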
ppocr/modeling/heads/det_sast_head.py  (new file, mode 100644)

# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
# Licensed under the Apache License, Version 2.0 (the "License");
# http://www.apache.org/licenses/LICENSE-2.0

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


class ConvBNLayer(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 groups=1, if_act=True, act=None, name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class SAST_Header1(nn.Layer):
    def __init__(self, in_channels, **kwargs):
        super(SAST_Header1, self).__init__()
        out_channels = [64, 64, 128]
        self.score_conv = nn.Sequential(
            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_score1'),
            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_score2'),
            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_score3'),
            ConvBNLayer(out_channels[2], 1, 3, 1, act=None, name='f_score4')
        )
        self.border_conv = nn.Sequential(
            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_border1'),
            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_border2'),
            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_border3'),
            ConvBNLayer(out_channels[2], 4, 3, 1, act=None, name='f_border4')
        )

    def forward(self, x):
        f_score = self.score_conv(x)
        f_score = F.sigmoid(f_score)
        f_border = self.border_conv(x)
        return f_score, f_border


class SAST_Header2(nn.Layer):
    def __init__(self, in_channels, **kwargs):
        super(SAST_Header2, self).__init__()
        out_channels = [64, 64, 128]
        self.tvo_conv = nn.Sequential(
            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_tvo1'),
            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_tvo2'),
            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_tvo3'),
            ConvBNLayer(out_channels[2], 8, 3, 1, act=None, name='f_tvo4')
        )
        self.tco_conv = nn.Sequential(
            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_tco1'),
            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_tco2'),
            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_tco3'),
            ConvBNLayer(out_channels[2], 2, 3, 1, act=None, name='f_tco4')
        )

    def forward(self, x):
        f_tvo = self.tvo_conv(x)
        f_tco = self.tco_conv(x)
        return f_tvo, f_tco


class SASTHead(nn.Layer):
    """
    """

    def __init__(self, in_channels, **kwargs):
        super(SASTHead, self).__init__()
        self.head1 = SAST_Header1(in_channels)
        self.head2 = SAST_Header2(in_channels)

    def forward(self, x):
        f_score, f_border = self.head1(x)
        f_tvo, f_tco = self.head2(x)

        predicts = {}
        predicts['f_score'] = f_score
        predicts['f_border'] = f_border
        predicts['f_tvo'] = f_tvo
        predicts['f_tco'] = f_tco
        return predicts
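[Editor's note — not part of the diff] A hypothetical shape check for SASTHead. The channel counts are read off the final layers above (f_score 1, f_border 4, f_tvo 8, f_tco 2); the 128-channel, quarter-resolution input is an assumption about the SASTFPN output defined later in this commit.

    import paddle
    from ppocr.modeling.heads.det_sast_head import SASTHead

    feat = paddle.rand([1, 128, 160, 160])   # e.g. a 640x640 image at stride 4
    out = SASTHead(in_channels=128)(feat)
    for k, v in out.items():
        print(k, v.shape)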
ppocr/modeling/necks/__init__.py

@@ -16,8 +16,10 @@ __all__ = ['build_neck']
 def build_neck(config):
     from .db_fpn import DBFPN
+    from .east_fpn import EASTFPN
+    from .sast_fpn import SASTFPN
     from .rnn import SequenceEncoder
-    support_dict = ['DBFPN', 'SequenceEncoder']
+    support_dict = ['DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder']

     module_name = config.pop('name')
     assert module_name in support_dict, Exception('neck only support {}'.format(
ppocr/modeling/necks/east_fpn.py  (new file, mode 100644)

# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
# Licensed under the Apache License, Version 2.0 (the "License");
# http://www.apache.org/licenses/LICENSE-2.0

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


class ConvBNLayer(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
                 groups=1, if_act=True, act=None, name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels, out_channels=out_channels,
            kernel_size=kernel_size, stride=stride, padding=padding, groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'), bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels, act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class DeConvBNLayer(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
                 groups=1, if_act=True, act=None, name=None):
        super(DeConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.deconv = nn.Conv2DTranspose(
            in_channels=in_channels, out_channels=out_channels,
            kernel_size=kernel_size, stride=stride, padding=padding, groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'), bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels, act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.deconv(x)
        x = self.bn(x)
        return x


class EASTFPN(nn.Layer):
    def __init__(self, in_channels, model_name, **kwargs):
        super(EASTFPN, self).__init__()
        self.model_name = model_name
        if self.model_name == "large":
            self.out_channels = 128
        else:
            self.out_channels = 64
        self.in_channels = in_channels[::-1]
        self.h1_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[1],
            out_channels=self.out_channels, kernel_size=3, stride=1, padding=1,
            if_act=True, act='relu', name="unet_h_1")
        self.h2_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[2],
            out_channels=self.out_channels, kernel_size=3, stride=1, padding=1,
            if_act=True, act='relu', name="unet_h_2")
        self.h3_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[3],
            out_channels=self.out_channels, kernel_size=3, stride=1, padding=1,
            if_act=True, act='relu', name="unet_h_3")
        self.g0_deconv = DeConvBNLayer(
            in_channels=self.in_channels[0],
            out_channels=self.out_channels, kernel_size=4, stride=2, padding=1,
            if_act=True, act='relu', name="unet_g_0")
        self.g1_deconv = DeConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels, kernel_size=4, stride=2, padding=1,
            if_act=True, act='relu', name="unet_g_1")
        self.g2_deconv = DeConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels, kernel_size=4, stride=2, padding=1,
            if_act=True, act='relu', name="unet_g_2")
        self.g3_conv = ConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels, kernel_size=3, stride=1, padding=1,
            if_act=True, act='relu', name="unet_g_3")

    def forward(self, x):
        f = x[::-1]

        h = f[0]
        g = self.g0_deconv(h)
        h = paddle.concat([g, f[1]], axis=1)
        h = self.h1_conv(h)
        g = self.g1_deconv(h)
        h = paddle.concat([g, f[2]], axis=1)
        h = self.h2_conv(h)
        g = self.g2_deconv(h)
        h = paddle.concat([g, f[3]], axis=1)
        h = self.h3_conv(h)
        g = self.g3_conv(h)

        return g
ppocr/modeling/necks/sast_fpn.py  (new file, mode 100644)

# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
# Licensed under the Apache License, Version 2.0 (the "License");
# http://www.apache.org/licenses/LICENSE-2.0

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr


class ConvBNLayer(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 groups=1, if_act=True, act=None, name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels, out_channels=out_channels,
            kernel_size=kernel_size, stride=stride,
            padding=(kernel_size - 1) // 2, groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'), bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels, act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class DeConvBNLayer(nn.Layer):
    def __init__(self, in_channels, out_channels, kernel_size, stride,
                 groups=1, if_act=True, act=None, name=None):
        super(DeConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.deconv = nn.Conv2DTranspose(
            in_channels=in_channels, out_channels=out_channels,
            kernel_size=kernel_size, stride=stride,
            padding=(kernel_size - 1) // 2, groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'), bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels, act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.deconv(x)
        x = self.bn(x)
        return x


class FPN_Up_Fusion(nn.Layer):
    def __init__(self, in_channels):
        super(FPN_Up_Fusion, self).__init__()
        in_channels = in_channels[::-1]
        out_channels = [256, 256, 192, 192, 128]

        self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 1, 1, act=None, name='fpn_up_h0')
        self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 1, 1, act=None, name='fpn_up_h1')
        self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 1, 1, act=None, name='fpn_up_h2')
        self.h3_conv = ConvBNLayer(in_channels[3], out_channels[3], 1, 1, act=None, name='fpn_up_h3')
        self.h4_conv = ConvBNLayer(in_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_h4')

        self.g0_conv = DeConvBNLayer(out_channels[0], out_channels[1], 4, 2, act=None, name='fpn_up_g0')

        self.g1_conv = nn.Sequential(
            ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_up_g1_1'),
            DeConvBNLayer(out_channels[1], out_channels[2], 4, 2, act=None, name='fpn_up_g1_2')
        )
        self.g2_conv = nn.Sequential(
            ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_up_g2_1'),
            DeConvBNLayer(out_channels[2], out_channels[3], 4, 2, act=None, name='fpn_up_g2_2')
        )
        self.g3_conv = nn.Sequential(
            ConvBNLayer(out_channels[3], out_channels[3], 3, 1, act='relu', name='fpn_up_g3_1'),
            DeConvBNLayer(out_channels[3], out_channels[4], 4, 2, act=None, name='fpn_up_g3_2')
        )
        self.g4_conv = nn.Sequential(
            ConvBNLayer(out_channels[4], out_channels[4], 3, 1, act='relu', name='fpn_up_fusion_1'),
            ConvBNLayer(out_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_fusion_2')
        )

    def _add_relu(self, x1, x2):
        x = paddle.add(x=x1, y=x2)
        x = F.relu(x)
        return x

    def forward(self, x):
        f = x[2:][::-1]
        h0 = self.h0_conv(f[0])
        h1 = self.h1_conv(f[1])
        h2 = self.h2_conv(f[2])
        h3 = self.h3_conv(f[3])
        h4 = self.h4_conv(f[4])

        g0 = self.g0_conv(h0)
        g1 = self._add_relu(g0, h1)
        g1 = self.g1_conv(g1)
        g2 = self.g2_conv(self._add_relu(g1, h2))
        g3 = self.g3_conv(self._add_relu(g2, h3))
        g4 = self.g4_conv(self._add_relu(g3, h4))

        return g4


class FPN_Down_Fusion(nn.Layer):
    def __init__(self, in_channels):
        super(FPN_Down_Fusion, self).__init__()
        out_channels = [32, 64, 128]

        self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 3, 1, act=None, name='fpn_down_h0')
        self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 3, 1, act=None, name='fpn_down_h1')
        self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 3, 1, act=None, name='fpn_down_h2')

        self.g0_conv = ConvBNLayer(out_channels[0], out_channels[1], 3, 2, act=None, name='fpn_down_g0')

        self.g1_conv = nn.Sequential(
            ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_down_g1_1'),
            ConvBNLayer(out_channels[1], out_channels[2], 3, 2, act=None, name='fpn_down_g1_2')
        )

        self.g2_conv = nn.Sequential(
            ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_down_fusion_1'),
            ConvBNLayer(out_channels[2], out_channels[2], 1, 1, act=None, name='fpn_down_fusion_2')
        )

    def forward(self, x):
        f = x[:3]
        h0 = self.h0_conv(f[0])
        h1 = self.h1_conv(f[1])
        h2 = self.h2_conv(f[2])
        g0 = self.g0_conv(h0)
        g1 = paddle.add(x=g0, y=h1)
        g1 = F.relu(g1)
        g1 = self.g1_conv(g1)
        g2 = paddle.add(x=g1, y=h2)
        g2 = F.relu(g2)
        g2 = self.g2_conv(g2)
        return g2


class Cross_Attention(nn.Layer):
    def __init__(self, in_channels):
        super(Cross_Attention, self).__init__()
        self.theta_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_theta')
        self.phi_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_phi')
        self.g_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_g')

        self.fh_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_weight')
        self.fh_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_sc')

        self.fv_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_weight')
        self.fv_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_sc')

        self.f_attn_conv = ConvBNLayer(in_channels * 2, in_channels, 1, 1, act='relu', name='f_attn')

    def _cal_fweight(self, f, shape):
        f_theta, f_phi, f_g = f
        # flatten
        f_theta = paddle.transpose(f_theta, [0, 2, 3, 1])
        f_theta = paddle.reshape(f_theta, [shape[0] * shape[1], shape[2], 128])
        f_phi = paddle.transpose(f_phi, [0, 2, 3, 1])
        f_phi = paddle.reshape(f_phi, [shape[0] * shape[1], shape[2], 128])
        f_g = paddle.transpose(f_g, [0, 2, 3, 1])
        f_g = paddle.reshape(f_g, [shape[0] * shape[1], shape[2], 128])
        # correlation
        f_attn = paddle.matmul(f_theta, paddle.transpose(f_phi, [0, 2, 1]))
        # scale
        f_attn = f_attn / (128**0.5)
        f_attn = F.softmax(f_attn)
        # weighted sum
        f_weight = paddle.matmul(f_attn, f_g)
        f_weight = paddle.reshape(f_weight, [shape[0], shape[1], shape[2], 128])
        return f_weight

    def forward(self, f_common):
        f_shape = paddle.shape(f_common)
        # print('f_shape: ', f_shape)

        f_theta = self.theta_conv(f_common)
        f_phi = self.phi_conv(f_common)
        f_g = self.g_conv(f_common)

        ######## horizon ########
        fh_weight = self._cal_fweight([f_theta, f_phi, f_g],
                                      [f_shape[0], f_shape[2], f_shape[3]])
        fh_weight = paddle.transpose(fh_weight, [0, 3, 1, 2])
        fh_weight = self.fh_weight_conv(fh_weight)
        # short cut
        fh_sc = self.fh_sc_conv(f_common)
        f_h = F.relu(fh_weight + fh_sc)

        ######## vertical ########
        fv_theta = paddle.transpose(f_theta, [0, 1, 3, 2])
        fv_phi = paddle.transpose(f_phi, [0, 1, 3, 2])
        fv_g = paddle.transpose(f_g, [0, 1, 3, 2])
        fv_weight = self._cal_fweight([fv_theta, fv_phi, fv_g],
                                      [f_shape[0], f_shape[3], f_shape[2]])
        fv_weight = paddle.transpose(fv_weight, [0, 3, 2, 1])
        fv_weight = self.fv_weight_conv(fv_weight)
        # short cut
        fv_sc = self.fv_sc_conv(f_common)
        f_v = F.relu(fv_weight + fv_sc)

        ######## merge ########
        f_attn = paddle.concat([f_h, f_v], axis=1)
        f_attn = self.f_attn_conv(f_attn)
        return f_attn


class SASTFPN(nn.Layer):
    def __init__(self, in_channels, with_cab=False, **kwargs):
        super(SASTFPN, self).__init__()
        self.in_channels = in_channels
        self.with_cab = with_cab
        self.FPN_Down_Fusion = FPN_Down_Fusion(self.in_channels)
        self.FPN_Up_Fusion = FPN_Up_Fusion(self.in_channels)
        self.out_channels = 128
        self.cross_attention = Cross_Attention(self.out_channels)

    def forward(self, x):
        # down fpn
        f_down = self.FPN_Down_Fusion(x)
        # up fpn
        f_up = self.FPN_Up_Fusion(x)
        # fusion
        f_common = paddle.add(x=f_down, y=f_up)
        f_common = F.relu(f_common)
        if self.with_cab:
            # print('enhence f_common with CAB.')
            f_common = self.cross_attention(f_common)

        return f_common
ppocr/postprocess/__init__.py

@@ -24,11 +24,13 @@ __all__ = ['build_post_process']
 def build_post_process(config, global_config=None):
     from .db_postprocess import DBPostProcess
+    from .east_postprocess import EASTPostProcess
+    from .sast_postprocess import SASTPostProcess
     from .rec_postprocess import CTCLabelDecode, AttnLabelDecode
     from .cls_postprocess import ClsPostProcess

     support_dict = [
-        'DBPostProcess', 'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess'
+        'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess'
     ]

     config = copy.deepcopy(config)
ppocr/postprocess/east_postprocess.py  (new file, mode 100644)

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# http://www.apache.org/licenses/LICENSE-2.0

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from .locality_aware_nms import nms_locality
import cv2
import os
import sys
# __dir__ = os.path.dirname(os.path.abspath(__file__))
# sys.path.append(__dir__)
# sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))


class EASTPostProcess(object):
    """
    The post process for EAST.
    """

    def __init__(self, score_thresh=0.8, cover_thresh=0.1, nms_thresh=0.2, **kwargs):
        self.score_thresh = score_thresh
        self.cover_thresh = cover_thresh
        self.nms_thresh = nms_thresh

        # c++ la-nms is faster, but only support python 3.5
        self.is_python35 = False
        if sys.version_info.major == 3 and sys.version_info.minor == 5:
            self.is_python35 = True

    def restore_rectangle_quad(self, origin, geometry):
        """
        Restore rectangle from quadrangle.
        """
        # quad
        origin_concat = np.concatenate(
            (origin, origin, origin, origin), axis=1)  # (n, 8)
        pred_quads = origin_concat - geometry
        pred_quads = pred_quads.reshape((-1, 4, 2))  # (n, 4, 2)
        return pred_quads

    def detect(self, score_map, geo_map, score_thresh=0.8, cover_thresh=0.1, nms_thresh=0.2):
        """
        restore text boxes from score map and geo map
        """
        score_map = score_map[0]
        geo_map = np.swapaxes(geo_map, 1, 0)
        geo_map = np.swapaxes(geo_map, 1, 2)
        # filter the score map
        xy_text = np.argwhere(score_map > score_thresh)
        if len(xy_text) == 0:
            return []
        # sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        # restore quad proposals
        text_box_restored = self.restore_rectangle_quad(
            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
        if self.is_python35:
            import lanms
            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
        else:
            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
        if boxes.shape[0] == 0:
            return []
        # Here we filter some low score boxes by the average score map,
        # this is different from the orginal paper.
        for i, box in enumerate(boxes):
            mask = np.zeros_like(score_map, dtype=np.uint8)
            cv2.fillPoly(mask, box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4, 1)
            boxes[i, 8] = cv2.mean(score_map, mask)[0]
        boxes = boxes[boxes[:, 8] > cover_thresh]
        return boxes

    def sort_poly(self, p):
        """
        Sort polygons.
        """
        min_axis = np.argmin(np.sum(p, axis=1))
        p = p[[min_axis, (min_axis + 1) % 4, \
               (min_axis + 2) % 4, (min_axis + 3) % 4]]
        if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
            return p
        else:
            return p[[0, 3, 2, 1]]

    def __call__(self, outs_dict, shape_list):
        score_list = outs_dict['f_score']
        geo_list = outs_dict['f_geo']
        img_num = len(shape_list)
        dt_boxes_list = []
        for ino in range(img_num):
            score = score_list[ino].numpy()
            geo = geo_list[ino].numpy()
            boxes = self.detect(
                score_map=score,
                geo_map=geo,
                score_thresh=self.score_thresh,
                cover_thresh=self.cover_thresh,
                nms_thresh=self.nms_thresh)
            boxes_norm = []
            if len(boxes) > 0:
                h, w = score.shape[1:]
                src_h, src_w, ratio_h, ratio_w = shape_list[ino]
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h
                for i_box, box in enumerate(boxes):
                    box = self.sort_poly(box.astype(np.int32))
                    if np.linalg.norm(box[0] - box[1]) < 5 \
                            or np.linalg.norm(box[3] - box[0]) < 5:
                        continue
                    boxes_norm.append(box)
            dt_boxes_list.append({'points': np.array(boxes_norm)})
        return dt_boxes_list
ppocr/postprocess/locality_aware_nms.py  (new file, mode 100644)

"""
Locality aware nms.
"""
import numpy as np
from shapely.geometry import Polygon


def intersection(g, p):
    """
    Intersection.
    """
    g = Polygon(g[:8].reshape((4, 2)))
    p = Polygon(p[:8].reshape((4, 2)))
    g = g.buffer(0)
    p = p.buffer(0)
    if not g.is_valid or not p.is_valid:
        return 0
    inter = Polygon(g).intersection(Polygon(p)).area
    union = g.area + p.area - inter
    if union == 0:
        return 0
    else:
        return inter / union


def intersection_iog(g, p):
    """
    Intersection_iog.
    """
    g = Polygon(g[:8].reshape((4, 2)))
    p = Polygon(p[:8].reshape((4, 2)))
    if not g.is_valid or not p.is_valid:
        return 0
    inter = Polygon(g).intersection(Polygon(p)).area
    # union = g.area + p.area - inter
    union = p.area
    if union == 0:
        print("p_area is very small")
        return 0
    else:
        return inter / union


def weighted_merge(g, p):
    """
    Weighted merge.
    """
    g[:8] = (g[8] * g[:8] + p[8] * p[:8]) / (g[8] + p[8])
    g[8] = (g[8] + p[8])
    return g


def standard_nms(S, thres):
    """
    Standard nms.
    """
    order = np.argsort(S[:, 8])[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])
        inds = np.where(ovr <= thres)[0]
        order = order[inds + 1]
    return S[keep]


def standard_nms_inds(S, thres):
    """
    Standard nms, retun inds.
    """
    order = np.argsort(S[:, 8])[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])
        inds = np.where(ovr <= thres)[0]
        order = order[inds + 1]
    return keep


def nms(S, thres):
    """
    nms.
    """
    order = np.argsort(S[:, 8])[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        ovr = np.array([intersection(S[i], S[t]) for t in order[1:]])
        inds = np.where(ovr <= thres)[0]
        order = order[inds + 1]
    return keep


def soft_nms(boxes_in, Nt_thres=0.3, threshold=0.8, sigma=0.5, method=2):
    """
    soft_nms
    :para boxes_in, N x 9 (coords + score)
    :para threshould, eliminate cases min score(0.001)
    :para Nt_thres, iou_threshi
    :para sigma, gaussian weght
    :method, linear or gaussian
    """
    boxes = boxes_in.copy()
    N = boxes.shape[0]
    if N is None or N < 1:
        return np.array([])
    pos, maxpos = 0, 0
    weight = 0.0
    inds = np.arange(N)
    tbox, sbox = boxes[0].copy(), boxes[0].copy()
    for i in range(N):
        maxscore = boxes[i, 8]
        maxpos = i
        tbox = boxes[i].copy()
        ti = inds[i]
        pos = i + 1
        # get max box
        while pos < N:
            if maxscore < boxes[pos, 8]:
                maxscore = boxes[pos, 8]
                maxpos = pos
            pos = pos + 1
        # add max box as a detection
        boxes[i, :] = boxes[maxpos, :]
        inds[i] = inds[maxpos]
        # swap
        boxes[maxpos, :] = tbox
        inds[maxpos] = ti
        tbox = boxes[i].copy()
        pos = i + 1
        # NMS iteration
        while pos < N:
            sbox = boxes[pos].copy()
            ts_iou_val = intersection(tbox, sbox)
            if ts_iou_val > 0:
                if method == 1:
                    if ts_iou_val > Nt_thres:
                        weight = 1 - ts_iou_val
                    else:
                        weight = 1
                elif method == 2:
                    weight = np.exp(-1.0 * ts_iou_val**2 / sigma)
                else:
                    if ts_iou_val > Nt_thres:
                        weight = 0
                    else:
                        weight = 1
                boxes[pos, 8] = weight * boxes[pos, 8]
                # if box score falls below thresold, discard the box by
                # swaping last box update N
                if boxes[pos, 8] < threshold:
                    boxes[pos, :] = boxes[N - 1, :]
                    inds[pos] = inds[N - 1]
                    N = N - 1
                    pos = pos - 1
            pos = pos + 1
    return boxes[:N]


def nms_locality(polys, thres=0.3):
    """
    locality aware nms of EAST
    :param polys: a N*9 numpy array. first 8 coordinates, then prob
    :return: boxes after nms
    """
    S = []
    p = None
    for g in polys:
        if p is not None and intersection(g, p) > thres:
            p = weighted_merge(g, p)
        else:
            if p is not None:
                S.append(p)
            p = g
    if p is not None:
        S.append(p)
    if len(S) == 0:
        return np.array([])
    return standard_nms(np.array(S), thres)


if __name__ == '__main__':
    # 343,350,448,135,474,143,369,359
    print(
        Polygon(
            np.array([[343, 350], [448, 135], [474, 143], [369, 359]])).area)
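[Editor's note — not part of the diff] Hypothetical usage of nms_locality. The 9-column layout (8 quad coordinates followed by a score) comes from the docstring above; the coordinates below are made up for illustration.

    import numpy as np
    from ppocr.postprocess.locality_aware_nms import nms_locality

    polys = np.array([
        [0, 0, 100, 0, 100, 30, 0, 30, 0.90],      # x1,y1,...,x4,y4,score
        [2, 1, 101, 1, 101, 31, 2, 31, 0.80],      # heavy overlap -> merged into the first
        [300, 0, 400, 0, 400, 30, 300, 30, 0.95],  # disjoint -> kept separately
    ], dtype=np.float64)
    kept = nms_locality(polys, thres=0.3)
    print(kept.shape)   # expect (2, 9)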
ppocr/postprocess/rec_postprocess.py

@@ -23,14 +23,16 @@ class BaseRecLabelDecode(object):
                  character_dict_path=None,
                  character_type='ch',
                  use_space_char=False):
-        support_character_type = ['ch', 'en', 'en_sensitive']
+        support_character_type = [
+            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean'
+        ]
         assert character_type in support_character_type, "Only {} are supported now but get {}".format(
             support_character_type, self.character_str)

         if character_type == "en":
             self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
             dict_character = list(self.character_str)
-        elif character_type == "ch":
+        elif character_type in ["ch", "french", "german", "japan", "korean"]:
             self.character_str = ""
             assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
             with open(character_dict_path, "rb") as fin:

@@ -150,4 +152,4 @@ class AttnLabelDecode(BaseRecLabelDecode):
         else:
             assert False, "unsupport type %s in get_beg_end_flag_idx" \
                 % beg_or_end
-        return idx
\ No newline at end of file
+        return idx
ppocr/postprocess/sast_postprocess.py  (new file, mode 100644)

# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# http://www.apache.org/licenses/LICENSE-2.0

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import sys
__dir__ = os.path.dirname(__file__)
sys.path.append(__dir__)
sys.path.append(os.path.join(__dir__, '..'))

import numpy as np
from .locality_aware_nms import nms_locality
# import lanms
import cv2
import time


class SASTPostProcess(object):
    """
    The post process for SAST.
    """

    def __init__(self,
                 score_thresh=0.5,
                 nms_thresh=0.2,
                 sample_pts_num=2,
                 shrink_ratio_of_width=0.3,
                 expand_scale=1.0,
                 tcl_map_thresh=0.5,
                 **kwargs):
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.sample_pts_num = sample_pts_num
        self.shrink_ratio_of_width = shrink_ratio_of_width
        self.expand_scale = expand_scale
        self.tcl_map_thresh = tcl_map_thresh

        # c++ la-nms is faster, but only support python 3.5
        self.is_python35 = False
        if sys.version_info.major == 3 and sys.version_info.minor == 5:
            self.is_python35 = True

    def point_pair2poly(self, point_pair_list):
        """
        Transfer vertical point_pairs into poly point in clockwise.
        """
        # constract poly
        point_num = len(point_pair_list) * 2
        point_list = [0] * point_num
        for idx, point_pair in enumerate(point_pair_list):
            point_list[idx] = point_pair[0]
            point_list[point_num - 1 - idx] = point_pair[1]
        return np.array(point_list).reshape(-1, 2)

    def shrink_quad_along_width(self, quad, begin_width_ratio=0., end_width_ratio=1.):
        """
        Generate shrink_quad_along_width.
        """
        ratio_pair = np.array(
            [[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
        p0_1 = quad[0] + (quad[1] - quad[0]) * ratio_pair
        p3_2 = quad[3] + (quad[2] - quad[3]) * ratio_pair
        return np.array([p0_1[0], p0_1[1], p3_2[1], p3_2[0]])

    def expand_poly_along_width(self, poly, shrink_ratio_of_width=0.3):
        """
        expand poly along width.
        """
        point_num = poly.shape[0]
        left_quad = np.array(
            [poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32)
        left_ratio = -shrink_ratio_of_width * np.linalg.norm(left_quad[0] - left_quad[3]) / \
            (np.linalg.norm(left_quad[0] - left_quad[1]) + 1e-6)
        left_quad_expand = self.shrink_quad_along_width(left_quad, left_ratio, 1.0)
        right_quad = np.array(
            [poly[point_num // 2 - 2], poly[point_num // 2 - 1],
             poly[point_num // 2], poly[point_num // 2 + 1]],
            dtype=np.float32)
        right_ratio = 1.0 + \
            shrink_ratio_of_width * np.linalg.norm(right_quad[0] - right_quad[3]) / \
            (np.linalg.norm(right_quad[0] - right_quad[1]) + 1e-6)
        right_quad_expand = self.shrink_quad_along_width(right_quad, 0.0, right_ratio)
        poly[0] = left_quad_expand[0]
        poly[-1] = left_quad_expand[-1]
        poly[point_num // 2 - 1] = right_quad_expand[1]
        poly[point_num // 2] = right_quad_expand[2]
        return poly

    def restore_quad(self, tcl_map, tcl_map_thresh, tvo_map):
        """Restore quad."""
        xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)
        xy_text = xy_text[:, ::-1]  # (n, 2)

        # Sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 1])]

        scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0]
        scores = scores[:, np.newaxis]

        # Restore
        point_num = int(tvo_map.shape[-1] / 2)
        assert point_num == 4
        tvo_map = tvo_map[xy_text[:, 1], xy_text[:, 0], :]
        xy_text_tile = np.tile(xy_text, (1, point_num))  # (n, point_num * 2)
        quads = xy_text_tile - tvo_map

        return scores, quads, xy_text

    def quad_area(self, quad):
        """
        compute area of a quad.
        """
        edge = [(quad[1][0] - quad[0][0]) * (quad[1][1] + quad[0][1]),
                (quad[2][0] - quad[1][0]) * (quad[2][1] + quad[1][1]),
                (quad[3][0] - quad[2][0]) * (quad[3][1] + quad[2][1]),
                (quad[0][0] - quad[3][0]) * (quad[0][1] + quad[3][1])]
        return np.sum(edge) / 2.

    def nms(self, dets):
        if self.is_python35:
            import lanms
            dets = lanms.merge_quadrangle_n9(dets, self.nms_thresh)
        else:
            dets = nms_locality(dets, self.nms_thresh)
        return dets

    def cluster_by_quads_tco(self, tcl_map, tcl_map_thresh, quads, tco_map):
        """
        Cluster pixels in tcl_map based on quads.
        """
        instance_count = quads.shape[0] + 1  # contain background
        instance_label_map = np.zeros(tcl_map.shape[:2], dtype=np.int32)
        if instance_count == 1:
            return instance_count, instance_label_map

        # predict text center
        xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)
        n = xy_text.shape[0]
        xy_text = xy_text[:, ::-1]  # (n, 2)
        tco = tco_map[xy_text[:, 1], xy_text[:, 0], :]  # (n, 2)
        pred_tc = xy_text - tco

        # get gt text center
        m = quads.shape[0]
        gt_tc = np.mean(quads, axis=1)  # (m, 2)

        pred_tc_tile = np.tile(pred_tc[:, np.newaxis, :], (1, m, 1))  # (n, m, 2)
        gt_tc_tile = np.tile(gt_tc[np.newaxis, :, :], (n, 1, 1))      # (n, m, 2)
        dist_mat = np.linalg.norm(pred_tc_tile - gt_tc_tile, axis=2)  # (n, m)
        xy_text_assign = np.argmin(dist_mat, axis=1) + 1              # (n,)

        instance_label_map[xy_text[:, 1], xy_text[:, 0]] = xy_text_assign
        return instance_count, instance_label_map

    def estimate_sample_pts_num(self, quad, xy_text):
        """
        Estimate sample points number.
        """
        eh = (np.linalg.norm(quad[0] - quad[3]) +
              np.linalg.norm(quad[1] - quad[2])) / 2.0
        ew = (np.linalg.norm(quad[0] - quad[1]) +
              np.linalg.norm(quad[2] - quad[3])) / 2.0

        dense_sample_pts_num = max(2, int(ew))
        dense_xy_center_line = xy_text[np.linspace(
            0, xy_text.shape[0] - 1, dense_sample_pts_num,
            endpoint=True, dtype=np.float32).astype(np.int32)]

        dense_xy_center_line_diff = dense_xy_center_line[1:] - dense_xy_center_line[:-1]
        estimate_arc_len = np.sum(np.linalg.norm(dense_xy_center_line_diff, axis=1))

        sample_pts_num = max(2, int(estimate_arc_len / eh))
        return sample_pts_num

    def detect_sast(self, tcl_map, tvo_map, tbo_map, tco_map, ratio_w, ratio_h,
                    src_w, src_h, shrink_ratio_of_width=0.3, tcl_map_thresh=0.5,
                    offset_expand=1.0, out_strid=4.0):
        """
        first resize the tcl_map, tvo_map and tbo_map to the input_size, then restore the polys
        """
        # restore quad
        scores, quads, xy_text = self.restore_quad(tcl_map, tcl_map_thresh, tvo_map)
        dets = np.hstack((quads, scores)).astype(np.float32, copy=False)
        dets = self.nms(dets)
        if dets.shape[0] == 0:
            return []
        quads = dets[:, :-1].reshape(-1, 4, 2)

        # Compute quad area
        quad_areas = []
        for quad in quads:
            quad_areas.append(-self.quad_area(quad))

        # instance segmentation
        # instance_count, instance_label_map = cv2.connectedComponents(tcl_map.astype(np.uint8), connectivity=8)
        instance_count, instance_label_map = self.cluster_by_quads_tco(
            tcl_map, tcl_map_thresh, quads, tco_map)

        # restore single poly with tcl instance.
        poly_list = []
        for instance_idx in range(1, instance_count):
            xy_text = np.argwhere(instance_label_map == instance_idx)[:, ::-1]
            quad = quads[instance_idx - 1]
            q_area = quad_areas[instance_idx - 1]
            if q_area < 5:
                continue

            #
            len1 = float(np.linalg.norm(quad[0] - quad[1]))
            len2 = float(np.linalg.norm(quad[1] - quad[2]))
            min_len = min(len1, len2)
            if min_len < 3:
                continue

            # filter small CC
            if xy_text.shape[0] <= 0:
                continue

            # filter low confidence instance
            xy_text_scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0]
            if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.1:
                # if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.05:
                continue

            # sort xy_text
            left_center_pt = np.array(
                [[(quad[0, 0] + quad[-1, 0]) / 2.0,
                  (quad[0, 1] + quad[-1, 1]) / 2.0]])  # (1, 2)
            right_center_pt = np.array(
                [[(quad[1, 0] + quad[2, 0]) / 2.0,
                  (quad[1, 1] + quad[2, 1]) / 2.0]])  # (1, 2)
            proj_unit_vec = (right_center_pt - left_center_pt) / \
                (np.linalg.norm(right_center_pt - left_center_pt) + 1e-6)
            proj_value = np.sum(xy_text * proj_unit_vec, axis=1)
            xy_text = xy_text[np.argsort(proj_value)]

            # Sample pts in tcl map
            if self.sample_pts_num == 0:
                sample_pts_num = self.estimate_sample_pts_num(quad, xy_text)
            else:
                sample_pts_num = self.sample_pts_num
            xy_center_line = xy_text[np.linspace(
                0, xy_text.shape[0] - 1, sample_pts_num,
                endpoint=True, dtype=np.float32).astype(np.int32)]

            point_pair_list = []
            for x, y in xy_center_line:
                # get corresponding offset
                offset = tbo_map[y, x, :].reshape(2, 2)
                if offset_expand != 1.0:
                    offset_length = np.linalg.norm(offset, axis=1, keepdims=True)
                    expand_length = np.clip(
                        offset_length * (offset_expand - 1), a_min=0.5, a_max=3.0)
                    offset_detal = offset / offset_length * expand_length
                    offset = offset + offset_detal
                # original point
                ori_yx = np.array([y, x], dtype=np.float32)
                point_pair = (ori_yx + offset)[:, ::-1] * out_strid / np.array(
                    [ratio_w, ratio_h]).reshape(-1, 2)
                point_pair_list.append(point_pair)

            # ndarry: (x, 2), expand poly along width
            detected_poly = self.point_pair2poly(point_pair_list)
            detected_poly = self.expand_poly_along_width(detected_poly, shrink_ratio_of_width)
            detected_poly[:, 0] = np.clip(detected_poly[:, 0], a_min=0, a_max=src_w)
            detected_poly[:, 1] = np.clip(detected_poly[:, 1], a_min=0, a_max=src_h)
            poly_list.append(detected_poly)

        return poly_list

    def __call__(self, outs_dict, shape_list):
        score_list = outs_dict['f_score']
        border_list = outs_dict['f_border']
        tvo_list = outs_dict['f_tvo']
        tco_list = outs_dict['f_tco']

        img_num = len(shape_list)
        poly_lists = []
        for ino in range(img_num):
            p_score = score_list[ino].transpose((1, 2, 0)).numpy()
            p_border = border_list[ino].transpose((1, 2, 0)).numpy()
            p_tvo = tvo_list[ino].transpose((1, 2, 0)).numpy()
            p_tco = tco_list[ino].transpose((1, 2, 0)).numpy()
            src_h, src_w, ratio_h, ratio_w = shape_list[ino]

            poly_list = self.detect_sast(
                p_score, p_tvo, p_border, p_tco, ratio_w, ratio_h, src_w, src_h,
                shrink_ratio_of_width=self.shrink_ratio_of_width,
                tcl_map_thresh=self.tcl_map_thresh,
                offset_expand=self.expand_scale)
            poly_lists.append({'points': np.array(poly_list)})

        return poly_lists
setup.py

@@ -32,7 +32,7 @@ setup(
     package_dir={'paddleocr': ''},
     include_package_data=True,
     entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
-    version='0.0.3',
+    version='2.0',
     install_requires=requirements,
     license='Apache License 2.0',
     description='Awesome OCR toolkits based on PaddlePaddle (8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',
tools/infer/predict_system.py

@@ -13,6 +13,7 @@
 # limitations under the License.
 import os
 import sys
 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))

@@ -30,12 +31,15 @@ from ppocr.utils.utility import get_image_file_list, check_and_read_gif
 from ppocr.utils.logging import get_logger
 from tools.infer.utility import draw_ocr_box_txt
+logger = get_logger()

 class TextSystem(object):
     def __init__(self, args):
         self.text_detector = predict_det.TextDetector(args)
         self.text_recognizer = predict_rec.TextRecognizer(args)
         self.use_angle_cls = args.use_angle_cls
+        self.drop_score = args.drop_score
         if self.use_angle_cls:
             self.text_classifier = predict_cls.TextClassifier(args)

@@ -81,7 +85,8 @@ class TextSystem(object):
     def __call__(self, img):
         ori_im = img.copy()
         dt_boxes, elapse = self.text_detector(img)
         logger.info("dt_boxes num : {}, elapse : {}".format(len(dt_boxes), elapse))
         if dt_boxes is None:
             return None, None
         img_crop_list = []

@@ -99,9 +104,16 @@ class TextSystem(object):
             len(img_crop_list), elapse))
         rec_res, elapse = self.text_recognizer(img_crop_list)
         logger.info("rec_res num : {}, elapse : {}".format(len(rec_res), elapse))
         # self.print_draw_crop_rec_res(img_crop_list, rec_res)
-        return dt_boxes, rec_res
+        filter_boxes, filter_rec_res = [], []
+        for box, rec_reuslt in zip(dt_boxes, rec_res):
+            text, score = rec_reuslt
+            if score >= self.drop_score:
+                filter_boxes.append(box)
+                filter_rec_res.append(rec_reuslt)
+        return filter_boxes, filter_rec_res

 def sorted_boxes(dt_boxes):

@@ -117,8 +129,8 @@ def sorted_boxes(dt_boxes):
     _boxes = list(sorted_boxes)
     for i in range(num_boxes - 1):
         if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \
                 (_boxes[i + 1][0][0] < _boxes[i][0][0]):
             tmp = _boxes[i]
             _boxes[i] = _boxes[i + 1]
             _boxes[i + 1] = tmp

@@ -143,12 +155,8 @@ def main(args):
         elapse = time.time() - starttime
         logger.info("Predict time of %s: %.3fs" % (image_file, elapse))
-        dt_num = len(dt_boxes)
-        for dno in range(dt_num):
-            text, score = rec_res[dno]
-            if score >= drop_score:
-                text_str = "%s, %.3f" % (text, score)
-                logger.info(text_str)
+        for text, score in rec_res:
+            logger.info("{}, {:.3f}".format(text, score))
         if is_visualize:
             image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

@@ -174,5 +182,4 @@ def main(args):
 if __name__ == "__main__":
-    logger = get_logger()
     main(utility.parse_args())