Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
bd7f8f72
Unverified
Commit
bd7f8f72
authored
Dec 09, 2020
by
MissPenguin
Committed by
GitHub
Dec 09, 2020
Browse files
Merge pull request #1363 from MissPenguin/dygraph
add east & sast
parents
3c9d3f6b
d42bf7a0
Changes
26
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2827 additions
and
9 deletions
+2827
-9
configs/det/det_mv3_east.yml
configs/det/det_mv3_east.yml
+111
-0
configs/det/det_r50_vd_east.yml
configs/det/det_r50_vd_east.yml
+110
-0
configs/det/det_r50_vd_sast_icdar15.yml
configs/det/det_r50_vd_sast_icdar15.yml
+110
-0
configs/det/det_r50_vd_sast_totaltext.yml
configs/det/det_r50_vd_sast_totaltext.yml
+109
-0
ppocr/data/imaug/__init__.py
ppocr/data/imaug/__init__.py
+3
-0
ppocr/data/imaug/east_process.py
ppocr/data/imaug/east_process.py
+439
-0
ppocr/data/imaug/label_ops.py
ppocr/data/imaug/label_ops.py
+13
-1
ppocr/data/imaug/operators.py
ppocr/data/imaug/operators.py
+43
-5
ppocr/data/imaug/sast_process.py
ppocr/data/imaug/sast_process.py
+689
-0
ppocr/losses/__init__.py
ppocr/losses/__init__.py
+3
-1
ppocr/losses/det_east_loss.py
ppocr/losses/det_east_loss.py
+63
-0
ppocr/losses/det_sast_loss.py
ppocr/losses/det_sast_loss.py
+121
-0
ppocr/modeling/backbones/__init__.py
ppocr/modeling/backbones/__init__.py
+1
-0
ppocr/modeling/backbones/det_resnet_vd_sast.py
ppocr/modeling/backbones/det_resnet_vd_sast.py
+285
-0
ppocr/modeling/heads/__init__.py
ppocr/modeling/heads/__init__.py
+3
-1
ppocr/modeling/heads/det_east_head.py
ppocr/modeling/heads/det_east_head.py
+121
-0
ppocr/modeling/heads/det_sast_head.py
ppocr/modeling/heads/det_sast_head.py
+128
-0
ppocr/modeling/necks/__init__.py
ppocr/modeling/necks/__init__.py
+3
-1
ppocr/modeling/necks/east_fpn.py
ppocr/modeling/necks/east_fpn.py
+188
-0
ppocr/modeling/necks/sast_fpn.py
ppocr/modeling/necks/sast_fpn.py
+284
-0
No files found.
configs/det/det_mv3_east.yml
0 → 100644
View file @
bd7f8f72
Global
:
use_gpu
:
true
epoch_num
:
10000
log_smooth_window
:
20
print_batch_step
:
2
save_model_dir
:
./output/east_mv3/
save_epoch_step
:
1000
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step
:
[
4000
,
5000
]
# if pretrained_model is saved in static mode, load_static_weights must set to True
load_static_weights
:
True
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/MobileNetV3_large_x0_5_pretrained
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
save_res_path
:
./output/det_east/predicts_east.txt
Architecture
:
model_type
:
det
algorithm
:
EAST
Transform
:
Backbone
:
name
:
MobileNetV3
scale
:
0.5
model_name
:
large
Neck
:
name
:
EASTFPN
model_name
:
small
Head
:
name
:
EASTHead
model_name
:
small
Loss
:
name
:
EASTLoss
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
# name: Cosine
learning_rate
:
0.001
# warmup_epoch: 0
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess
:
name
:
EASTPostProcess
score_thresh
:
0.8
cover_thresh
:
0.1
nms_thresh
:
0.2
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
EASTProcessTrain
:
image_shape
:
[
512
,
512
]
background_ratio
:
0.125
min_crop_side_ratio
:
0.1
min_text_size
:
10
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
score_map'
,
'
geo_map'
,
'
training_mask'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
16
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
limit_side_len
:
2400
limit_type
:
max
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
2
\ No newline at end of file
configs/det/det_r50_vd_east.yml
0 → 100644
View file @
bd7f8f72
Global
:
use_gpu
:
true
epoch_num
:
10000
log_smooth_window
:
20
print_batch_step
:
2
save_model_dir
:
./output/east_r50_vd/
save_epoch_step
:
1000
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step
:
[
4000
,
5000
]
# if pretrained_model is saved in static mode, load_static_weights must set to True
load_static_weights
:
True
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ResNet50_vd_pretrained/
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
save_res_path
:
./output/det_east/predicts_east.txt
Architecture
:
model_type
:
det
algorithm
:
EAST
Transform
:
Backbone
:
name
:
ResNet
layers
:
50
Neck
:
name
:
EASTFPN
model_name
:
large
Head
:
name
:
EASTHead
model_name
:
large
Loss
:
name
:
EASTLoss
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
# name: Cosine
learning_rate
:
0.001
# warmup_epoch: 0
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess
:
name
:
EASTPostProcess
score_thresh
:
0.8
cover_thresh
:
0.1
nms_thresh
:
0.2
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/train_icdar2015_label.txt
ratio_list
:
[
1.0
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
EASTProcessTrain
:
image_shape
:
[
512
,
512
]
background_ratio
:
0.125
min_crop_side_ratio
:
0.1
min_text_size
:
10
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
score_map'
,
'
geo_map'
,
'
training_mask'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
8
num_workers
:
8
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
limit_side_len
:
2400
limit_type
:
max
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
2
\ No newline at end of file
configs/det/det_r50_vd_sast_icdar15.yml
0 → 100644
View file @
bd7f8f72
Global
:
use_gpu
:
true
epoch_num
:
5000
log_smooth_window
:
20
print_batch_step
:
2
save_model_dir
:
./output/sast_r50_vd_ic15/
save_epoch_step
:
1000
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step
:
[
4000
,
5000
]
# if pretrained_model is saved in static mode, load_static_weights must set to True
load_static_weights
:
True
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ResNet50_vd_ssld_pretrained/
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
save_res_path
:
./output/sast_r50_vd_ic15/predicts_sast.txt
Architecture
:
model_type
:
det
algorithm
:
SAST
Transform
:
Backbone
:
name
:
ResNet_SAST
layers
:
50
Neck
:
name
:
SASTFPN
with_cab
:
True
Head
:
name
:
SASTHead
Loss
:
name
:
SASTLoss
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
# name: Cosine
learning_rate
:
0.001
# warmup_epoch: 0
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess
:
name
:
SASTPostProcess
score_thresh
:
0.5
sample_pts_num
:
2
nms_thresh
:
0.2
expand_scale
:
1.0
shrink_ratio_of_width
:
0.3
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
label_file_path
:
[
./train_data/art_latin_icdar_14pt/train_no_tt_test/train_label_json.txt
,
./train_data/total_text_icdar_14pt/train_label_json.txt
]
data_ratio_list
:
[
0.5
,
0.5
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
SASTProcessTrain
:
image_shape
:
[
512
,
512
]
min_crop_side_ratio
:
0.3
min_crop_size
:
24
min_text_size
:
4
max_text_size
:
512
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
score_map'
,
'
border_map'
,
'
training_mask'
,
'
tvo_map'
,
'
tco_map'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
4
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/icdar2015/text_localization/
label_file_list
:
-
./train_data/icdar2015/text_localization/test_icdar2015_label.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
resize_long
:
1536
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
2
\ No newline at end of file
configs/det/det_r50_vd_sast_totaltext.yml
0 → 100644
View file @
bd7f8f72
Global
:
use_gpu
:
true
epoch_num
:
5000
log_smooth_window
:
20
print_batch_step
:
2
save_model_dir
:
./output/sast_r50_vd_tt/
save_epoch_step
:
1000
# evaluation is run every 5000 iterations after the 4000th iteration
eval_batch_step
:
[
4000
,
5000
]
# if pretrained_model is saved in static mode, load_static_weights must set to True
load_static_weights
:
True
cal_metric_during_train
:
False
pretrained_model
:
./pretrain_models/ResNet50_vd_ssld_pretrained/
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
save_res_path
:
./output/sast_r50_vd_tt/predicts_sast.txt
Architecture
:
model_type
:
det
algorithm
:
SAST
Transform
:
Backbone
:
name
:
ResNet_SAST
layers
:
50
Neck
:
name
:
SASTFPN
with_cab
:
True
Head
:
name
:
SASTHead
Loss
:
name
:
SASTLoss
Optimizer
:
name
:
Adam
beta1
:
0.9
beta2
:
0.999
lr
:
# name: Cosine
learning_rate
:
0.001
# warmup_epoch: 0
regularizer
:
name
:
'
L2'
factor
:
0
PostProcess
:
name
:
SASTPostProcess
score_thresh
:
0.5
sample_pts_num
:
6
nms_thresh
:
0.2
expand_scale
:
1.2
shrink_ratio_of_width
:
0.2
Metric
:
name
:
DetMetric
main_indicator
:
hmean
Train
:
dataset
:
name
:
SimpleDataSet
label_file_list
:
[
./train_data/icdar2013/train_label_json.txt
,
./train_data/icdar2015/train_label_json.txt
,
./train_data/icdar17_mlt_latin/train_label_json.txt
,
./train_data/coco_text_icdar_4pts/train_label_json.txt
]
ratio_list
:
[
0.1
,
0.45
,
0.3
,
0.15
]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
SASTProcessTrain
:
image_shape
:
[
512
,
512
]
min_crop_side_ratio
:
0.3
min_crop_size
:
24
min_text_size
:
4
max_text_size
:
512
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
score_map'
,
'
border_map'
,
'
training_mask'
,
'
tvo_map'
,
'
tco_map'
]
# dataloader will return list in this order
loader
:
shuffle
:
True
drop_last
:
False
batch_size_per_card
:
4
num_workers
:
4
Eval
:
dataset
:
name
:
SimpleDataSet
data_dir
:
./train_data/
label_file_list
:
-
./train_data/total_text_icdar_14pt/test_label_json.txt
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
DetLabelEncode
:
# Class handling label
-
DetResizeForTest
:
resize_long
:
768
-
NormalizeImage
:
scale
:
1./255.
mean
:
[
0.485
,
0.456
,
0.406
]
std
:
[
0.229
,
0.224
,
0.225
]
order
:
'
hwc'
-
ToCHWImage
:
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
shape'
,
'
polys'
,
'
ignore_tags'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
1
# must be 1
num_workers
:
2
\ No newline at end of file
ppocr/data/imaug/__init__.py
View file @
bd7f8f72
...
@@ -26,6 +26,9 @@ from .randaugment import RandAugment
...
@@ -26,6 +26,9 @@ from .randaugment import RandAugment
from
.operators
import
*
from
.operators
import
*
from
.label_ops
import
*
from
.label_ops
import
*
from
.east_process
import
*
from
.sast_process
import
*
def
transform
(
data
,
ops
=
None
):
def
transform
(
data
,
ops
=
None
):
""" transform """
""" transform """
...
...
ppocr/data/imaug/east_process.py
0 → 100644
View file @
bd7f8f72
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import
math
import
cv2
import
numpy
as
np
import
json
import
sys
import
os
__all__
=
[
'EASTProcessTrain'
]
class
EASTProcessTrain
(
object
):
def
__init__
(
self
,
image_shape
=
[
512
,
512
],
background_ratio
=
0.125
,
min_crop_side_ratio
=
0.1
,
min_text_size
=
10
,
**
kwargs
):
self
.
input_size
=
image_shape
[
1
]
self
.
random_scale
=
np
.
array
([
0.5
,
1
,
2.0
,
3.0
])
self
.
background_ratio
=
background_ratio
self
.
min_crop_side_ratio
=
min_crop_side_ratio
self
.
min_text_size
=
min_text_size
def
preprocess
(
self
,
im
):
input_size
=
self
.
input_size
im_shape
=
im
.
shape
im_size_min
=
np
.
min
(
im_shape
[
0
:
2
])
im_size_max
=
np
.
max
(
im_shape
[
0
:
2
])
im_scale
=
float
(
input_size
)
/
float
(
im_size_max
)
im
=
cv2
.
resize
(
im
,
None
,
None
,
fx
=
im_scale
,
fy
=
im_scale
)
img_mean
=
[
0.485
,
0.456
,
0.406
]
img_std
=
[
0.229
,
0.224
,
0.225
]
# im = im[:, :, ::-1].astype(np.float32)
im
=
im
/
255
im
-=
img_mean
im
/=
img_std
new_h
,
new_w
,
_
=
im
.
shape
im_padded
=
np
.
zeros
((
input_size
,
input_size
,
3
),
dtype
=
np
.
float32
)
im_padded
[:
new_h
,
:
new_w
,
:]
=
im
im_padded
=
im_padded
.
transpose
((
2
,
0
,
1
))
im_padded
=
im_padded
[
np
.
newaxis
,
:]
return
im_padded
,
im_scale
def
rotate_im_poly
(
self
,
im
,
text_polys
):
"""
rotate image with 90 / 180 / 270 degre
"""
im_w
,
im_h
=
im
.
shape
[
1
],
im
.
shape
[
0
]
dst_im
=
im
.
copy
()
dst_polys
=
[]
rand_degree_ratio
=
np
.
random
.
rand
()
rand_degree_cnt
=
1
if
0.333
<
rand_degree_ratio
<
0.666
:
rand_degree_cnt
=
2
elif
rand_degree_ratio
>
0.666
:
rand_degree_cnt
=
3
for
i
in
range
(
rand_degree_cnt
):
dst_im
=
np
.
rot90
(
dst_im
)
rot_degree
=
-
90
*
rand_degree_cnt
rot_angle
=
rot_degree
*
math
.
pi
/
180.0
n_poly
=
text_polys
.
shape
[
0
]
cx
,
cy
=
0.5
*
im_w
,
0.5
*
im_h
ncx
,
ncy
=
0.5
*
dst_im
.
shape
[
1
],
0.5
*
dst_im
.
shape
[
0
]
for
i
in
range
(
n_poly
):
wordBB
=
text_polys
[
i
]
poly
=
[]
for
j
in
range
(
4
):
sx
,
sy
=
wordBB
[
j
][
0
],
wordBB
[
j
][
1
]
dx
=
math
.
cos
(
rot_angle
)
*
(
sx
-
cx
)
\
-
math
.
sin
(
rot_angle
)
*
(
sy
-
cy
)
+
ncx
dy
=
math
.
sin
(
rot_angle
)
*
(
sx
-
cx
)
\
+
math
.
cos
(
rot_angle
)
*
(
sy
-
cy
)
+
ncy
poly
.
append
([
dx
,
dy
])
dst_polys
.
append
(
poly
)
dst_polys
=
np
.
array
(
dst_polys
,
dtype
=
np
.
float32
)
return
dst_im
,
dst_polys
def
polygon_area
(
self
,
poly
):
"""
compute area of a polygon
:param poly:
:return:
"""
edge
=
[(
poly
[
1
][
0
]
-
poly
[
0
][
0
])
*
(
poly
[
1
][
1
]
+
poly
[
0
][
1
]),
(
poly
[
2
][
0
]
-
poly
[
1
][
0
])
*
(
poly
[
2
][
1
]
+
poly
[
1
][
1
]),
(
poly
[
3
][
0
]
-
poly
[
2
][
0
])
*
(
poly
[
3
][
1
]
+
poly
[
2
][
1
]),
(
poly
[
0
][
0
]
-
poly
[
3
][
0
])
*
(
poly
[
0
][
1
]
+
poly
[
3
][
1
])]
return
np
.
sum
(
edge
)
/
2.
def
check_and_validate_polys
(
self
,
polys
,
tags
,
img_height
,
img_width
):
"""
check so that the text poly is in the same direction,
and also filter some invalid polygons
:param polys:
:param tags:
:return:
"""
h
,
w
=
img_height
,
img_width
if
polys
.
shape
[
0
]
==
0
:
return
polys
polys
[:,
:,
0
]
=
np
.
clip
(
polys
[:,
:,
0
],
0
,
w
-
1
)
polys
[:,
:,
1
]
=
np
.
clip
(
polys
[:,
:,
1
],
0
,
h
-
1
)
validated_polys
=
[]
validated_tags
=
[]
for
poly
,
tag
in
zip
(
polys
,
tags
):
p_area
=
self
.
polygon_area
(
poly
)
#invalid poly
if
abs
(
p_area
)
<
1
:
continue
if
p_area
>
0
:
#'poly in wrong direction'
if
not
tag
:
tag
=
True
#reversed cases should be ignore
poly
=
poly
[(
0
,
3
,
2
,
1
),
:]
validated_polys
.
append
(
poly
)
validated_tags
.
append
(
tag
)
return
np
.
array
(
validated_polys
),
np
.
array
(
validated_tags
)
def
draw_img_polys
(
self
,
img
,
polys
):
if
len
(
img
.
shape
)
==
4
:
img
=
np
.
squeeze
(
img
,
axis
=
0
)
if
img
.
shape
[
0
]
==
3
:
img
=
img
.
transpose
((
1
,
2
,
0
))
img
[:,
:,
2
]
+=
123.68
img
[:,
:,
1
]
+=
116.78
img
[:,
:,
0
]
+=
103.94
cv2
.
imwrite
(
"tmp.jpg"
,
img
)
img
=
cv2
.
imread
(
"tmp.jpg"
)
for
box
in
polys
:
box
=
box
.
astype
(
np
.
int32
).
reshape
((
-
1
,
1
,
2
))
cv2
.
polylines
(
img
,
[
box
],
True
,
color
=
(
255
,
255
,
0
),
thickness
=
2
)
import
random
ino
=
random
.
randint
(
0
,
100
)
cv2
.
imwrite
(
"tmp_%d.jpg"
%
ino
,
img
)
return
def
shrink_poly
(
self
,
poly
,
r
):
"""
fit a poly inside the origin poly, maybe bugs here...
used for generate the score map
:param poly: the text poly
:param r: r in the paper
:return: the shrinked poly
"""
# shrink ratio
R
=
0.3
# find the longer pair
dist0
=
np
.
linalg
.
norm
(
poly
[
0
]
-
poly
[
1
])
dist1
=
np
.
linalg
.
norm
(
poly
[
2
]
-
poly
[
3
])
dist2
=
np
.
linalg
.
norm
(
poly
[
0
]
-
poly
[
3
])
dist3
=
np
.
linalg
.
norm
(
poly
[
1
]
-
poly
[
2
])
if
dist0
+
dist1
>
dist2
+
dist3
:
# first move (p0, p1), (p2, p3), then (p0, p3), (p1, p2)
## p0, p1
theta
=
np
.
arctan2
((
poly
[
1
][
1
]
-
poly
[
0
][
1
]),
(
poly
[
1
][
0
]
-
poly
[
0
][
0
]))
poly
[
0
][
0
]
+=
R
*
r
[
0
]
*
np
.
cos
(
theta
)
poly
[
0
][
1
]
+=
R
*
r
[
0
]
*
np
.
sin
(
theta
)
poly
[
1
][
0
]
-=
R
*
r
[
1
]
*
np
.
cos
(
theta
)
poly
[
1
][
1
]
-=
R
*
r
[
1
]
*
np
.
sin
(
theta
)
## p2, p3
theta
=
np
.
arctan2
((
poly
[
2
][
1
]
-
poly
[
3
][
1
]),
(
poly
[
2
][
0
]
-
poly
[
3
][
0
]))
poly
[
3
][
0
]
+=
R
*
r
[
3
]
*
np
.
cos
(
theta
)
poly
[
3
][
1
]
+=
R
*
r
[
3
]
*
np
.
sin
(
theta
)
poly
[
2
][
0
]
-=
R
*
r
[
2
]
*
np
.
cos
(
theta
)
poly
[
2
][
1
]
-=
R
*
r
[
2
]
*
np
.
sin
(
theta
)
## p0, p3
theta
=
np
.
arctan2
((
poly
[
3
][
0
]
-
poly
[
0
][
0
]),
(
poly
[
3
][
1
]
-
poly
[
0
][
1
]))
poly
[
0
][
0
]
+=
R
*
r
[
0
]
*
np
.
sin
(
theta
)
poly
[
0
][
1
]
+=
R
*
r
[
0
]
*
np
.
cos
(
theta
)
poly
[
3
][
0
]
-=
R
*
r
[
3
]
*
np
.
sin
(
theta
)
poly
[
3
][
1
]
-=
R
*
r
[
3
]
*
np
.
cos
(
theta
)
## p1, p2
theta
=
np
.
arctan2
((
poly
[
2
][
0
]
-
poly
[
1
][
0
]),
(
poly
[
2
][
1
]
-
poly
[
1
][
1
]))
poly
[
1
][
0
]
+=
R
*
r
[
1
]
*
np
.
sin
(
theta
)
poly
[
1
][
1
]
+=
R
*
r
[
1
]
*
np
.
cos
(
theta
)
poly
[
2
][
0
]
-=
R
*
r
[
2
]
*
np
.
sin
(
theta
)
poly
[
2
][
1
]
-=
R
*
r
[
2
]
*
np
.
cos
(
theta
)
else
:
## p0, p3
# print poly
theta
=
np
.
arctan2
((
poly
[
3
][
0
]
-
poly
[
0
][
0
]),
(
poly
[
3
][
1
]
-
poly
[
0
][
1
]))
poly
[
0
][
0
]
+=
R
*
r
[
0
]
*
np
.
sin
(
theta
)
poly
[
0
][
1
]
+=
R
*
r
[
0
]
*
np
.
cos
(
theta
)
poly
[
3
][
0
]
-=
R
*
r
[
3
]
*
np
.
sin
(
theta
)
poly
[
3
][
1
]
-=
R
*
r
[
3
]
*
np
.
cos
(
theta
)
## p1, p2
theta
=
np
.
arctan2
((
poly
[
2
][
0
]
-
poly
[
1
][
0
]),
(
poly
[
2
][
1
]
-
poly
[
1
][
1
]))
poly
[
1
][
0
]
+=
R
*
r
[
1
]
*
np
.
sin
(
theta
)
poly
[
1
][
1
]
+=
R
*
r
[
1
]
*
np
.
cos
(
theta
)
poly
[
2
][
0
]
-=
R
*
r
[
2
]
*
np
.
sin
(
theta
)
poly
[
2
][
1
]
-=
R
*
r
[
2
]
*
np
.
cos
(
theta
)
## p0, p1
theta
=
np
.
arctan2
((
poly
[
1
][
1
]
-
poly
[
0
][
1
]),
(
poly
[
1
][
0
]
-
poly
[
0
][
0
]))
poly
[
0
][
0
]
+=
R
*
r
[
0
]
*
np
.
cos
(
theta
)
poly
[
0
][
1
]
+=
R
*
r
[
0
]
*
np
.
sin
(
theta
)
poly
[
1
][
0
]
-=
R
*
r
[
1
]
*
np
.
cos
(
theta
)
poly
[
1
][
1
]
-=
R
*
r
[
1
]
*
np
.
sin
(
theta
)
## p2, p3
theta
=
np
.
arctan2
((
poly
[
2
][
1
]
-
poly
[
3
][
1
]),
(
poly
[
2
][
0
]
-
poly
[
3
][
0
]))
poly
[
3
][
0
]
+=
R
*
r
[
3
]
*
np
.
cos
(
theta
)
poly
[
3
][
1
]
+=
R
*
r
[
3
]
*
np
.
sin
(
theta
)
poly
[
2
][
0
]
-=
R
*
r
[
2
]
*
np
.
cos
(
theta
)
poly
[
2
][
1
]
-=
R
*
r
[
2
]
*
np
.
sin
(
theta
)
return
poly
def
generate_quad
(
self
,
im_size
,
polys
,
tags
):
"""
Generate quadrangle.
"""
h
,
w
=
im_size
poly_mask
=
np
.
zeros
((
h
,
w
),
dtype
=
np
.
uint8
)
score_map
=
np
.
zeros
((
h
,
w
),
dtype
=
np
.
uint8
)
# (x1, y1, ..., x4, y4, short_edge_norm)
geo_map
=
np
.
zeros
((
h
,
w
,
9
),
dtype
=
np
.
float32
)
# mask used during traning, to ignore some hard areas
training_mask
=
np
.
ones
((
h
,
w
),
dtype
=
np
.
uint8
)
for
poly_idx
,
poly_tag
in
enumerate
(
zip
(
polys
,
tags
)):
poly
=
poly_tag
[
0
]
tag
=
poly_tag
[
1
]
r
=
[
None
,
None
,
None
,
None
]
for
i
in
range
(
4
):
dist1
=
np
.
linalg
.
norm
(
poly
[
i
]
-
poly
[(
i
+
1
)
%
4
])
dist2
=
np
.
linalg
.
norm
(
poly
[
i
]
-
poly
[(
i
-
1
)
%
4
])
r
[
i
]
=
min
(
dist1
,
dist2
)
# score map
shrinked_poly
=
self
.
shrink_poly
(
poly
.
copy
(),
r
).
astype
(
np
.
int32
)[
np
.
newaxis
,
:,
:]
cv2
.
fillPoly
(
score_map
,
shrinked_poly
,
1
)
cv2
.
fillPoly
(
poly_mask
,
shrinked_poly
,
poly_idx
+
1
)
# if the poly is too small, then ignore it during training
poly_h
=
min
(
np
.
linalg
.
norm
(
poly
[
0
]
-
poly
[
3
]),
np
.
linalg
.
norm
(
poly
[
1
]
-
poly
[
2
]))
poly_w
=
min
(
np
.
linalg
.
norm
(
poly
[
0
]
-
poly
[
1
]),
np
.
linalg
.
norm
(
poly
[
2
]
-
poly
[
3
]))
if
min
(
poly_h
,
poly_w
)
<
self
.
min_text_size
:
cv2
.
fillPoly
(
training_mask
,
poly
.
astype
(
np
.
int32
)[
np
.
newaxis
,
:,
:],
0
)
if
tag
:
cv2
.
fillPoly
(
training_mask
,
poly
.
astype
(
np
.
int32
)[
np
.
newaxis
,
:,
:],
0
)
xy_in_poly
=
np
.
argwhere
(
poly_mask
==
(
poly_idx
+
1
))
# geo map.
y_in_poly
=
xy_in_poly
[:,
0
]
x_in_poly
=
xy_in_poly
[:,
1
]
poly
[:,
0
]
=
np
.
minimum
(
np
.
maximum
(
poly
[:,
0
],
0
),
w
)
poly
[:,
1
]
=
np
.
minimum
(
np
.
maximum
(
poly
[:,
1
],
0
),
h
)
for
pno
in
range
(
4
):
geo_channel_beg
=
pno
*
2
geo_map
[
y_in_poly
,
x_in_poly
,
geo_channel_beg
]
=
\
x_in_poly
-
poly
[
pno
,
0
]
geo_map
[
y_in_poly
,
x_in_poly
,
geo_channel_beg
+
1
]
=
\
y_in_poly
-
poly
[
pno
,
1
]
geo_map
[
y_in_poly
,
x_in_poly
,
8
]
=
\
1.0
/
max
(
min
(
poly_h
,
poly_w
),
1.0
)
return
score_map
,
geo_map
,
training_mask
def
crop_area
(
self
,
im
,
polys
,
tags
,
crop_background
=
False
,
max_tries
=
50
):
"""
make random crop from the input image
:param im:
:param polys:
:param tags:
:param crop_background:
:param max_tries:
:return:
"""
h
,
w
,
_
=
im
.
shape
pad_h
=
h
//
10
pad_w
=
w
//
10
h_array
=
np
.
zeros
((
h
+
pad_h
*
2
),
dtype
=
np
.
int32
)
w_array
=
np
.
zeros
((
w
+
pad_w
*
2
),
dtype
=
np
.
int32
)
for
poly
in
polys
:
poly
=
np
.
round
(
poly
,
decimals
=
0
).
astype
(
np
.
int32
)
minx
=
np
.
min
(
poly
[:,
0
])
maxx
=
np
.
max
(
poly
[:,
0
])
w_array
[
minx
+
pad_w
:
maxx
+
pad_w
]
=
1
miny
=
np
.
min
(
poly
[:,
1
])
maxy
=
np
.
max
(
poly
[:,
1
])
h_array
[
miny
+
pad_h
:
maxy
+
pad_h
]
=
1
# ensure the cropped area not across a text
h_axis
=
np
.
where
(
h_array
==
0
)[
0
]
w_axis
=
np
.
where
(
w_array
==
0
)[
0
]
if
len
(
h_axis
)
==
0
or
len
(
w_axis
)
==
0
:
return
im
,
polys
,
tags
for
i
in
range
(
max_tries
):
xx
=
np
.
random
.
choice
(
w_axis
,
size
=
2
)
xmin
=
np
.
min
(
xx
)
-
pad_w
xmax
=
np
.
max
(
xx
)
-
pad_w
xmin
=
np
.
clip
(
xmin
,
0
,
w
-
1
)
xmax
=
np
.
clip
(
xmax
,
0
,
w
-
1
)
yy
=
np
.
random
.
choice
(
h_axis
,
size
=
2
)
ymin
=
np
.
min
(
yy
)
-
pad_h
ymax
=
np
.
max
(
yy
)
-
pad_h
ymin
=
np
.
clip
(
ymin
,
0
,
h
-
1
)
ymax
=
np
.
clip
(
ymax
,
0
,
h
-
1
)
if
xmax
-
xmin
<
self
.
min_crop_side_ratio
*
w
or
\
ymax
-
ymin
<
self
.
min_crop_side_ratio
*
h
:
# area too small
continue
if
polys
.
shape
[
0
]
!=
0
:
poly_axis_in_area
=
(
polys
[:,
:,
0
]
>=
xmin
)
\
&
(
polys
[:,
:,
0
]
<=
xmax
)
\
&
(
polys
[:,
:,
1
]
>=
ymin
)
\
&
(
polys
[:,
:,
1
]
<=
ymax
)
selected_polys
=
np
.
where
(
np
.
sum
(
poly_axis_in_area
,
axis
=
1
)
==
4
)[
0
]
else
:
selected_polys
=
[]
if
len
(
selected_polys
)
==
0
:
# no text in this area
if
crop_background
:
im
=
im
[
ymin
:
ymax
+
1
,
xmin
:
xmax
+
1
,
:]
polys
=
[]
tags
=
[]
return
im
,
polys
,
tags
else
:
continue
im
=
im
[
ymin
:
ymax
+
1
,
xmin
:
xmax
+
1
,
:]
polys
=
polys
[
selected_polys
]
tags
=
tags
[
selected_polys
]
polys
[:,
:,
0
]
-=
xmin
polys
[:,
:,
1
]
-=
ymin
return
im
,
polys
,
tags
return
im
,
polys
,
tags
def
crop_background_infor
(
self
,
im
,
text_polys
,
text_tags
):
im
,
text_polys
,
text_tags
=
self
.
crop_area
(
im
,
text_polys
,
text_tags
,
crop_background
=
True
)
if
len
(
text_polys
)
>
0
:
return
None
# pad and resize image
input_size
=
self
.
input_size
im
,
ratio
=
self
.
preprocess
(
im
)
score_map
=
np
.
zeros
((
input_size
,
input_size
),
dtype
=
np
.
float32
)
geo_map
=
np
.
zeros
((
input_size
,
input_size
,
9
),
dtype
=
np
.
float32
)
training_mask
=
np
.
ones
((
input_size
,
input_size
),
dtype
=
np
.
float32
)
return
im
,
score_map
,
geo_map
,
training_mask
def
crop_foreground_infor
(
self
,
im
,
text_polys
,
text_tags
):
im
,
text_polys
,
text_tags
=
self
.
crop_area
(
im
,
text_polys
,
text_tags
,
crop_background
=
False
)
if
text_polys
.
shape
[
0
]
==
0
:
return
None
#continue for all ignore case
if
np
.
sum
((
text_tags
*
1.0
))
>=
text_tags
.
size
:
return
None
# pad and resize image
input_size
=
self
.
input_size
im
,
ratio
=
self
.
preprocess
(
im
)
text_polys
[:,
:,
0
]
*=
ratio
text_polys
[:,
:,
1
]
*=
ratio
_
,
_
,
new_h
,
new_w
=
im
.
shape
# print(im.shape)
# self.draw_img_polys(im, text_polys)
score_map
,
geo_map
,
training_mask
=
self
.
generate_quad
(
(
new_h
,
new_w
),
text_polys
,
text_tags
)
return
im
,
score_map
,
geo_map
,
training_mask
def
__call__
(
self
,
data
):
im
=
data
[
'image'
]
text_polys
=
data
[
'polys'
]
text_tags
=
data
[
'ignore_tags'
]
if
im
is
None
:
return
None
if
text_polys
.
shape
[
0
]
==
0
:
return
None
#add rotate cases
if
np
.
random
.
rand
()
<
0.5
:
im
,
text_polys
=
self
.
rotate_im_poly
(
im
,
text_polys
)
h
,
w
,
_
=
im
.
shape
text_polys
,
text_tags
=
self
.
check_and_validate_polys
(
text_polys
,
text_tags
,
h
,
w
)
if
text_polys
.
shape
[
0
]
==
0
:
return
None
# random scale this image
rd_scale
=
np
.
random
.
choice
(
self
.
random_scale
)
im
=
cv2
.
resize
(
im
,
dsize
=
None
,
fx
=
rd_scale
,
fy
=
rd_scale
)
text_polys
*=
rd_scale
if
np
.
random
.
rand
()
<
self
.
background_ratio
:
outs
=
self
.
crop_background_infor
(
im
,
text_polys
,
text_tags
)
else
:
outs
=
self
.
crop_foreground_infor
(
im
,
text_polys
,
text_tags
)
if
outs
is
None
:
return
None
im
,
score_map
,
geo_map
,
training_mask
=
outs
score_map
=
score_map
[
np
.
newaxis
,
::
4
,
::
4
].
astype
(
np
.
float32
)
geo_map
=
np
.
swapaxes
(
geo_map
,
1
,
2
)
geo_map
=
np
.
swapaxes
(
geo_map
,
1
,
0
)
geo_map
=
geo_map
[:,
::
4
,
::
4
].
astype
(
np
.
float32
)
training_mask
=
training_mask
[
np
.
newaxis
,
::
4
,
::
4
]
training_mask
=
training_mask
.
astype
(
np
.
float32
)
data
[
'image'
]
=
im
[
0
]
data
[
'score_map'
]
=
score_map
data
[
'geo_map'
]
=
geo_map
data
[
'training_mask'
]
=
training_mask
# print(im.shape, score_map.shape, geo_map.shape, training_mask.shape)
return
data
\ No newline at end of file
ppocr/data/imaug/label_ops.py
View file @
bd7f8f72
...
@@ -52,6 +52,7 @@ class DetLabelEncode(object):
...
@@ -52,6 +52,7 @@ class DetLabelEncode(object):
txt_tags
.
append
(
True
)
txt_tags
.
append
(
True
)
else
:
else
:
txt_tags
.
append
(
False
)
txt_tags
.
append
(
False
)
boxes
=
self
.
expand_points_num
(
boxes
)
boxes
=
np
.
array
(
boxes
,
dtype
=
np
.
float32
)
boxes
=
np
.
array
(
boxes
,
dtype
=
np
.
float32
)
txt_tags
=
np
.
array
(
txt_tags
,
dtype
=
np
.
bool
)
txt_tags
=
np
.
array
(
txt_tags
,
dtype
=
np
.
bool
)
...
@@ -70,6 +71,17 @@ class DetLabelEncode(object):
...
@@ -70,6 +71,17 @@ class DetLabelEncode(object):
rect
[
3
]
=
pts
[
np
.
argmax
(
diff
)]
rect
[
3
]
=
pts
[
np
.
argmax
(
diff
)]
return
rect
return
rect
def
expand_points_num
(
self
,
boxes
):
max_points_num
=
0
for
box
in
boxes
:
if
len
(
box
)
>
max_points_num
:
max_points_num
=
len
(
box
)
ex_boxes
=
[]
for
box
in
boxes
:
ex_box
=
box
+
[
box
[
-
1
]]
*
(
max_points_num
-
len
(
box
))
ex_boxes
.
append
(
ex_box
)
return
ex_boxes
class
BaseRecLabelEncode
(
object
):
class
BaseRecLabelEncode
(
object
):
""" Convert between text-label and text-index """
""" Convert between text-label and text-index """
...
@@ -83,7 +95,7 @@ class BaseRecLabelEncode(object):
...
@@ -83,7 +95,7 @@ class BaseRecLabelEncode(object):
'ch'
,
'en'
,
'en_sensitive'
,
'french'
,
'german'
,
'japan'
,
'korean'
'ch'
,
'en'
,
'en_sensitive'
,
'french'
,
'german'
,
'japan'
,
'korean'
]
]
assert
character_type
in
support_character_type
,
"Only {} are supported now but get {}"
.
format
(
assert
character_type
in
support_character_type
,
"Only {} are supported now but get {}"
.
format
(
support_character_type
,
self
.
character_
str
)
support_character_type
,
character_
type
)
self
.
max_text_len
=
max_text_length
self
.
max_text_len
=
max_text_length
if
character_type
==
"en"
:
if
character_type
==
"en"
:
...
...
ppocr/data/imaug/operators.py
View file @
bd7f8f72
...
@@ -120,26 +120,37 @@ class DetResizeForTest(object):
...
@@ -120,26 +120,37 @@ class DetResizeForTest(object):
if
'limit_side_len'
in
kwargs
:
if
'limit_side_len'
in
kwargs
:
self
.
limit_side_len
=
kwargs
[
'limit_side_len'
]
self
.
limit_side_len
=
kwargs
[
'limit_side_len'
]
self
.
limit_type
=
kwargs
.
get
(
'limit_type'
,
'min'
)
self
.
limit_type
=
kwargs
.
get
(
'limit_type'
,
'min'
)
if
'resize_long'
in
kwargs
:
self
.
resize_type
=
2
self
.
resize_long
=
kwargs
.
get
(
'resize_long'
,
960
)
else
:
else
:
self
.
limit_side_len
=
736
self
.
limit_side_len
=
736
self
.
limit_type
=
'min'
self
.
limit_type
=
'min'
def
__call__
(
self
,
data
):
def
__call__
(
self
,
data
):
img
=
data
[
'image'
]
img
=
data
[
'image'
]
src_h
,
src_w
,
_
=
img
.
shape
if
self
.
resize_type
==
0
:
if
self
.
resize_type
==
0
:
img
,
shape
=
self
.
resize_image_type0
(
img
)
# img, shape = self.resize_image_type0(img)
img
,
[
ratio_h
,
ratio_w
]
=
self
.
resize_image_type0
(
img
)
elif
self
.
resize_type
==
2
:
img
,
[
ratio_h
,
ratio_w
]
=
self
.
resize_image_type2
(
img
)
else
:
else
:
img
,
shape
=
self
.
resize_image_type1
(
img
)
# img, shape = self.resize_image_type1(img)
img
,
[
ratio_h
,
ratio_w
]
=
self
.
resize_image_type1
(
img
)
data
[
'image'
]
=
img
data
[
'image'
]
=
img
data
[
'shape'
]
=
shape
data
[
'shape'
]
=
np
.
array
([
src_h
,
src_w
,
ratio_h
,
ratio_w
])
return
data
return
data
def
resize_image_type1
(
self
,
img
):
def
resize_image_type1
(
self
,
img
):
resize_h
,
resize_w
=
self
.
image_shape
resize_h
,
resize_w
=
self
.
image_shape
ori_h
,
ori_w
=
img
.
shape
[:
2
]
# (h, w, c)
ori_h
,
ori_w
=
img
.
shape
[:
2
]
# (h, w, c)
ratio_h
=
float
(
resize_h
)
/
ori_h
ratio_w
=
float
(
resize_w
)
/
ori_w
img
=
cv2
.
resize
(
img
,
(
int
(
resize_w
),
int
(
resize_h
)))
img
=
cv2
.
resize
(
img
,
(
int
(
resize_w
),
int
(
resize_h
)))
return
img
,
np
.
array
([
ori_h
,
ori_w
])
# return img, np.array([ori_h, ori_w])
return
img
,
[
ratio_h
,
ratio_w
]
def
resize_image_type0
(
self
,
img
):
def
resize_image_type0
(
self
,
img
):
"""
"""
...
@@ -182,4 +193,31 @@ class DetResizeForTest(object):
...
@@ -182,4 +193,31 @@ class DetResizeForTest(object):
except
:
except
:
print
(
img
.
shape
,
resize_w
,
resize_h
)
print
(
img
.
shape
,
resize_w
,
resize_h
)
sys
.
exit
(
0
)
sys
.
exit
(
0
)
return
img
,
np
.
array
([
h
,
w
])
ratio_h
=
resize_h
/
float
(
h
)
ratio_w
=
resize_w
/
float
(
w
)
# return img, np.array([h, w])
return
img
,
[
ratio_h
,
ratio_w
]
def resize_image_type2(self, img):
    """Resize *img* so that its longer side equals ``self.resize_long``,
    then round both sides up to a multiple of 128.

    Returns the resized image and ``[ratio_h, ratio_w]``, the scale
    factors actually applied to height and width (after the stride
    rounding, so they may differ slightly from the nominal ratio).
    """
    h, w, _ = img.shape

    resize_w = w
    resize_h = h

    # Fix the longer side
    if resize_h > resize_w:
        ratio = float(self.resize_long) / resize_h
    else:
        ratio = float(self.resize_long) / resize_w

    resize_h = int(resize_h * ratio)
    resize_w = int(resize_w * ratio)

    # Round each side up to a multiple of the network stride so feature
    # maps align; 128 matches the downsampling of the detection backbone.
    max_stride = 128
    resize_h = (resize_h + max_stride - 1) // max_stride * max_stride
    resize_w = (resize_w + max_stride - 1) // max_stride * max_stride
    img = cv2.resize(img, (int(resize_w), int(resize_h)))
    ratio_h = resize_h / float(h)
    ratio_w = resize_w / float(w)

    return img, [ratio_h, ratio_w]
ppocr/data/imaug/sast_process.py
0 → 100644
View file @
bd7f8f72
This diff is collapsed.
Click to expand it.
ppocr/losses/__init__.py
View file @
bd7f8f72
...
@@ -18,6 +18,8 @@ import copy
...
@@ -18,6 +18,8 @@ import copy
def
build_loss
(
config
):
def
build_loss
(
config
):
# det loss
# det loss
from
.det_db_loss
import
DBLoss
from
.det_db_loss
import
DBLoss
from
.det_east_loss
import
EASTLoss
from
.det_sast_loss
import
SASTLoss
# rec loss
# rec loss
from
.rec_ctc_loss
import
CTCLoss
from
.rec_ctc_loss
import
CTCLoss
...
@@ -25,7 +27,7 @@ def build_loss(config):
...
@@ -25,7 +27,7 @@ def build_loss(config):
# cls loss
# cls loss
from
.cls_loss
import
ClsLoss
from
.cls_loss
import
ClsLoss
support_dict
=
[
'DBLoss'
,
'CTCLoss'
,
'ClsLoss'
]
support_dict
=
[
'DBLoss'
,
'EASTLoss'
,
'SASTLoss'
,
'CTCLoss'
,
'ClsLoss'
]
config
=
copy
.
deepcopy
(
config
)
config
=
copy
.
deepcopy
(
config
)
module_name
=
config
.
pop
(
'name'
)
module_name
=
config
.
pop
(
'name'
)
...
...
ppocr/losses/det_east_loss.py
0 → 100644
View file @
bd7f8f72
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
nn
from
.det_basic_loss
import
DiceLoss
class EASTLoss(nn.Layer):
    """Loss for the EAST text detector: a Dice loss on the score map plus
    a smooth-L1-style loss on the 8-channel geometry map.
    """

    def __init__(self, eps=1e-6, **kwargs):
        super(EASTLoss, self).__init__()
        # eps stabilizes the Dice denominator.
        self.dice_loss = DiceLoss(eps=eps)

    def forward(self, predicts, labels):
        """Compute the combined EAST loss.

        predicts: dict with 'f_score' (predicted score map) and
            'f_geo' (predicted geometry map, 8 channels).
        labels: sequence whose elements [1:] are the ground-truth
            score map, geometry map (8 geo channels + 1 norm channel),
            and training mask.
        Returns a dict with 'loss' (total), 'dice_loss', 'smooth_l1_loss'.
        """
        l_score, l_geo, l_mask = labels[1:]
        f_score = predicts['f_score']
        f_geo = predicts['f_geo']

        dice_loss = self.dice_loss(f_score, l_score, l_mask)

        # smooth_l1_loss over the 8 geometry channels; the 9th label
        # channel (l_geo_split[-1]) is a per-pixel normalization weight.
        channels = 8
        l_geo_split = paddle.split(
            l_geo, num_or_sections=channels + 1, axis=1)
        f_geo_split = paddle.split(f_geo, num_or_sections=channels, axis=1)
        smooth_l1 = 0
        for i in range(0, channels):
            geo_diff = l_geo_split[i] - f_geo_split[i]
            abs_geo_diff = paddle.abs(geo_diff)
            # NOTE(review): the quadratic branch is selected by comparing
            # against l_score (0/1 inside text) rather than a constant
            # 1.0 threshold — presumably equivalent on text pixels since
            # the loss is later masked by l_score; confirm against the
            # original EAST implementation.
            smooth_l1_sign = paddle.less_than(abs_geo_diff, l_score)
            smooth_l1_sign = paddle.cast(smooth_l1_sign, dtype='float32')
            in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \
                (abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign)
            out_loss = l_geo_split[-1] / channels * in_loss * l_score
            smooth_l1 += out_loss
        smooth_l1_loss = paddle.mean(smooth_l1 * l_score)

        # Down-weight the classification term relative to the geometry term.
        dice_loss = dice_loss * 0.01
        total_loss = dice_loss + smooth_l1_loss
        losses = {"loss": total_loss, \
                  "dice_loss": dice_loss, \
                  "smooth_l1_loss": smooth_l1_loss}
        return losses
ppocr/losses/det_sast_loss.py
0 → 100644
View file @
bd7f8f72
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
nn
from
.det_basic_loss
import
DiceLoss
import
paddle.fluid
as
fluid
import
numpy
as
np
class SASTLoss(nn.Layer):
    """Loss for the SAST text detector: a Dice-style score loss plus
    smooth-L1 losses on the border, TVO and TCO regression maps.
    """

    def __init__(self, eps=1e-6, **kwargs):
        super(SASTLoss, self).__init__()
        # Constructed but the Dice term below is computed inline;
        # kept for interface parity with the other detection losses.
        self.dice_loss = DiceLoss(eps=eps)

    def forward(self, predicts, labels):
        """
        tcl_pos: N x 128 x 3
        tcl_mask: N x 128 x 1
        tcl_label: N x X list or LoDTensor
        """
        f_score = predicts['f_score']
        f_border = predicts['f_border']
        f_tvo = predicts['f_tvo']
        f_tco = predicts['f_tco']

        l_score, l_border, l_mask, l_tvo, l_tco = labels[1:]

        # score_loss: masked Dice loss between predicted and GT score maps.
        intersection = paddle.sum(f_score * l_score * l_mask)
        union = paddle.sum(f_score * l_mask) + paddle.sum(l_score * l_mask)
        score_loss = 1.0 - 2 * intersection / (union + 1e-5)

        # border loss: last label channel is a per-pixel normalization
        # weight; it (and the score/mask maps) is broadcast to the 4
        # regression channels via expand.
        l_border_split, l_border_norm = paddle.split(
            l_border, num_or_sections=[4, 1], axis=1)
        f_border_split = f_border
        border_ex_shape = l_border_norm.shape * np.array([1, 4, 1, 1])
        l_border_norm_split = paddle.expand(
            x=l_border_norm, shape=border_ex_shape)
        l_border_score = paddle.expand(x=l_score, shape=border_ex_shape)
        l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape)

        border_diff = l_border_split - f_border_split
        abs_border_diff = paddle.abs(border_diff)
        # smooth-L1: quadratic below 1.0, linear above.
        border_sign = abs_border_diff < 1.0
        border_sign = paddle.cast(border_sign, dtype='float32')
        border_sign.stop_gradient = True
        border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \
            (abs_border_diff - 0.5) * (1.0 - border_sign)
        border_out_loss = l_border_norm_split * border_in_loss
        border_loss = paddle.sum(border_out_loss * l_border_score * l_border_mask) / \
            (paddle.sum(l_border_score * l_border_mask) + 1e-5)

        # tvo_loss: same smooth-L1 scheme over the 8 text-vertex-offset
        # channels (label carries 8 channels + 1 norm channel).
        l_tvo_split, l_tvo_norm = paddle.split(
            l_tvo, num_or_sections=[8, 1], axis=1)
        f_tvo_split = f_tvo
        tvo_ex_shape = l_tvo_norm.shape * np.array([1, 8, 1, 1])
        l_tvo_norm_split = paddle.expand(x=l_tvo_norm, shape=tvo_ex_shape)
        l_tvo_score = paddle.expand(x=l_score, shape=tvo_ex_shape)
        l_tvo_mask = paddle.expand(x=l_mask, shape=tvo_ex_shape)
        #
        tvo_geo_diff = l_tvo_split - f_tvo_split
        abs_tvo_geo_diff = paddle.abs(tvo_geo_diff)
        tvo_sign = abs_tvo_geo_diff < 1.0
        tvo_sign = paddle.cast(tvo_sign, dtype='float32')
        tvo_sign.stop_gradient = True
        tvo_in_loss = 0.5 * abs_tvo_geo_diff * abs_tvo_geo_diff * tvo_sign + \
            (abs_tvo_geo_diff - 0.5) * (1.0 - tvo_sign)
        tvo_out_loss = l_tvo_norm_split * tvo_in_loss
        tvo_loss = paddle.sum(tvo_out_loss * l_tvo_score * l_tvo_mask) / \
            (paddle.sum(l_tvo_score * l_tvo_mask) + 1e-5)

        # tco_loss: same scheme over the 2 text-center-offset channels.
        l_tco_split, l_tco_norm = paddle.split(
            l_tco, num_or_sections=[2, 1], axis=1)
        f_tco_split = f_tco
        tco_ex_shape = l_tco_norm.shape * np.array([1, 2, 1, 1])
        l_tco_norm_split = paddle.expand(x=l_tco_norm, shape=tco_ex_shape)
        l_tco_score = paddle.expand(x=l_score, shape=tco_ex_shape)
        l_tco_mask = paddle.expand(x=l_mask, shape=tco_ex_shape)

        tco_geo_diff = l_tco_split - f_tco_split
        abs_tco_geo_diff = paddle.abs(tco_geo_diff)
        tco_sign = abs_tco_geo_diff < 1.0
        tco_sign = paddle.cast(tco_sign, dtype='float32')
        tco_sign.stop_gradient = True
        tco_in_loss = 0.5 * abs_tco_geo_diff * abs_tco_geo_diff * tco_sign + \
            (abs_tco_geo_diff - 0.5) * (1.0 - tco_sign)
        tco_out_loss = l_tco_norm_split * tco_in_loss
        tco_loss = paddle.sum(tco_out_loss * l_tco_score * l_tco_mask) / \
            (paddle.sum(l_tco_score * l_tco_mask) + 1e-5)

        # total loss: fixed per-term weights.
        tvo_lw, tco_lw = 1.5, 1.5
        score_lw, border_lw = 1.0, 1.0
        total_loss = score_loss * score_lw + border_loss * border_lw + \
            tvo_loss * tvo_lw + tco_loss * tco_lw

        losses = {'loss': total_loss, "score_loss": score_loss, \
                  "border_loss": border_loss, 'tvo_loss': tvo_loss, 'tco_loss': tco_loss}
        return losses
\ No newline at end of file
ppocr/modeling/backbones/__init__.py
View file @
bd7f8f72
...
@@ -19,6 +19,7 @@ def build_backbone(config, model_type):
...
@@ -19,6 +19,7 @@ def build_backbone(config, model_type):
if
model_type
==
'det'
:
if
model_type
==
'det'
:
from
.det_mobilenet_v3
import
MobileNetV3
from
.det_mobilenet_v3
import
MobileNetV3
from
.det_resnet_vd
import
ResNet
from
.det_resnet_vd
import
ResNet
from
.det_resnet_vd_sast
import
ResNet_SAST
support_dict
=
[
'MobileNetV3'
,
'ResNet'
,
'ResNet_SAST'
]
support_dict
=
[
'MobileNetV3'
,
'ResNet'
,
'ResNet_SAST'
]
elif
model_type
==
'rec'
or
model_type
==
'cls'
:
elif
model_type
==
'rec'
or
model_type
==
'cls'
:
from
.rec_mobilenet_v3
import
MobileNetV3
from
.rec_mobilenet_v3
import
MobileNetV3
...
...
ppocr/modeling/backbones/det_resnet_vd_sast.py
0 → 100644
View file @
bd7f8f72
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
ParamAttr
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
__all__
=
[
"ResNet_SAST"
]
class ConvBNLayer(nn.Layer):
    """Conv2D + BatchNorm block for the SAST ResNet-vd backbone.

    When ``is_vd_mode`` is True, a 2x2 ceil-mode average pool is applied
    before the convolution (the ResNet-vd downsampling trick on shortcut
    branches). Parameter names are derived from ``name`` so pretrained
    weights map onto this layer.
    """

    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=1,
            groups=1,
            is_vd_mode=False,
            act=None,
            name=None, ):
        super(ConvBNLayer, self).__init__()

        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self._conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            # "same"-style padding for odd kernel sizes.
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        # BN parameter naming follows the original ResNet checkpoint
        # convention: "bn_conv1" for the stem, "bn" + suffix otherwise.
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self._batch_norm = nn.BatchNorm(
            out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def forward(self, inputs):
        if self.is_vd_mode:
            inputs = self._pool2d_avg(inputs)
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y
class BottleneckBlock(nn.Layer):
    """ResNet-vd bottleneck residual block (1x1 -> 3x3 -> 1x1, x4 expansion).

    The shortcut projection uses the vd average-pool trick except on the
    very first block (``if_first``), where the input is not downsampled.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super(BottleneckBlock, self).__init__()

        # 1x1 reduce.
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
        # 3x3 spatial conv carries the stride.
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        # 1x1 expand (no activation; ReLU applied after the residual add).
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels * 4,
            kernel_size=1,
            act=None,
            name=name + "_branch2c")

        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels * 4,
                kernel_size=1,
                stride=1,
                # vd mode: downsample via avg-pool instead of strided conv,
                # except for the first block of the network.
                is_vd_mode=False if if_first else True,
                name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv2)
        y = F.relu(y)
        return y
class BasicBlock(nn.Layer):
    """ResNet-vd basic residual block (3x3 -> 3x3), used for layers < 50."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super(BasicBlock, self).__init__()
        self.stride = stride
        # First 3x3 conv carries the stride.
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2a")
        # Second 3x3 conv (no activation; ReLU after residual add).
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")

        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                # vd mode: avg-pool downsampling on the projection shortcut
                # except for the very first block.
                is_vd_mode=False if if_first else True,
                name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv1)
        y = F.relu(y)
        return y
class ResNet_SAST(nn.Layer):
    """ResNet-vd backbone variant for SAST.

    Differs from the standard ResNet-vd in that the 50-layer config gets
    an extra fifth stage (depth ``[3, 4, 6, 3, 3]``), and ``forward``
    returns the raw input, the stem output, and every stage output
    (used as a multi-scale feature pyramid by the SAST FPN).
    """

    def __init__(self, in_channels=3, layers=50, **kwargs):
        super(ResNet_SAST, self).__init__()

        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            # depth = [3, 4, 6, 3]
            # SAST adds a 5th stage on top of the standard ResNet-50 depths.
            depth = [3, 4, 6, 3, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]
        # num_channels = [64, 256, 512,
        #                 1024] if layers >= 50 else [64, 64, 128, 256]
        # num_filters = [64, 128, 256, 512]
        # Extended per-stage channel tables to cover the extra 5th stage.
        num_channels = [64, 256, 512,
                        1024, 2048] if layers >= 50 else [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512, 512]

        # Deep stem: three 3x3 convs replacing the classic 7x7 conv.
        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=32,
            kernel_size=3,
            stride=2,
            act='relu',
            name="conv1_1")
        self.conv1_2 = ConvBNLayer(
            in_channels=32,
            out_channels=32,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_2")
        self.conv1_3 = ConvBNLayer(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_3")
        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

        # self.stages is a plain list; the blocks themselves are registered
        # as sublayers via add_sublayer so their parameters are tracked.
        self.stages = []
        # out_channels mirrors the channel count of each entry returned by
        # forward(): raw input (3), stem (64), then one per stage.
        self.out_channels = [3, 64]
        if layers >= 50:
            for block in range(len(depth)):
                block_list = []
                shortcut = False
                for i in range(depth[block]):
                    # Checkpoint naming convention: res{stage}a/b/c..., with a
                    # numeric "b{i}" scheme for the deep stage of 101/152 nets.
                    if layers in [101, 152] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    bottleneck_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BottleneckBlock(
                            in_channels=num_channels[block]
                            if i == 0 else num_filters[block] * 4,
                            out_channels=num_filters[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            name=conv_name))
                    shortcut = True
                    block_list.append(bottleneck_block)
                self.out_channels.append(num_filters[block] * 4)
                self.stages.append(nn.Sequential(*block_list))
        else:
            for block in range(len(depth)):
                block_list = []
                shortcut = False
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    basic_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BasicBlock(
                            in_channels=num_channels[block]
                            if i == 0 else num_filters[block],
                            out_channels=num_filters[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            name=conv_name))
                    shortcut = True
                    block_list.append(basic_block)
                self.out_channels.append(num_filters[block])
                self.stages.append(nn.Sequential(*block_list))

    def forward(self, inputs):
        # Collect multi-scale features: [input, stem, stage1, stage2, ...].
        out = [inputs]
        y = self.conv1_1(inputs)
        y = self.conv1_2(y)
        y = self.conv1_3(y)
        out.append(y)
        y = self.pool2d_max(y)
        for block in self.stages:
            y = block(y)
            out.append(y)
        return out
\ No newline at end of file
ppocr/modeling/heads/__init__.py
View file @
bd7f8f72
...
@@ -18,13 +18,15 @@ __all__ = ['build_head']
...
@@ -18,13 +18,15 @@ __all__ = ['build_head']
def
build_head
(
config
):
def
build_head
(
config
):
# det head
# det head
from
.det_db_head
import
DBHead
from
.det_db_head
import
DBHead
from
.det_east_head
import
EASTHead
from
.det_sast_head
import
SASTHead
# rec head
# rec head
from
.rec_ctc_head
import
CTCHead
from
.rec_ctc_head
import
CTCHead
# cls head
# cls head
from
.cls_head
import
ClsHead
from
.cls_head
import
ClsHead
support_dict
=
[
'DBHead'
,
'CTCHead'
,
'ClsHead'
]
support_dict
=
[
'DBHead'
,
'EASTHead'
,
'SASTHead'
,
'CTCHead'
,
'ClsHead'
]
module_name
=
config
.
pop
(
'name'
)
module_name
=
config
.
pop
(
'name'
)
assert
module_name
in
support_dict
,
Exception
(
'head only support {}'
.
format
(
assert
module_name
in
support_dict
,
Exception
(
'head only support {}'
.
format
(
...
...
ppocr/modeling/heads/det_east_head.py
0 → 100644
View file @
bd7f8f72
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
math
import
paddle
from
paddle
import
nn
import
paddle.nn.functional
as
F
from
paddle
import
ParamAttr
class ConvBNLayer(nn.Layer):
    """Conv2D + BatchNorm helper for the EAST head.

    The activation (if any) is fused into the BatchNorm layer; parameter
    names are derived from ``name`` for checkpoint compatibility.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        # NOTE(review): if_act is stored but not consulted in forward();
        # the activation is applied by BatchNorm via act=.
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x
class EASTHead(nn.Layer):
    """EAST detection head.

    Produces a 1-channel text score map ('f_score', sigmoid in [0, 1])
    and an 8-channel geometry map ('f_geo', scaled to roughly
    [-800, 800] pixels). ``model_name == "large"`` widens the two
    intermediate conv layers.
    """

    def __init__(self, in_channels, model_name, **kwargs):
        super(EASTHead, self).__init__()
        self.model_name = model_name
        # [conv1 width, conv2 width, score channels, geo channels]
        if self.model_name == "large":
            num_outputs = [128, 64, 1, 8]
        else:
            num_outputs = [64, 32, 1, 8]

        self.det_conv1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=num_outputs[0],
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="det_head1")
        self.det_conv2 = ConvBNLayer(
            in_channels=num_outputs[0],
            out_channels=num_outputs[1],
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="det_head2")
        self.score_conv = ConvBNLayer(
            in_channels=num_outputs[1],
            out_channels=num_outputs[2],
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None,
            name="f_score")
        self.geo_conv = ConvBNLayer(
            in_channels=num_outputs[1],
            out_channels=num_outputs[3],
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None,
            name="f_geo")

    def forward(self, x):
        f_det = self.det_conv1(x)
        f_det = self.det_conv2(f_det)
        f_score = self.score_conv(f_det)
        f_score = F.sigmoid(f_score)
        f_geo = self.geo_conv(f_det)
        # Map sigmoid output (0, 1) to (-800, 800); 800 presumably matches
        # the maximum geometry offset used at training time — confirm
        # against the data pipeline.
        f_geo = (F.sigmoid(f_geo) - 0.5) * 2 * 800

        pred = {'f_score': f_score, 'f_geo': f_geo}
        return pred
ppocr/modeling/heads/det_sast_head.py
0 → 100644
View file @
bd7f8f72
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
math
import
paddle
from
paddle
import
nn
import
paddle.nn.functional
as
F
from
paddle
import
ParamAttr
class ConvBNLayer(nn.Layer):
    """Conv2D + BatchNorm helper for the SAST head.

    Padding is derived from the kernel size ("same" for odd kernels);
    the activation (if any) is fused into BatchNorm.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        # NOTE(review): if_act is stored but unused in forward(); the
        # activation is applied inside BatchNorm via act=.
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x
class SAST_Header1(nn.Layer):
    """First SAST sub-header: text score map (1 ch, sigmoid) and
    border regression map (4 ch, raw)."""

    def __init__(self, in_channels, **kwargs):
        super(SAST_Header1, self).__init__()
        out_channels = [64, 64, 128]
        # 1x1 -> 3x3 -> 1x1 -> 3x3 tower ending in a single score channel.
        self.score_conv = nn.Sequential(
            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_score1'),
            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_score2'),
            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_score3'),
            ConvBNLayer(out_channels[2], 1, 3, 1, act=None, name='f_score4')
        )
        # Same tower shape ending in 4 border-offset channels.
        self.border_conv = nn.Sequential(
            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_border1'),
            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_border2'),
            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_border3'),
            ConvBNLayer(out_channels[2], 4, 3, 1, act=None, name='f_border4')
        )

    def forward(self, x):
        f_score = self.score_conv(x)
        # Only the score branch is squashed; border offsets stay unbounded.
        f_score = F.sigmoid(f_score)
        f_border = self.border_conv(x)
        return f_score, f_border
class SAST_Header2(nn.Layer):
    """Second SAST sub-header: text-vertex-offset map (8 ch) and
    text-center-offset map (2 ch), both raw regressions."""

    def __init__(self, in_channels, **kwargs):
        super(SAST_Header2, self).__init__()
        out_channels = [64, 64, 128]
        # 1x1 -> 3x3 -> 1x1 -> 3x3 tower ending in 8 vertex-offset channels.
        self.tvo_conv = nn.Sequential(
            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_tvo1'),
            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_tvo2'),
            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_tvo3'),
            ConvBNLayer(out_channels[2], 8, 3, 1, act=None, name='f_tvo4')
        )
        # Same tower shape ending in 2 center-offset channels.
        self.tco_conv = nn.Sequential(
            ConvBNLayer(in_channels, out_channels[0], 1, 1, act='relu', name='f_tco1'),
            ConvBNLayer(out_channels[0], out_channels[1], 3, 1, act='relu', name='f_tco2'),
            ConvBNLayer(out_channels[1], out_channels[2], 1, 1, act='relu', name='f_tco3'),
            ConvBNLayer(out_channels[2], 2, 3, 1, act=None, name='f_tco4')
        )

    def forward(self, x):
        f_tvo = self.tvo_conv(x)
        f_tco = self.tco_conv(x)
        return f_tvo, f_tco
class SASTHead(nn.Layer):
    """SAST detection head.

    Wraps the two sub-headers and merges their outputs into a single
    prediction dict with keys 'f_score', 'f_border', 'f_tvo', 'f_tco'.
    """

    def __init__(self, in_channels, **kwargs):
        super(SASTHead, self).__init__()
        self.head1 = SAST_Header1(in_channels)
        self.head2 = SAST_Header2(in_channels)

    def forward(self, x):
        f_score, f_border = self.head1(x)
        f_tvo, f_tco = self.head2(x)
        return {
            'f_score': f_score,
            'f_border': f_border,
            'f_tvo': f_tvo,
            'f_tco': f_tco,
        }
\ No newline at end of file
ppocr/modeling/necks/__init__.py
View file @
bd7f8f72
...
@@ -16,8 +16,10 @@ __all__ = ['build_neck']
...
@@ -16,8 +16,10 @@ __all__ = ['build_neck']
def
build_neck
(
config
):
def
build_neck
(
config
):
from
.db_fpn
import
DBFPN
from
.db_fpn
import
DBFPN
from
.east_fpn
import
EASTFPN
from
.sast_fpn
import
SASTFPN
from
.rnn
import
SequenceEncoder
from
.rnn
import
SequenceEncoder
support_dict
=
[
'DBFPN'
,
'SequenceEncoder'
]
support_dict
=
[
'DBFPN'
,
'EASTFPN'
,
'SASTFPN'
,
'SequenceEncoder'
]
module_name
=
config
.
pop
(
'name'
)
module_name
=
config
.
pop
(
'name'
)
assert
module_name
in
support_dict
,
Exception
(
'neck only support {}'
.
format
(
assert
module_name
in
support_dict
,
Exception
(
'neck only support {}'
.
format
(
...
...
ppocr/modeling/necks/east_fpn.py
0 → 100644
View file @
bd7f8f72
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
nn
import
paddle.nn.functional
as
F
from
paddle
import
ParamAttr
class ConvBNLayer(nn.Layer):
    """Conv2D + BatchNorm helper for the EAST FPN; activation (if any)
    is fused into BatchNorm and parameter names follow ``name``."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        # NOTE(review): if_act is stored but unused in forward().
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x
class DeConvBNLayer(nn.Layer):
    """Transposed-Conv2D + BatchNorm helper for upsampling in the EAST
    FPN; activation (if any) is fused into BatchNorm."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(DeConvBNLayer, self).__init__()
        # NOTE(review): if_act is stored but unused in forward().
        self.if_act = if_act
        self.act = act
        self.deconv = nn.Conv2DTranspose(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.deconv(x)
        x = self.bn(x)
        return x
class EASTFPN(nn.Layer):
    """U-Net-style feature merge branch for EAST.

    Takes the backbone's multi-scale features (coarse last), repeatedly
    upsamples (deconv), concatenates with the next finer feature, and
    fuses with a 3x3 conv. ``model_name == "large"`` uses 128 output
    channels, otherwise 64.
    """

    def __init__(self, in_channels, model_name, **kwargs):
        super(EASTFPN, self).__init__()
        self.model_name = model_name
        if self.model_name == "large":
            self.out_channels = 128
        else:
            self.out_channels = 64
        # Reverse so index 0 is the coarsest (deepest) feature map.
        self.in_channels = in_channels[::-1]
        self.h1_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[1],
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_h_1")
        self.h2_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[2],
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_h_2")
        self.h3_conv = ConvBNLayer(
            in_channels=self.out_channels + self.in_channels[3],
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_h_3")
        # 2x upsampling deconvs (kernel 4, stride 2, padding 1).
        self.g0_deconv = DeConvBNLayer(
            in_channels=self.in_channels[0],
            out_channels=self.out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_0")
        self.g1_deconv = DeConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_1")
        self.g2_deconv = DeConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=4,
            stride=2,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_2")
        # Final fuse conv (no further upsampling).
        self.g3_conv = ConvBNLayer(
            in_channels=self.out_channels,
            out_channels=self.out_channels,
            kernel_size=3,
            stride=1,
            padding=1,
            if_act=True,
            act='relu',
            name="unet_g_3")

    def forward(self, x):
        # x is fine-to-coarse; reverse so f[0] is the coarsest feature.
        f = x[::-1]

        h = f[0]
        g = self.g0_deconv(h)
        h = paddle.concat([g, f[1]], axis=1)
        h = self.h1_conv(h)
        g = self.g1_deconv(h)
        h = paddle.concat([g, f[2]], axis=1)
        h = self.h2_conv(h)
        g = self.g2_deconv(h)
        h = paddle.concat([g, f[3]], axis=1)
        h = self.h3_conv(h)
        g = self.g3_conv(h)
        return g
\ No newline at end of file
ppocr/modeling/necks/sast_fpn.py
0 → 100644
View file @
bd7f8f72
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
nn
import
paddle.nn.functional
as
F
from
paddle
import
ParamAttr
class ConvBNLayer(nn.Layer):
    """Conv2D + BatchNorm helper for the SAST FPN.

    Padding is derived from the kernel size ("same" for odd kernels);
    the activation (if any) is fused into BatchNorm.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        # NOTE(review): if_act is stored but unused in forward().
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x
class DeConvBNLayer(nn.Layer):
    """A Conv2DTranspose + BatchNorm block used for learned upsampling.

    The activation is folded into the BatchNorm layer via its `act`
    argument; no separate activation module is created.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: square kernel size; padding is (kernel_size - 1) // 2.
        stride: transpose-conv stride (upsampling factor).
        groups: grouped-convolution group count.
        if_act: kept for interface compatibility.
            NOTE(review): stored but never consulted in this block —
            activation is controlled solely by `act`.
        act: activation name forwarded to BatchNorm (e.g. 'relu') or None.
        name: parameter-name prefix (ties into pretrained weight files).
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(DeConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.deconv = nn.Conv2DTranspose(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name="bn_" + name + "_scale"),
            bias_attr=ParamAttr(name="bn_" + name + "_offset"),
            moving_mean_name="bn_" + name + "_mean",
            moving_variance_name="bn_" + name + "_variance")

    def forward(self, x):
        # Upsample with the transpose conv, then normalize (+ optional act).
        return self.bn(self.deconv(x))
class FPN_Up_Fusion(nn.Layer):
    """Top-down (deep-to-shallow) fusion branch of the SAST FPN.

    The five deepest backbone features (``x[2:]``) are each projected with a
    1x1 conv, then fused pairwise from deepest to shallowest: the running
    feature is upsampled (4x4 deconv, stride 2), added to the next lateral
    projection with a ReLU, and refined. The final stage emits a
    128-channel map.

    NOTE(review): ``forward`` indexes f[0]..f[4] of ``x[2:]``, so the input
    list is presumably 7 feature maps — confirm against the backbone.
    """

    def __init__(self, in_channels):
        super(FPN_Up_Fusion, self).__init__()
        # Reverse so index 0 is the deepest (smallest-resolution) feature.
        in_channels = in_channels[::-1]
        # Channel widths after each lateral projection / fusion stage.
        out_channels = [256, 256, 192, 192, 128]
        # 1x1 lateral projections (no activation; BN only).
        self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 1, 1, act=None, name='fpn_up_h0')
        self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 1, 1, act=None, name='fpn_up_h1')
        self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 1, 1, act=None, name='fpn_up_h2')
        self.h3_conv = ConvBNLayer(in_channels[3], out_channels[3], 1, 1, act=None, name='fpn_up_h3')
        self.h4_conv = ConvBNLayer(in_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_h4')
        # First upsampling step applied directly to the deepest projection.
        self.g0_conv = DeConvBNLayer(out_channels[0], out_channels[1], 4, 2, act=None, name='fpn_up_g0')
        # Each g{i}_conv refines the fused feature (3x3 + ReLU) and then
        # upsamples it to the next level's resolution.
        self.g1_conv = nn.Sequential(
            ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_up_g1_1'),
            DeConvBNLayer(out_channels[1], out_channels[2], 4, 2, act=None, name='fpn_up_g1_2'))
        self.g2_conv = nn.Sequential(
            ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_up_g2_1'),
            DeConvBNLayer(out_channels[2], out_channels[3], 4, 2, act=None, name='fpn_up_g2_2'))
        self.g3_conv = nn.Sequential(
            ConvBNLayer(out_channels[3], out_channels[3], 3, 1, act='relu', name='fpn_up_g3_1'),
            DeConvBNLayer(out_channels[3], out_channels[4], 4, 2, act=None, name='fpn_up_g3_2'))
        # Final stage: 3x3 refine + 1x1 projection, no further upsampling.
        self.g4_conv = nn.Sequential(
            ConvBNLayer(out_channels[4], out_channels[4], 3, 1, act='relu', name='fpn_up_fusion_1'),
            ConvBNLayer(out_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_fusion_2'))

    def _add_relu(self, x1, x2):
        """Element-wise add of two equal-shaped maps followed by ReLU."""
        x = paddle.add(x=x1, y=x2)
        x = F.relu(x)
        return x

    def forward(self, x):
        # Drop the two shallowest features, then deepest first.
        f = x[2:][::-1]
        # Lateral 1x1 projections.
        h0 = self.h0_conv(f[0])
        h1 = self.h1_conv(f[1])
        h2 = self.h2_conv(f[2])
        h3 = self.h3_conv(f[3])
        h4 = self.h4_conv(f[4])
        # Cascade: upsample, add next lateral, refine, repeat.
        g0 = self.g0_conv(h0)
        g1 = self._add_relu(g0, h1)
        g1 = self.g1_conv(g1)
        g2 = self.g2_conv(self._add_relu(g1, h2))
        g3 = self.g3_conv(self._add_relu(g2, h3))
        g4 = self.g4_conv(self._add_relu(g3, h4))
        return g4
class FPN_Down_Fusion(nn.Layer):
    """Bottom-up (shallow-to-deep) fusion branch of the SAST FPN.

    Projects the three shallowest backbone features (``x[:3]``) with 3x3
    convs, then merges them downward: each running feature is downsampled
    (stride-2 conv), added to the next deeper projection with a ReLU, and
    refined. The result is a 128-channel map at the resolution of ``x[2]``.
    """

    def __init__(self, in_channels):
        super(FPN_Down_Fusion, self).__init__()
        # Channel widths for the three projected levels.
        out_channels = [32, 64, 128]
        # 3x3 lateral projections (BN only, no activation).
        self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 3, 1, act=None, name='fpn_down_h0')
        self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 3, 1, act=None, name='fpn_down_h1')
        self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 3, 1, act=None, name='fpn_down_h2')
        # Stride-2 conv takes level 0 down to level 1's resolution.
        self.g0_conv = ConvBNLayer(out_channels[0], out_channels[1], 3, 2, act=None, name='fpn_down_g0')
        # Refine (3x3 + ReLU) then downsample to level 2's resolution.
        self.g1_conv = nn.Sequential(
            ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_down_g1_1'),
            ConvBNLayer(out_channels[1], out_channels[2], 3, 2, act=None, name='fpn_down_g1_2'))
        # Final refinement: 3x3 + ReLU followed by a 1x1 projection.
        self.g2_conv = nn.Sequential(
            ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_down_fusion_1'),
            ConvBNLayer(out_channels[2], out_channels[2], 1, 1, act=None, name='fpn_down_fusion_2'))

    def forward(self, x):
        # Only the three shallowest feature maps participate.
        shallow = x[:3]
        lat0 = self.h0_conv(shallow[0])
        lat1 = self.h1_conv(shallow[1])
        lat2 = self.h2_conv(shallow[2])
        # Downsample, add next lateral, refine — twice.
        merged = self.g1_conv(F.relu(paddle.add(x=self.g0_conv(lat0), y=lat1)))
        return self.g2_conv(F.relu(paddle.add(x=merged, y=lat2)))
class Cross_Attention(nn.Layer):
    """Cross-attention block (CAB): self-attention applied separately along
    the horizontal and vertical axes of a feature map, then merged.

    NOTE(review): ``_cal_fweight`` hard-codes 128 as the channel dimension
    in its reshapes and scale factor, so this block presumably requires
    ``in_channels == 128`` — confirm before reusing with other widths.
    """

    def __init__(self, in_channels):
        super(Cross_Attention, self).__init__()
        # Query / key / value projections (1x1 conv + BN + ReLU).
        self.theta_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_theta')
        self.phi_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_phi')
        self.g_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_g')
        # Output projections and shortcut projections for each axis.
        self.fh_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_weight')
        self.fh_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_sc')
        self.fv_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_weight')
        self.fv_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_sc')
        # Fuses the concatenated horizontal + vertical results back to
        # `in_channels` channels.
        self.f_attn_conv = ConvBNLayer(in_channels * 2, in_channels, 1, 1, act='relu', name='f_attn')

    def _cal_fweight(self, f, shape):
        """Scaled dot-product attention along one spatial axis.

        Args:
            f: list ``[f_theta, f_phi, f_g]`` of NCHW projections.
            shape: ``[batch, rows, cols]`` — the axis in ``rows`` is treated
                as independent batches; attention runs across ``cols``.

        Returns:
            Attention-weighted values of shape ``[batch, rows, cols, 128]``.
        """
        f_theta, f_phi, f_g = f
        # Flatten: NCHW -> NHWC -> (batch*rows, cols, channels).
        f_theta = paddle.transpose(f_theta, [0, 2, 3, 1])
        f_theta = paddle.reshape(f_theta, [shape[0] * shape[1], shape[2], 128])
        f_phi = paddle.transpose(f_phi, [0, 2, 3, 1])
        f_phi = paddle.reshape(f_phi, [shape[0] * shape[1], shape[2], 128])
        f_g = paddle.transpose(f_g, [0, 2, 3, 1])
        f_g = paddle.reshape(f_g, [shape[0] * shape[1], shape[2], 128])
        # Correlation: query x key^T -> (batch*rows, cols, cols).
        f_attn = paddle.matmul(f_theta, paddle.transpose(f_phi, [0, 2, 1]))
        # Scale by sqrt(d) and normalize.
        f_attn = f_attn / (128**0.5)
        f_attn = F.softmax(f_attn)
        # Weighted sum of values.
        f_weight = paddle.matmul(f_attn, f_g)
        f_weight = paddle.reshape(f_weight, [shape[0], shape[1], shape[2], 128])
        return f_weight

    def forward(self, f_common):
        """Apply horizontal then vertical attention to ``f_common`` (NCHW)
        and fuse both results."""
        # Dynamic shape tensor: [N, C, H, W].
        f_shape = paddle.shape(f_common)
        # print('f_shape: ', f_shape)
        f_theta = self.theta_conv(f_common)
        f_phi = self.phi_conv(f_common)
        f_g = self.g_conv(f_common)
        ######## horizon ########
        # Attention across W, with each row (H) an independent batch.
        fh_weight = self._cal_fweight([f_theta, f_phi, f_g],
                                      [f_shape[0], f_shape[2], f_shape[3]])
        # NHWC -> NCHW before the output projection.
        fh_weight = paddle.transpose(fh_weight, [0, 3, 1, 2])
        fh_weight = self.fh_weight_conv(fh_weight)
        #short cut
        fh_sc = self.fh_sc_conv(f_common)
        f_h = F.relu(fh_weight + fh_sc)
        ######## vertical ########
        # Swap H and W so the same routine attends across H.
        fv_theta = paddle.transpose(f_theta, [0, 1, 3, 2])
        fv_phi = paddle.transpose(f_phi, [0, 1, 3, 2])
        fv_g = paddle.transpose(f_g, [0, 1, 3, 2])
        fv_weight = self._cal_fweight([fv_theta, fv_phi, fv_g],
                                      [f_shape[0], f_shape[3], f_shape[2]])
        # N(W)(H)C -> NCHW (undoes both the flatten and the H/W swap).
        fv_weight = paddle.transpose(fv_weight, [0, 3, 2, 1])
        fv_weight = self.fv_weight_conv(fv_weight)
        #short cut
        fv_sc = self.fv_sc_conv(f_common)
        f_v = F.relu(fv_weight + fv_sc)
        ######## merge ########
        # Concatenate both axes' results along channels and fuse with 1x1.
        f_attn = paddle.concat([f_h, f_v], axis=1)
        f_attn = self.f_attn_conv(f_attn)
        return f_attn
class SASTFPN(nn.Layer):
    """SAST detection neck.

    Runs a bottom-up (FPN_Down_Fusion) and a top-down (FPN_Up_Fusion)
    branch over the backbone features, sums the two 128-channel results
    with a ReLU, and optionally enhances the fused map with a
    cross-attention block (CAB).

    Args:
        in_channels: channel counts of the backbone feature maps.
        with_cab: when True, apply Cross_Attention to the fused feature.
    """

    def __init__(self, in_channels, with_cab=False, **kwargs):
        super(SASTFPN, self).__init__()
        self.in_channels = in_channels
        self.with_cab = with_cab
        # Attribute names are kept verbatim — they are the parameter-name
        # prefixes pretrained checkpoints were saved under.
        self.FPN_Down_Fusion = FPN_Down_Fusion(self.in_channels)
        self.FPN_Up_Fusion = FPN_Up_Fusion(self.in_channels)
        self.out_channels = 128
        self.cross_attention = Cross_Attention(self.out_channels)

    def forward(self, x):
        # Fuse the shallow (down) and deep (up) branch outputs.
        fused = F.relu(
            paddle.add(x=self.FPN_Down_Fusion(x), y=self.FPN_Up_Fusion(x)))
        if not self.with_cab:
            return fused
        # Optional cross-attention enhancement of the fused feature.
        return self.cross_attention(fused)
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment