Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
c1fd4664
Commit
c1fd4664
authored
Dec 30, 2020
by
tink2123
Browse files
add srn for dygraph
parent
de3e2e7c
Changes
28
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
618 additions
and
41 deletions
+618
-41
configs/rec/rec_mv3_none_bilstm_ctc.yml
configs/rec/rec_mv3_none_bilstm_ctc.yml
+3
-3
configs/rec/rec_mv3_none_none_ctc.yml
configs/rec/rec_mv3_none_none_ctc.yml
+2
-2
configs/rec/rec_mv3_tps_bilstm_ctc.yml
configs/rec/rec_mv3_tps_bilstm_ctc.yml
+2
-2
configs/rec/rec_r34_vd_none_bilstm_ctc.yml
configs/rec/rec_r34_vd_none_bilstm_ctc.yml
+2
-2
configs/rec/rec_r34_vd_none_none_ctc.yml
configs/rec/rec_r34_vd_none_none_ctc.yml
+2
-2
configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
+2
-2
configs/rec/rec_r50_fpn_srn.yml
configs/rec/rec_r50_fpn_srn.yml
+106
-0
ppocr/data/__init__.py
ppocr/data/__init__.py
+2
-2
ppocr/data/imaug/__init__.py
ppocr/data/imaug/__init__.py
+1
-1
ppocr/data/imaug/label_ops.py
ppocr/data/imaug/label_ops.py
+48
-0
ppocr/data/imaug/rec_img_aug.py
ppocr/data/imaug/rec_img_aug.py
+75
-15
ppocr/data/lmdb_dataset.py
ppocr/data/lmdb_dataset.py
+2
-2
ppocr/losses/__init__.py
ppocr/losses/__init__.py
+4
-1
ppocr/losses/rec_srn_loss.py
ppocr/losses/rec_srn_loss.py
+47
-0
ppocr/metrics/__init__.py
ppocr/metrics/__init__.py
+1
-0
ppocr/metrics/rec_metric.py
ppocr/metrics/rec_metric.py
+1
-3
ppocr/modeling/architectures/base_model.py
ppocr/modeling/architectures/base_model.py
+5
-2
ppocr/modeling/backbones/__init__.py
ppocr/modeling/backbones/__init__.py
+2
-1
ppocr/modeling/backbones/rec_resnet_fpn.py
ppocr/modeling/backbones/rec_resnet_fpn.py
+307
-0
ppocr/modeling/heads/__init__.py
ppocr/modeling/heads/__init__.py
+4
-1
No files found.
configs/rec/rec_mv3_none_bilstm_ctc.yml
View file @
c1fd4664
Global
:
use_gpu
:
t
rue
use_gpu
:
T
rue
epoch_num
:
72
log_smooth_window
:
20
print_batch_step
:
10
...
...
@@ -59,7 +59,7 @@ Metric:
Train
:
dataset
:
name
:
LMDBDat
e
Set
name
:
LMDBDat
a
Set
data_dir
:
./train_data/data_lmdb_release/training/
transforms
:
-
DecodeImage
:
# load image
...
...
@@ -78,7 +78,7 @@ Train:
Eval
:
dataset
:
name
:
LMDBDat
e
Set
name
:
LMDBDat
a
Set
data_dir
:
./train_data/data_lmdb_release/validation/
transforms
:
-
DecodeImage
:
# load image
...
...
configs/rec/rec_mv3_none_none_ctc.yml
View file @
c1fd4664
...
...
@@ -58,7 +58,7 @@ Metric:
Train
:
dataset
:
name
:
LMDBDat
e
Set
name
:
LMDBDat
a
Set
data_dir
:
./train_data/data_lmdb_release/training/
transforms
:
-
DecodeImage
:
# load image
...
...
@@ -77,7 +77,7 @@ Train:
Eval
:
dataset
:
name
:
LMDBDat
e
Set
name
:
LMDBDat
a
Set
data_dir
:
./train_data/data_lmdb_release/validation/
transforms
:
-
DecodeImage
:
# load image
...
...
configs/rec/rec_mv3_tps_bilstm_ctc.yml
View file @
c1fd4664
...
...
@@ -63,7 +63,7 @@ Metric:
Train
:
dataset
:
name
:
LMDBDat
e
Set
name
:
LMDBDat
a
Set
data_dir
:
./train_data/data_lmdb_release/training/
transforms
:
-
DecodeImage
:
# load image
...
...
@@ -82,7 +82,7 @@ Train:
Eval
:
dataset
:
name
:
LMDBDat
e
Set
name
:
LMDBDat
a
Set
data_dir
:
./train_data/data_lmdb_release/validation/
transforms
:
-
DecodeImage
:
# load image
...
...
configs/rec/rec_r34_vd_none_bilstm_ctc.yml
View file @
c1fd4664
...
...
@@ -58,7 +58,7 @@ Metric:
Train
:
dataset
:
name
:
LMDBDat
e
Set
name
:
LMDBDat
a
Set
data_dir
:
./train_data/data_lmdb_release/training/
transforms
:
-
DecodeImage
:
# load image
...
...
@@ -77,7 +77,7 @@ Train:
Eval
:
dataset
:
name
:
LMDBDat
e
Set
name
:
LMDBDat
a
Set
data_dir
:
./train_data/data_lmdb_release/validation/
transforms
:
-
DecodeImage
:
# load image
...
...
configs/rec/rec_r34_vd_none_none_ctc.yml
View file @
c1fd4664
...
...
@@ -56,7 +56,7 @@ Metric:
Train
:
dataset
:
name
:
LMDBDat
e
Set
name
:
LMDBDat
a
Set
data_dir
:
./train_data/data_lmdb_release/training/
transforms
:
-
DecodeImage
:
# load image
...
...
@@ -75,7 +75,7 @@ Train:
Eval
:
dataset
:
name
:
LMDBDat
e
Set
name
:
LMDBDat
a
Set
data_dir
:
./train_data/data_lmdb_release/validation/
transforms
:
-
DecodeImage
:
# load image
...
...
configs/rec/rec_r34_vd_tps_bilstm_ctc.yml
View file @
c1fd4664
...
...
@@ -62,7 +62,7 @@ Metric:
Train
:
dataset
:
name
:
LMDBDat
e
Set
name
:
LMDBDat
a
Set
data_dir
:
./train_data/data_lmdb_release/training/
transforms
:
-
DecodeImage
:
# load image
...
...
@@ -81,7 +81,7 @@ Train:
Eval
:
dataset
:
name
:
LMDBDat
e
Set
name
:
LMDBDat
a
Set
data_dir
:
./train_data/data_lmdb_release/validation/
transforms
:
-
DecodeImage
:
# load image
...
...
configs/rec/rec_r50_fpn_srn.yml
0 → 100644
View file @
c1fd4664
Global
:
use_gpu
:
True
epoch_num
:
72
log_smooth_window
:
20
print_batch_step
:
5
save_model_dir
:
./output/rec/srn
save_epoch_step
:
3
# evaluation is run every 5000 iterations, starting from iteration 0
eval_batch_step
:
[
0
,
5000
]
# if pretrained_model was saved in static mode, load_static_weights must be set to True
cal_metric_during_train
:
True
pretrained_model
:
checkpoints
:
save_inference_dir
:
use_visualdl
:
False
infer_img
:
doc/imgs_words/ch/word_1.jpg
# for data or label process
character_dict_path
:
character_type
:
en
max_text_length
:
25
num_heads
:
8
infer_mode
:
False
use_space_char
:
False
Optimizer
:
name
:
Adam
lr
:
name
:
Cosine
learning_rate
:
0.0001
Architecture
:
model_type
:
rec
algorithm
:
SRN
in_channels
:
1
Transform
:
Backbone
:
name
:
ResNetFPN
Head
:
name
:
SRNHead
max_text_length
:
25
num_heads
:
8
num_encoder_TUs
:
2
num_decoder_TUs
:
4
hidden_dims
:
512
Loss
:
name
:
SRNLoss
PostProcess
:
name
:
SRNLabelDecode
Metric
:
name
:
RecMetric
main_indicator
:
acc
Train
:
dataset
:
name
:
LMDBDataSet
data_dir
:
./train_data/srn_train_data_duiqi
#label_file_list: ["./train_data/ic15_data/1.txt"]
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
SRNLabelEncode
:
# Class handling label
-
SRNRecResizeImg
:
image_shape
:
[
1
,
64
,
256
]
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
label'
,
'
length'
,
'
encoder_word_pos'
,
'
gsrm_word_pos'
,
'
gsrm_slf_attn_bias1'
,
'
gsrm_slf_attn_bias2'
]
# dataloader will return list in this order
loader
:
shuffle
:
False
batch_size_per_card
:
64
drop_last
:
True
num_workers
:
4
Eval
:
dataset
:
name
:
LMDBDataSet
data_dir
:
./train_data/data_lmdb_release/evaluation
transforms
:
-
DecodeImage
:
# load image
img_mode
:
BGR
channel_first
:
False
-
SRNLabelEncode
:
# Class handling label
-
SRNRecResizeImg
:
image_shape
:
[
1
,
64
,
256
]
-
KeepKeys
:
keep_keys
:
[
'
image'
,
'
label'
,
'
length'
,
'
encoder_word_pos'
,
'
gsrm_word_pos'
,
'
gsrm_slf_attn_bias1'
,
'
gsrm_slf_attn_bias2'
]
loader
:
shuffle
:
False
drop_last
:
False
batch_size_per_card
:
32
num_workers
:
4
ppocr/data/__init__.py
View file @
c1fd4664
...
...
@@ -33,7 +33,7 @@ import paddle.distributed as dist
from
ppocr.data.imaug
import
transform
,
create_operators
from
ppocr.data.simple_dataset
import
SimpleDataSet
from
ppocr.data.lmdb_dataset
import
LMDBDat
e
Set
from
ppocr.data.lmdb_dataset
import
LMDBDat
a
Set
__all__
=
[
'build_dataloader'
,
'transform'
,
'create_operators'
]
...
...
@@ -54,7 +54,7 @@ signal.signal(signal.SIGTERM, term_mp)
def
build_dataloader
(
config
,
mode
,
device
,
logger
):
config
=
copy
.
deepcopy
(
config
)
support_dict
=
[
'SimpleDataSet'
,
'LMDBDat
e
Set'
]
support_dict
=
[
'SimpleDataSet'
,
'LMDBDat
a
Set'
]
module_name
=
config
[
mode
][
'dataset'
][
'name'
]
assert
module_name
in
support_dict
,
Exception
(
'DataSet only support {}'
.
format
(
support_dict
))
...
...
ppocr/data/imaug/__init__.py
View file @
c1fd4664
...
...
@@ -21,7 +21,7 @@ from .make_border_map import MakeBorderMap
from
.make_shrink_map
import
MakeShrinkMap
from
.random_crop_data
import
EastRandomCropData
,
PSERandomCrop
from
.rec_img_aug
import
RecAug
,
RecResizeImg
,
ClsResizeImg
from
.rec_img_aug
import
RecAug
,
RecResizeImg
,
ClsResizeImg
,
SRNRecResizeImg
from
.randaugment
import
RandAugment
from
.operators
import
*
from
.label_ops
import
*
...
...
ppocr/data/imaug/label_ops.py
View file @
c1fd4664
...
...
@@ -98,6 +98,8 @@ class BaseRecLabelEncode(object):
support_character_type
,
character_type
)
self
.
max_text_len
=
max_text_length
self
.
beg_str
=
"sos"
self
.
end_str
=
"eos"
if
character_type
==
"en"
:
self
.
character_str
=
"0123456789abcdefghijklmnopqrstuvwxyz"
dict_character
=
list
(
self
.
character_str
)
...
...
@@ -213,3 +215,49 @@ class AttnLabelEncode(BaseRecLabelEncode):
assert
False
,
"Unsupport type %s in get_beg_end_flag_idx"
\
%
beg_or_end
return
idx
class SRNLabelEncode(BaseRecLabelEncode):
    """Encode text labels into fixed-length index arrays for SRN training.

    The character dictionary built by the base class is extended with the
    begin/end tokens (``self.beg_str`` / ``self.end_str``) via
    ``add_special_char``.
    """

    def __init__(self,
                 max_text_length=25,
                 character_dict_path=None,
                 character_type='en',
                 use_space_char=False,
                 **kwargs):
        # Extra keyword arguments are accepted and ignored.
        super(SRNLabelEncode, self).__init__(
            max_text_length, character_dict_path, character_type,
            use_space_char)

    def add_special_char(self, dict_character):
        """Return ``dict_character`` with the begin and end tokens appended."""
        return dict_character + [self.beg_str, self.end_str]

    def __call__(self, data):
        """Replace ``data['label']`` with a padded index array.

        Returns:
            ``data`` with ``'label'`` (array of ``max_text_len`` indices) and
            ``'length'`` (unpadded label length) set, or ``None`` when the
            label cannot be encoded or is longer than ``max_text_len``.
        """
        encoded = self.encode(data['label'])
        # Padding value: one past the last index of the plain character set.
        pad_index = len(self.character_str)
        if encoded is None or len(encoded) > self.max_text_len:
            return None
        n_valid = len(encoded)
        data['length'] = np.array(n_valid)
        padded = encoded + [pad_index] * (self.max_text_len - n_valid)
        data['label'] = np.array(padded)
        return data

    def get_ignored_tokens(self):
        """Return ``[beg_idx, end_idx]``, the indices of the special tokens."""
        return [
            self.get_beg_end_flag_idx("beg"),
            self.get_beg_end_flag_idx("end"),
        ]

    def get_beg_end_flag_idx(self, beg_or_end):
        """Look up the dictionary index of the begin or end token.

        Args:
            beg_or_end: ``"beg"`` or ``"end"``; anything else trips an
                assertion.

        Returns:
            A 0-d numpy array holding the token index.
        """
        if beg_or_end == "beg":
            token = self.beg_str
        elif beg_or_end == "end":
            token = self.end_str
        else:
            assert False, "Unsupport type %s in get_beg_end_flag_idx" \
                % beg_or_end
        return np.array(self.dict[token])
ppocr/data/imaug/rec_img_aug.py
View file @
c1fd4664
...
...
@@ -12,20 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
math
import
cv2
import
numpy
as
np
...
...
@@ -77,6 +63,26 @@ class RecResizeImg(object):
return
data
class SRNRecResizeImg(object):
    """Data operator that prepares an image plus the auxiliary SRN inputs.

    Args:
        image_shape: ``[channels, height, width]`` of the network input.
        num_heads: number of attention heads the bias tensors are tiled for.
        max_text_length: maximum number of characters per label.
        **kwargs: accepted and ignored.
    """

    def __init__(self, image_shape, num_heads, max_text_length, **kwargs):
        self.image_shape = image_shape
        self.num_heads = num_heads
        self.max_text_length = max_text_length

    def __call__(self, data):
        """Normalize ``data['image']`` and attach the four SRN side inputs.

        The side inputs are stored under ``'encoder_word_pos'``,
        ``'gsrm_word_pos'``, ``'gsrm_slf_attn_bias1'`` and
        ``'gsrm_slf_attn_bias2'``. Returns the same ``data`` mapping.
        """
        data['image'] = resize_norm_img_srn(data['image'], self.image_shape)

        word_pos, gsrm_pos, bias_upper, bias_lower = srn_other_inputs(
            self.image_shape, self.num_heads, self.max_text_length)

        side_inputs = (
            ('encoder_word_pos', word_pos),
            ('gsrm_word_pos', gsrm_pos),
            ('gsrm_slf_attn_bias1', bias_upper),
            ('gsrm_slf_attn_bias2', bias_lower),
        )
        for key, value in side_inputs:
            data[key] = value
        return data
def
resize_norm_img
(
img
,
image_shape
):
imgC
,
imgH
,
imgW
=
image_shape
h
=
img
.
shape
[
0
]
...
...
@@ -103,7 +109,7 @@ def resize_norm_img(img, image_shape):
def
resize_norm_img_chinese
(
img
,
image_shape
):
imgC
,
imgH
,
imgW
=
image_shape
# todo: change to 0 and modified image shape
max_wh_ratio
=
0
max_wh_ratio
=
imgW
*
1.0
/
imgH
h
,
w
=
img
.
shape
[
0
],
img
.
shape
[
1
]
ratio
=
w
*
1.0
/
h
max_wh_ratio
=
max
(
max_wh_ratio
,
ratio
)
...
...
@@ -126,6 +132,60 @@ def resize_norm_img_chinese(img, image_shape):
return
padding_im
def resize_norm_img_srn(img, image_shape):
    """Resize a BGR image for SRN and left-align it on a black canvas.

    The image is resized to height ``imgH`` and to a width of ``imgH``,
    ``2 * imgH`` or ``3 * imgH`` depending on its aspect ratio (the first
    multiple that is not narrower than the source), or to ``imgW`` for wider
    images. It is then converted to grayscale and pasted at the left edge of
    a zero-filled ``(imgH, imgW)`` canvas.

    Args:
        img: image array indexed as ``[row, col, ...]``; it is passed through
            ``cv2.COLOR_BGR2GRAY`` conversion.
        image_shape: ``(imgC, imgH, imgW)``; ``imgC`` is not used.

    Returns:
        ``float32`` array of shape ``(1, imgH, imgW)``.
    """
    imgC, imgH, imgW = image_shape
    canvas = np.zeros((imgH, imgW))
    src_h = img.shape[0]
    src_w = img.shape[1]

    # Pick the smallest width bucket (1x, 2x, 3x height) that fits the source
    # aspect ratio; anything wider is squeezed to the full canvas width.
    target_w = imgW
    for factor in (1, 2, 3):
        if src_w <= src_h * factor:
            target_w = imgH * factor
            break

    resized = cv2.resize(img, (target_w, imgH))
    gray = cv2.cvtColor(np.asarray(resized), cv2.COLOR_BGR2GRAY)

    # NOTE(review): this paste assumes the chosen width is <= imgW.
    canvas[:, 0:gray.shape[1]] = gray
    canvas = canvas[:, :, np.newaxis]

    row, col, c = canvas.shape
    c = 1
    return np.reshape(canvas, (c, row, col)).astype(np.float32)
def srn_other_inputs(image_shape, num_heads, max_text_length):
    """Build the position indices and attention biases SRN takes as inputs.

    Args:
        image_shape: ``(imgC, imgH, imgW)``; only height and width are used.
        num_heads: number of copies of each bias matrix (first axis).
        max_text_length: side length of the bias matrices and length of the
            GSRM position vector.

    Returns:
        A list ``[encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
        gsrm_slf_attn_bias2]`` where

        * ``encoder_word_pos`` is an ``int64`` column ``0..feature_dim-1``
          with ``feature_dim = int((imgH / 8) * (imgW / 8))``,
        * ``gsrm_word_pos`` is an ``int64`` column ``0..max_text_length-1``,
        * ``gsrm_slf_attn_bias1`` has ``-1e9`` strictly above the diagonal,
        * ``gsrm_slf_attn_bias2`` has ``-1e9`` strictly below the diagonal,

        both biases shaped ``(num_heads, max_text_length, max_text_length)``.
    """
    imgC, imgH, imgW = image_shape
    # presumably the backbone feature map is 1/8 of the input per side —
    # verify against the backbone.
    feature_dim = int((imgH / 8) * (imgW / 8))

    encoder_word_pos = np.array(range(feature_dim))
    encoder_word_pos = encoder_word_pos.reshape((feature_dim, 1))
    encoder_word_pos = encoder_word_pos.astype('int64')

    gsrm_word_pos = np.array(range(max_text_length))
    gsrm_word_pos = gsrm_word_pos.reshape((max_text_length, 1))
    gsrm_word_pos = gsrm_word_pos.astype('int64')

    mask_shape = [1, max_text_length, max_text_length]
    per_head = [num_heads, 1, 1]
    all_ones = np.ones((1, max_text_length, max_text_length))

    upper_mask = np.triu(all_ones, 1).reshape(mask_shape)
    gsrm_slf_attn_bias1 = np.tile(upper_mask, per_head) * [-1e9]

    lower_mask = np.tril(all_ones, -1).reshape(mask_shape)
    gsrm_slf_attn_bias2 = np.tile(lower_mask, per_head) * [-1e9]

    return [
        encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
        gsrm_slf_attn_bias2
    ]
def
flag
():
"""
flag
...
...
ppocr/data/lmdb_dataset.py
View file @
c1fd4664
...
...
@@ -20,9 +20,9 @@ import cv2
from
.imaug
import
transform
,
create_operators
class
LMDBDat
e
Set
(
Dataset
):
class
LMDBDat
a
Set
(
Dataset
):
def
__init__
(
self
,
config
,
mode
,
logger
):
super
(
LMDBDat
e
Set
,
self
).
__init__
()
super
(
LMDBDat
a
Set
,
self
).
__init__
()
global_config
=
config
[
'Global'
]
dataset_config
=
config
[
mode
][
'dataset'
]
...
...
ppocr/losses/__init__.py
View file @
c1fd4664
...
...
@@ -23,11 +23,14 @@ def build_loss(config):
# rec loss
from
.rec_ctc_loss
import
CTCLoss
from
.rec_srn_loss
import
SRNLoss
# cls loss
from
.cls_loss
import
ClsLoss
support_dict
=
[
'DBLoss'
,
'EASTLoss'
,
'SASTLoss'
,
'CTCLoss'
,
'ClsLoss'
]
support_dict
=
[
'DBLoss'
,
'EASTLoss'
,
'SASTLoss'
,
'CTCLoss'
,
'ClsLoss'
,
'SRNLoss'
]
config
=
copy
.
deepcopy
(
config
)
module_name
=
config
.
pop
(
'name'
)
...
...
ppocr/losses/rec_srn_loss.py
0 → 100644
View file @
c1fd4664
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
nn
class SRNLoss(nn.Layer):
    """Weighted sum of the three cross-entropy losses produced by SRN.

    Each branch is scored with a sum-reduced cross entropy against the same
    flattened labels; the total is
    ``word + 2.0 * vsfd + 0.15 * gsrm``.
    """

    def __init__(self, **kwargs):
        # Keyword arguments are accepted and ignored.
        super(SRNLoss, self).__init__()
        self.loss_func = paddle.nn.loss.CrossEntropyLoss(reduction="sum")

    def forward(self, predicts, batch):
        """Compute the SRN training loss.

        Args:
            predicts: mapping with ``'predict'``, ``'word_out'`` and
                ``'gsrm_out'`` entries.
            batch: sequence whose element at index 1 is the label tensor.

        Returns:
            Dict with ``'loss'`` (weighted total), ``'word_loss'`` and
            ``'img_loss'`` (the ``'predict'`` branch), each reshaped to
            ``[1]``.
        """
        vsfd_logits = predicts['predict']
        word_logits = predicts['word_out']
        gsrm_logits = predicts['gsrm_out']

        # Labels are flattened to one int64 class index per row.
        target = paddle.reshape(
            x=paddle.cast(x=batch[1], dtype='int64'), shape=[-1, 1])

        def branch_cost(logits):
            raw = self.loss_func(logits, label=target)
            return paddle.reshape(x=paddle.sum(raw), shape=[1])

        cost_word = branch_cost(word_logits)
        cost_gsrm = branch_cost(gsrm_logits)
        cost_vsfd = branch_cost(vsfd_logits)

        sum_cost = cost_word + cost_vsfd * 2.0 + cost_gsrm * 0.15

        return {
            'loss': sum_cost,
            'word_loss': cost_word,
            'img_loss': cost_vsfd,
        }
ppocr/metrics/__init__.py
View file @
c1fd4664
...
...
@@ -26,6 +26,7 @@ def build_metric(config):
from
.det_metric
import
DetMetric
from
.rec_metric
import
RecMetric
from
.cls_metric
import
ClsMetric
from
.rec_metric
import
RecMetric
support_dict
=
[
'DetMetric'
,
'RecMetric'
,
'ClsMetric'
]
...
...
ppocr/metrics/rec_metric.py
View file @
c1fd4664
...
...
@@ -31,8 +31,6 @@ class RecMetric(object):
if
pred
==
target
:
correct_num
+=
1
all_num
+=
1
# if all_num < 10 and kwargs.get('show_str', False):
# print('{} -> {}'.format(pred, target))
self
.
correct_num
+=
correct_num
self
.
all_num
+=
all_num
self
.
norm_edit_dis
+=
norm_edit_dis
...
...
@@ -48,7 +46,7 @@ class RecMetric(object):
'norm_edit_dis': 0,
}
"""
acc
=
self
.
correct_num
/
self
.
all_num
acc
=
1.0
*
self
.
correct_num
/
self
.
all_num
norm_edit_dis
=
1
-
self
.
norm_edit_dis
/
self
.
all_num
self
.
reset
()
return
{
'acc'
:
acc
,
'norm_edit_dis'
:
norm_edit_dis
}
...
...
ppocr/modeling/architectures/base_model.py
View file @
c1fd4664
...
...
@@ -68,11 +68,14 @@ class BaseModel(nn.Layer):
config
[
"Head"
][
'in_channels'
]
=
in_channels
self
.
head
=
build_head
(
config
[
"Head"
])
def
forward
(
self
,
x
):
def forward(self, x, data=None):
    """Run the input through transform, backbone, neck and head.

    Args:
        x: input passed to the first enabled stage.
        data: optional extra inputs; when not ``None`` they are forwarded to
            the head as a second positional argument.

    Returns:
        The output of ``self.head``.
    """
    if self.use_transform:
        x = self.transform(x)
    x = self.backbone(x)
    if self.use_neck:
        x = self.neck(x)
    # The head only receives `data` when the caller supplied it.
    head_inputs = (x,) if data is None else (x, data)
    return self.head(*head_inputs)
ppocr/modeling/backbones/__init__.py
View file @
c1fd4664
...
...
@@ -24,7 +24,8 @@ def build_backbone(config, model_type):
elif
model_type
==
'rec'
or
model_type
==
'cls'
:
from
.rec_mobilenet_v3
import
MobileNetV3
from
.rec_resnet_vd
import
ResNet
support_dict
=
[
'MobileNetV3'
,
'ResNet'
,
'ResNet_FPN'
]
from
.rec_resnet_fpn
import
ResNetFPN
support_dict
=
[
'MobileNetV3'
,
'ResNet'
,
'ResNetFPN'
]
else
:
raise
NotImplementedError
...
...
ppocr/modeling/backbones/rec_resnet_fpn.py
0 → 100644
View file @
c1fd4664
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
paddle
import
nn
,
ParamAttr
from
paddle.nn
import
functional
as
F
import
paddle.fluid
as
fluid
import
paddle
import
numpy
as
np
__all__
=
[
"ResNetFPN"
]
class ResNetFPN(nn.Layer):
    """ResNet backbone with a small top-down feature fusion head.

    Args:
        in_channels: number of channels of the input image.
        layers: ResNet depth; one of 18, 34, 50, 101, 152 (any other value
            raises ``KeyError``).
        **kwargs: accepted and ignored.

    The output has ``self.out_channels`` (512) channels.
    """

    def __init__(self, in_channels=1, layers=50, **kwargs):
        super(ResNetFPN, self).__init__()
        supported_layers = {
            18: {
                'depth': [2, 2, 2, 2],
                'block_class': BasicBlock
            },
            34: {
                'depth': [3, 4, 6, 3],
                'block_class': BasicBlock
            },
            50: {
                'depth': [3, 4, 6, 3],
                'block_class': BottleneckBlock
            },
            101: {
                'depth': [3, 4, 23, 3],
                'block_class': BottleneckBlock
            },
            152: {
                'depth': [3, 8, 36, 3],
                'block_class': BottleneckBlock
            }
        }
        stride_list = [(2, 2), (2, 2), (1, 1), (1, 1)]
        num_filters = [64, 128, 256, 512]
        self.depth = supported_layers[layers]['depth']
        self.F = []
        self.conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=64,
            kernel_size=7,
            stride=2,
            act="relu",
            name="conv1")
        self.block_list = []
        in_ch = 64

        if layers >= 50:
            for stage, repeats in enumerate(self.depth):
                for unit in range(repeats):
                    stage_tag = "res" + str(stage + 2)
                    if layers in [101, 152] and stage == 2:
                        # Deep variants name the long third stage a, b1, b2...
                        if unit == 0:
                            conv_name = stage_tag + "a"
                        else:
                            conv_name = stage_tag + "b" + str(unit)
                    else:
                        conv_name = stage_tag + chr(97 + unit)

                    bottleneck = self.add_sublayer(
                        "bottleneckBlock_{}_{}".format(stage, unit),
                        BottleneckBlock(
                            in_channels=in_ch,
                            out_channels=num_filters[stage],
                            stride=stride_list[stage] if unit == 0 else 1,
                            name=conv_name))
                    in_ch = num_filters[stage] * 4
                    self.block_list.append(bottleneck)
                # Keep the last block of every stage.
                self.F.append(bottleneck)
        else:
            for stage, repeats in enumerate(self.depth):
                for unit in range(repeats):
                    conv_name = "res" + str(stage + 2) + chr(97 + unit)
                    # NOTE(review): the original also computed a local
                    # `stride` of (2, 1)/(1, 1) here that was never used;
                    # the block stride comes from stride_list.
                    basic_block = self.add_sublayer(
                        conv_name,
                        BasicBlock(
                            in_channels=in_ch,
                            out_channels=num_filters[stage],
                            stride=stride_list[stage] if unit == 0 else 1,
                            is_first=stage == unit == 0,
                            name=conv_name))
                    in_ch = basic_block.out_channels
                    self.block_list.append(basic_block)

        out_ch_list = [in_ch // 4, in_ch // 2, in_ch]
        self.base_block = []
        # NOTE(review): conv_trans and bn_block are never populated in this
        # class, yet __call__ indexes them when spatial sizes differ.
        self.conv_trans = []
        self.bn_block = []

        def trainable():
            return ParamAttr(trainable=True)

        # Three layers per fusion level: 1x1 conv, 3x3 conv, BN + relu.
        for level in [-2, -3]:
            in_channels = out_ch_list[level + 1] + out_ch_list[level]
            level_width = out_ch_list[level]

            self.base_block.append(
                self.add_sublayer(
                    "F_{}_base_block_0".format(level),
                    nn.Conv2D(
                        in_channels=in_channels,
                        out_channels=level_width,
                        kernel_size=1,
                        weight_attr=trainable(),
                        bias_attr=trainable())))
            self.base_block.append(
                self.add_sublayer(
                    "F_{}_base_block_1".format(level),
                    nn.Conv2D(
                        in_channels=level_width,
                        out_channels=level_width,
                        kernel_size=3,
                        padding=1,
                        weight_attr=trainable(),
                        bias_attr=trainable())))
            self.base_block.append(
                self.add_sublayer(
                    "F_{}_base_block_2".format(level),
                    nn.BatchNorm(
                        num_channels=level_width,
                        act="relu",
                        param_attr=trainable(),
                        bias_attr=trainable())))

        # Final 1x1 projection, created once after the last level; it reuses
        # the last loop value (-3) for its name and input width.
        self.base_block.append(
            self.add_sublayer(
                "F_{}_base_block_3".format(level),
                nn.Conv2D(
                    in_channels=out_ch_list[level],
                    out_channels=512,
                    kernel_size=1,
                    bias_attr=trainable(),
                    weight_attr=trainable())))
        self.out_channels = 512

    def __call__(self, x):
        """Run the backbone and fuse the last three stage outputs."""
        x = self.conv(x)

        # Index (1-based) of the last block of each stage.
        stage_ends = [
            np.sum(self.depth[:k + 1]) for k in range(len(self.depth))
        ]

        stage_feats = []
        for idx, layer in enumerate(self.block_list):
            x = layer(x)
            for end in stage_ends:
                if idx + 1 == end:
                    stage_feats.append(x)

        base = stage_feats[-1]
        level = 0
        for idx, layer in enumerate(self.base_block):
            # Every third layer (0 and 3) starts a new fusion level.
            if idx % 3 == 0 and idx < 6:
                level = level + 1
                lateral = stage_feats[-level - 1]
                _, _, dim2, dim3 = lateral.shape
                if [dim2, dim3] != list(base.shape[2:]):
                    base = self.conv_trans[level - 1](base)
                    base = self.bn_block[level - 1](base)
                base = paddle.concat([base, lateral], axis=1)
            base = layer(base)
        return base
class ConvBNLayer(nn.Layer):
    """Bias-free 2-D convolution followed by batch normalization.

    Args:
        in_channels: input channel count.
        out_channels: output channel count.
        kernel_size: requested kernel size; also determines the padding
            ``(kernel_size - 1) // 2``.
        stride: convolution stride. When it equals the tuple ``(1, 1)`` the
            conv is built with kernel size 2 and dilation 2 instead.
        groups: convolution groups.
        act: activation name passed to the batch norm layer.
        name: prefix for parameter names; must be a string.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        conv_kernel = 2 if stride == (1, 1) else kernel_size
        conv_dilation = 2 if stride == (1, 1) else 1
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=conv_kernel,
            dilation=conv_dilation,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + '.conv2d.output.1.w_0'),
            bias_attr=False)

        # The stem keeps its full name; other layers drop the "res" prefix.
        bn_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=name + '.output.1.w_0'),
            bias_attr=ParamAttr(name=name + '.output.1.b_0'),
            moving_mean_name=bn_name + "_mean",
            moving_variance_name=bn_name + "_variance")

    def __call__(self, x):
        """Apply the convolution, then the batch norm."""
        return self.bn(self.conv(x))
class ShortCut(nn.Layer):
    """Residual shortcut: identity, or a 1x1 ConvBNLayer projection.

    A projection is created when the channel counts differ, when ``stride``
    is not equal to ``1``, or when ``is_first`` is ``True``.
    """

    def __init__(self, in_channels, out_channels, stride, name,
                 is_first=False):
        super(ShortCut, self).__init__()
        needs_projection = (in_channels != out_channels or stride != 1 or
                            is_first == True)
        self.use_conv = True
        if not needs_projection:
            self.use_conv = False
        else:
            # A (1, 1) stride is passed on as the scalar 1; any other stride,
            # e.g. (2, 2), is forwarded unchanged.
            conv_stride = 1 if stride == (1, 1) else stride
            self.conv = ConvBNLayer(
                in_channels, out_channels, 1, conv_stride, name=name)

    def forward(self, x):
        """Return the projected input, or the input itself for identity."""
        if not self.use_conv:
            return x
        return self.conv(x)
class BottleneckBlock(nn.Layer):
    """Three-conv residual block (1x1, 3x3, 1x1) with 4x channel expansion.

    Args:
        in_channels: input channel count.
        out_channels: width of the first two convs; the block outputs
            ``out_channels * 4`` channels.
        stride: stride of the middle 3x3 conv and of the shortcut.
        name: prefix for the parameter names of the sub-layers.
    """

    def __init__(self, in_channels, out_channels, stride, name):
        super(BottleneckBlock, self).__init__()
        expanded = out_channels * 4

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=expanded,
            kernel_size=1,
            act=None,
            name=name + "_branch2c")

        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=expanded,
            stride=stride,
            is_first=False,
            name=name + "_branch1")
        self.out_channels = expanded

    def forward(self, x):
        """Return ``relu(convs(x) + shortcut(x))``."""
        main = self.conv2(self.conv1(self.conv0(x)))
        merged = main + self.short(x)
        return F.relu(merged)
class BasicBlock(nn.Layer):
    """Two-conv residual block (3x3, 3x3) without channel expansion.

    Args:
        in_channels: input channel count.
        out_channels: output channel count of both convs.
        stride: stride of the first conv and of the shortcut.
        name: prefix for the parameter names of the sub-layers.
        is_first: forwarded to the shortcut, which then always projects.
    """

    def __init__(self, in_channels, out_channels, stride, name, is_first):
        super(BasicBlock, self).__init__()
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")
        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=out_channels,
            stride=stride,
            is_first=is_first,
            name=name + "_branch1")
        self.out_channels = out_channels

    def forward(self, x):
        """Return ``relu(convs(x) + shortcut(x))``."""
        main = self.conv1(self.conv0(x))
        merged = main + self.short(x)
        return F.relu(merged)
ppocr/modeling/heads/__init__.py
View file @
c1fd4664
...
...
@@ -23,10 +23,13 @@ def build_head(config):
# rec head
from
.rec_ctc_head
import
CTCHead
from
.rec_srn_head
import
SRNHead
# cls head
from
.cls_head
import
ClsHead
support_dict
=
[
'DBHead'
,
'EASTHead'
,
'SASTHead'
,
'CTCHead'
,
'ClsHead'
]
support_dict
=
[
'DBHead'
,
'EASTHead'
,
'SASTHead'
,
'CTCHead'
,
'ClsHead'
,
'SRNHead'
]
module_name
=
config
.
pop
(
'name'
)
assert
module_name
in
support_dict
,
Exception
(
'head only support {}'
.
format
(
...
...
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment