Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
83303bc7
Commit
83303bc7
authored
Oct 09, 2021
by
LDOUBLEV
Browse files
fix conflicts
parents
3af943f3
af0bac58
Changes
424
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1095 additions
and
88 deletions
+1095
-88
ppocr/losses/rec_nrtr_loss.py
ppocr/losses/rec_nrtr_loss.py
+30
-0
ppocr/losses/rec_sar_loss.py
ppocr/losses/rec_sar_loss.py
+25
-0
ppocr/losses/table_att_loss.py
ppocr/losses/table_att_loss.py
+109
-0
ppocr/metrics/__init__.py
ppocr/metrics/__init__.py
+13
-10
ppocr/metrics/det_metric.py
ppocr/metrics/det_metric.py
+1
-0
ppocr/metrics/distillation_metric.py
ppocr/metrics/distillation_metric.py
+73
-0
ppocr/metrics/e2e_metric.py
ppocr/metrics/e2e_metric.py
+42
-8
ppocr/metrics/eval_det_iou.py
ppocr/metrics/eval_det_iou.py
+0
-11
ppocr/metrics/rec_metric.py
ppocr/metrics/rec_metric.py
+11
-1
ppocr/metrics/table_metric.py
ppocr/metrics/table_metric.py
+50
-0
ppocr/modeling/architectures/__init__.py
ppocr/modeling/architectures/__init__.py
+12
-4
ppocr/modeling/architectures/base_model.py
ppocr/modeling/architectures/base_model.py
+14
-7
ppocr/modeling/architectures/distillation_model.py
ppocr/modeling/architectures/distillation_model.py
+60
-0
ppocr/modeling/backbones/__init__.py
ppocr/modeling/backbones/__init__.py
+19
-9
ppocr/modeling/backbones/det_mobilenet_v3.py
ppocr/modeling/backbones/det_mobilenet_v3.py
+13
-32
ppocr/modeling/backbones/rec_mobilenet_v3.py
ppocr/modeling/backbones/rec_mobilenet_v3.py
+3
-6
ppocr/modeling/backbones/rec_mv1_enhance.py
ppocr/modeling/backbones/rec_mv1_enhance.py
+256
-0
ppocr/modeling/backbones/rec_nrtr_mtb.py
ppocr/modeling/backbones/rec_nrtr_mtb.py
+48
-0
ppocr/modeling/backbones/rec_resnet_31.py
ppocr/modeling/backbones/rec_resnet_31.py
+176
-0
ppocr/modeling/backbones/rec_resnet_aster.py
ppocr/modeling/backbones/rec_resnet_aster.py
+140
-0
No files found.
ppocr/losses/rec_nrtr_loss.py
0 → 100644
View file @
83303bc7
import
paddle
from
paddle
import
nn
import
paddle.nn.functional
as
F
class NRTRLoss(nn.Layer):
    """Cross-entropy loss for the NRTR recognizer, with optional label smoothing.

    Index 0 is treated as padding: it is ignored by the plain cross-entropy
    branch and masked out of the mean in the smoothed branch.
    """

    def __init__(self, smoothing=True, **kwargs):
        super(NRTRLoss, self).__init__()
        self.loss_func = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
        self.smoothing = smoothing

    def forward(self, pred, batch):
        # Flatten logits to (num_tokens, num_classes).
        pred = pred.reshape([-1, pred.shape[2]])
        max_len = batch[2].max()
        # Drop the leading start token and flatten the targets.
        tgt = batch[1][:, 1:2 + max_len].reshape([-1])

        if not self.smoothing:
            return {'loss': self.loss_func(pred, tgt)}

        smooth_eps = 0.1
        num_classes = pred.shape[1]
        # Soft targets: (1 - eps) on the true class, eps spread uniformly
        # over the remaining classes.
        soft_targets = F.one_hot(tgt, num_classes)
        soft_targets = soft_targets * (1 - smooth_eps) + (
            1 - soft_targets) * smooth_eps / (num_classes - 1)
        log_probs = F.log_softmax(pred, axis=1)
        # Mask out padding positions (target id 0) before averaging.
        valid_mask = paddle.not_equal(
            tgt, paddle.zeros(tgt.shape, dtype='int64'))
        per_token_loss = -(soft_targets * log_probs).sum(axis=1)
        loss = per_token_loss.masked_select(valid_mask).mean()
        return {'loss': loss}
ppocr/losses/rec_sar_loss.py
0 → 100644
View file @
83303bc7
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
nn
class SARLoss(nn.Layer):
    """Cross-entropy loss for the SAR text recognizer.

    The padding index defaults to 96 (matching the default SAR dictionary)
    but can now be overridden with an ``ignore_index`` keyword argument so
    the loss works with dictionaries of other sizes.
    """

    def __init__(self, **kwargs):
        super(SARLoss, self).__init__()
        # Keep 96 as the default so existing configs behave unchanged.
        ignore_index = kwargs.get('ignore_index', 96)
        self.loss_func = paddle.nn.loss.CrossEntropyLoss(
            reduction="mean", ignore_index=ignore_index)

    def forward(self, predicts, batch):
        # Drop the last output step so predictions align with the targets.
        predict = predicts[:, :-1, :]  # ignore last index of outputs to be in same seq_len with targets
        # Drop the first (start) token of the targets.
        label = batch[1].astype("int64")[:, 1:]  # ignore first index of target in loss calculation
        batch_size, num_steps, num_classes = predict.shape[0], predict.shape[
            1], predict.shape[2]
        assert len(label.shape) == len(list(predict.shape)) - 1, \
            "The target's shape and inputs's shape is [N, d] and [N, num_steps]"

        # Flatten to (N * num_steps, num_classes) / (N * num_steps,).
        inputs = paddle.reshape(predict, [-1, num_classes])
        targets = paddle.reshape(label, [-1])
        loss = self.loss_func(inputs, targets)
        return {'loss': loss}
ppocr/losses/table_att_loss.py
0 → 100644
View file @
83303bc7
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
nn
from
paddle.nn
import
functional
as
F
from
paddle
import
fluid
class TableAttentionLoss(nn.Layer):
    """Combined loss for table recognition: a (masked) cross-entropy loss on
    the predicted structure tokens plus an MSE loss on the predicted cell
    locations, with an optional GIoU term on the locations.

    NOTE(review): uses the deprecated ``paddle.fluid.layers`` API for the
    GIoU computation — candidate for migration to ``paddle.*`` equivalents.
    """

    def __init__(self, structure_weight, loc_weight, use_giou=False, giou_weight=1.0, **kwargs):
        super(TableAttentionLoss, self).__init__()
        # reduction='none' so the per-token structure loss can be masked
        # before averaging.
        self.loss_func = nn.CrossEntropyLoss(weight=None, reduction='none')
        self.structure_weight = structure_weight
        self.loc_weight = loc_weight
        self.use_giou = use_giou
        self.giou_weight = giou_weight

    def giou_loss(self, preds, bbox, eps=1e-7, reduction='mean'):
        '''
        :param preds:[[x1,y1,x2,y2], [x1,y1,x2,y2],,,]
        :param bbox:[[x1,y1,x2,y2], [x1,y1,x2,y2],,,]
        :return: loss
        '''
        # Intersection rectangle corners.
        ix1 = fluid.layers.elementwise_max(preds[:, 0], bbox[:, 0])
        iy1 = fluid.layers.elementwise_max(preds[:, 1], bbox[:, 1])
        ix2 = fluid.layers.elementwise_min(preds[:, 2], bbox[:, 2])
        iy2 = fluid.layers.elementwise_min(preds[:, 3], bbox[:, 3])

        # Clip to non-negative width/height; 1e-3 keeps degenerate boxes
        # from producing exactly-zero extents.
        iw = fluid.layers.clip(ix2 - ix1 + 1e-3, 0., 1e10)
        ih = fluid.layers.clip(iy2 - iy1 + 1e-3, 0., 1e10)

        # overlap
        inters = iw * ih

        # union
        uni = (preds[:, 2] - preds[:, 0] + 1e-3) * (preds[:, 3] - preds[:, 1] + 1e-3
              ) + (bbox[:, 2] - bbox[:, 0] + 1e-3) * (bbox[:, 3] - bbox[:, 1] + 1e-3) - inters + eps

        # ious
        ious = inters / uni

        # Smallest enclosing rectangle of the two boxes.
        ex1 = fluid.layers.elementwise_min(preds[:, 0], bbox[:, 0])
        ey1 = fluid.layers.elementwise_min(preds[:, 1], bbox[:, 1])
        ex2 = fluid.layers.elementwise_max(preds[:, 2], bbox[:, 2])
        ey2 = fluid.layers.elementwise_max(preds[:, 3], bbox[:, 3])
        ew = fluid.layers.clip(ex2 - ex1 + 1e-3, 0., 1e10)
        eh = fluid.layers.clip(ey2 - ey1 + 1e-3, 0., 1e10)

        # enclose erea
        enclose = ew * eh + eps
        # GIoU = IoU - (enclosing area not covered by the union) / enclosing.
        giou = ious - (enclose - uni) / enclose
        loss = 1 - giou

        if reduction == 'mean':
            loss = paddle.mean(loss)
        elif reduction == 'sum':
            loss = paddle.sum(loss)
        else:
            raise NotImplementedError
        return loss

    def forward(self, predicts, batch):
        """Compute the combined loss.

        ``predicts`` holds 'structure_probs' and 'loc_preds'; ``batch``
        element 1 holds the structure targets, 2 the location targets and
        4 the location mask. When ``len(batch) == 6``, element 5 is an
        additional mask applied to the structure loss.
        """
        structure_probs = predicts['structure_probs']
        structure_targets = batch[1].astype("int64")
        # Drop the leading start token from the targets.
        structure_targets = structure_targets[:, 1:]
        if len(batch) == 6:
            structure_mask = batch[5].astype("int64")
            structure_mask = structure_mask[:, 1:]
            structure_mask = paddle.reshape(structure_mask, [-1])
        # Flatten for per-token cross entropy.
        structure_probs = paddle.reshape(structure_probs, [-1, structure_probs.shape[-1]])
        structure_targets = paddle.reshape(structure_targets, [-1])
        structure_loss = self.loss_func(structure_probs, structure_targets)

        if len(batch) == 6:
            # Zero out loss at masked (padding) positions.
            structure_loss = structure_loss * structure_mask

        # structure_loss = paddle.sum(structure_loss) * self.structure_weight
        structure_loss = paddle.mean(structure_loss) * self.structure_weight

        loc_preds = predicts['loc_preds']
        loc_targets = batch[2].astype("float32")
        loc_targets_mask = batch[4].astype("float32")
        # Drop the leading start step from the location targets/mask.
        loc_targets = loc_targets[:, 1:, :]
        loc_targets_mask = loc_targets_mask[:, 1:, :]
        # Mask predictions so padded steps contribute zero to the MSE.
        loc_loss = F.mse_loss(loc_preds * loc_targets_mask, loc_targets) * self.loc_weight
        if self.use_giou:
            loc_loss_giou = self.giou_loss(loc_preds * loc_targets_mask,
                                           loc_targets) * self.giou_weight
            total_loss = structure_loss + loc_loss + loc_loss_giou
            return {'loss': total_loss, "structure_loss": structure_loss,
                    "loc_loss": loc_loss, "loc_loss_giou": loc_loss_giou}
        else:
            total_loss = structure_loss + loc_loss
            return {'loss': total_loss, "structure_loss": structure_loss,
                    "loc_loss": loc_loss}
\ No newline at end of file
ppocr/metrics/__init__.py
View file @
83303bc7
...
@@ -19,23 +19,26 @@ from __future__ import unicode_literals
...
@@ -19,23 +19,26 @@ from __future__ import unicode_literals
import
copy
import
copy
__all__
=
[
'
build_metric
'
]
__all__
=
[
"
build_metric
"
]
from
.det_metric
import
DetMetric
from
.rec_metric
import
RecMetric
from
.cls_metric
import
ClsMetric
from
.e2e_metric
import
E2EMetric
from
.distillation_metric
import
DistillationMetric
from
.table_metric
import
TableMetric
from
.kie_metric
import
KIEMetric
def
build_metric
(
config
):
from
.det_metric
import
DetMetric
from
.rec_metric
import
RecMetric
from
.cls_metric
import
ClsMetric
from
.e2e_metric
import
E2EMetric
from
.kie_metric
import
KIEMetric
def
build_metric
(
config
):
support_dict
=
[
support_dict
=
[
'DetMetric'
,
'RecMetric'
,
'ClsMetric'
,
'E2EMetric'
,
'KIEMetric'
"DetMetric"
,
"RecMetric"
,
"ClsMetric"
,
"E2EMetric"
,
"DistillationMetric"
,
"TableMetric"
,
'KIEMetric'
]
]
config
=
copy
.
deepcopy
(
config
)
config
=
copy
.
deepcopy
(
config
)
module_name
=
config
.
pop
(
'
name
'
)
module_name
=
config
.
pop
(
"
name
"
)
assert
module_name
in
support_dict
,
Exception
(
assert
module_name
in
support_dict
,
Exception
(
'
metric only support {}
'
.
format
(
support_dict
))
"
metric only support {}
"
.
format
(
support_dict
))
module_class
=
eval
(
module_name
)(
**
config
)
module_class
=
eval
(
module_name
)(
**
config
)
return
module_class
return
module_class
ppocr/metrics/det_metric.py
View file @
83303bc7
...
@@ -55,6 +55,7 @@ class DetMetric(object):
...
@@ -55,6 +55,7 @@ class DetMetric(object):
result
=
self
.
evaluator
.
evaluate_image
(
gt_info_list
,
det_info_list
)
result
=
self
.
evaluator
.
evaluate_image
(
gt_info_list
,
det_info_list
)
self
.
results
.
append
(
result
)
self
.
results
.
append
(
result
)
def
get_metric
(
self
):
def
get_metric
(
self
):
"""
"""
return metrics {
return metrics {
...
...
ppocr/metrics/distillation_metric.py
0 → 100644
View file @
83303bc7
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
importlib
import
copy
from
.rec_metric
import
RecMetric
from
.det_metric
import
DetMetric
from
.e2e_metric
import
E2EMetric
from
.cls_metric
import
ClsMetric
class DistillationMetric(object):
    """Metric wrapper for distillation models.

    The model output is a dict mapping sub-model names (e.g. "Student",
    "Teacher") to predictions; one base metric instance is created per
    sub-model on the first call and updated on every call.

    Args:
        key: name of the sub-model whose metrics are reported un-prefixed
            as the main result.
        base_metric_name: class name of the underlying metric, looked up in
            this module (e.g. "RecMetric").
        main_indicator: main indicator name forwarded to the base metric.
        **kwargs: extra arguments forwarded to the base metric constructor.
    """

    def __init__(self,
                 key=None,
                 base_metric_name=None,
                 main_indicator=None,
                 **kwargs):
        self.key = key
        self.main_indicator = main_indicator
        self.base_metric_name = base_metric_name
        self.kwargs = kwargs
        # Lazily built on the first __call__, once the sub-model names
        # (the keys of `preds`) are known.
        self.metrics = None

    def _init_metrics(self, preds):
        """Create and reset one base metric instance per key in `preds`."""
        self.metrics = dict()
        mod = importlib.import_module(__name__)
        for key in preds:
            self.metrics[key] = getattr(mod, self.base_metric_name)(
                main_indicator=self.main_indicator, **self.kwargs)
            self.metrics[key].reset()

    # Backward-compatible alias for the original (misspelled) method name.
    _init_metrcis = _init_metrics

    def __call__(self, preds, batch, **kwargs):
        assert isinstance(preds, dict)
        if self.metrics is None:
            self._init_metrics(preds)
        for key in preds:
            self.metrics[key](preds[key], batch, **kwargs)

    def get_metric(self):
        """
        return metrics {
                 'acc': 0,
                 'norm_edit_dis': 0,
            }
        """
        output = dict()
        for key in self.metrics:
            metric = self.metrics[key].get_metric()
            # main indicator: the chosen sub-model's metrics are reported
            # un-prefixed; all others are prefixed with their key.
            if key == self.key:
                output.update(metric)
            else:
                for sub_key in metric:
                    output["{}_{}".format(key, sub_key)] = metric[sub_key]
        return output

    def reset(self):
        for key in self.metrics:
            self.metrics[key].reset()
ppocr/metrics/e2e_metric.py
View file @
83303bc7
...
@@ -18,16 +18,18 @@ from __future__ import print_function
...
@@ -18,16 +18,18 @@ from __future__ import print_function
__all__
=
[
'E2EMetric'
]
__all__
=
[
'E2EMetric'
]
from
ppocr.utils.e2e_metric.Deteval
import
get_socre
,
combine_results
from
ppocr.utils.e2e_metric.Deteval
import
get_socre
_A
,
get_socre_B
,
combine_results
from
ppocr.utils.e2e_utils.extract_textpoint_slow
import
get_dict
from
ppocr.utils.e2e_utils.extract_textpoint_slow
import
get_dict
class
E2EMetric
(
object
):
class
E2EMetric
(
object
):
def
__init__
(
self
,
def
__init__
(
self
,
mode
,
gt_mat_dir
,
gt_mat_dir
,
character_dict_path
,
character_dict_path
,
main_indicator
=
'f_score_e2e'
,
main_indicator
=
'f_score_e2e'
,
**
kwargs
):
**
kwargs
):
self
.
mode
=
mode
self
.
gt_mat_dir
=
gt_mat_dir
self
.
gt_mat_dir
=
gt_mat_dir
self
.
label_list
=
get_dict
(
character_dict_path
)
self
.
label_list
=
get_dict
(
character_dict_path
)
self
.
max_index
=
len
(
self
.
label_list
)
self
.
max_index
=
len
(
self
.
label_list
)
...
@@ -35,13 +37,45 @@ class E2EMetric(object):
...
@@ -35,13 +37,45 @@ class E2EMetric(object):
self
.
reset
()
self
.
reset
()
def
__call__
(
self
,
preds
,
batch
,
**
kwargs
):
def
__call__
(
self
,
preds
,
batch
,
**
kwargs
):
img_id
=
batch
[
5
][
0
]
if
self
.
mode
==
'A'
:
e2e_info_list
=
[{
gt_polyons_batch
=
batch
[
2
]
'points'
:
det_polyon
,
temp_gt_strs_batch
=
batch
[
3
][
0
]
'text'
:
pred_str
ignore_tags_batch
=
batch
[
4
]
}
for
det_polyon
,
pred_str
in
zip
(
preds
[
'points'
],
preds
[
'strs'
])]
gt_strs_batch
=
[]
result
=
get_socre
(
self
.
gt_mat_dir
,
img_id
,
e2e_info_list
)
self
.
results
.
append
(
result
)
for
temp_list
in
temp_gt_strs_batch
:
t
=
""
for
index
in
temp_list
:
if
index
<
self
.
max_index
:
t
+=
self
.
label_list
[
index
]
gt_strs_batch
.
append
(
t
)
for
pred
,
gt_polyons
,
gt_strs
,
ignore_tags
in
zip
(
[
preds
],
gt_polyons_batch
,
[
gt_strs_batch
],
ignore_tags_batch
):
# prepare gt
gt_info_list
=
[{
'points'
:
gt_polyon
,
'text'
:
gt_str
,
'ignore'
:
ignore_tag
}
for
gt_polyon
,
gt_str
,
ignore_tag
in
zip
(
gt_polyons
,
gt_strs
,
ignore_tags
)]
# prepare det
e2e_info_list
=
[{
'points'
:
det_polyon
,
'texts'
:
pred_str
}
for
det_polyon
,
pred_str
in
zip
(
pred
[
'points'
],
pred
[
'texts'
])]
result
=
get_socre_A
(
gt_info_list
,
e2e_info_list
)
self
.
results
.
append
(
result
)
else
:
img_id
=
batch
[
5
][
0
]
e2e_info_list
=
[{
'points'
:
det_polyon
,
'texts'
:
pred_str
}
for
det_polyon
,
pred_str
in
zip
(
preds
[
'points'
],
preds
[
'texts'
])]
result
=
get_socre_B
(
self
.
gt_mat_dir
,
img_id
,
e2e_info_list
)
self
.
results
.
append
(
result
)
def
get_metric
(
self
):
def
get_metric
(
self
):
metircs
=
combine_results
(
self
.
results
)
metircs
=
combine_results
(
self
.
results
)
...
...
ppocr/metrics/eval_det_iou.py
View file @
83303bc7
...
@@ -169,21 +169,10 @@ class DetectionIoUEvaluator(object):
...
@@ -169,21 +169,10 @@ class DetectionIoUEvaluator(object):
numGlobalCareDet
+=
numDetCare
numGlobalCareDet
+=
numDetCare
perSampleMetrics
=
{
perSampleMetrics
=
{
'precision'
:
precision
,
'recall'
:
recall
,
'hmean'
:
hmean
,
'pairs'
:
pairs
,
'iouMat'
:
[]
if
len
(
detPols
)
>
100
else
iouMat
.
tolist
(),
'gtPolPoints'
:
gtPolPoints
,
'detPolPoints'
:
detPolPoints
,
'gtCare'
:
numGtCare
,
'gtCare'
:
numGtCare
,
'detCare'
:
numDetCare
,
'detCare'
:
numDetCare
,
'gtDontCare'
:
gtDontCarePolsNum
,
'detDontCare'
:
detDontCarePolsNum
,
'detMatched'
:
detMatched
,
'detMatched'
:
detMatched
,
'evaluationLog'
:
evaluationLog
}
}
return
perSampleMetrics
return
perSampleMetrics
def
combine_results
(
self
,
results
):
def
combine_results
(
self
,
results
):
...
...
ppocr/metrics/rec_metric.py
View file @
83303bc7
...
@@ -13,13 +13,20 @@
...
@@ -13,13 +13,20 @@
# limitations under the License.
# limitations under the License.
import
Levenshtein
import
Levenshtein
import
string
class
RecMetric
(
object
):
class
RecMetric
(
object
):
def
__init__
(
self
,
main_indicator
=
'acc'
,
**
kwargs
):
def
__init__
(
self
,
main_indicator
=
'acc'
,
is_filter
=
False
,
**
kwargs
):
self
.
main_indicator
=
main_indicator
self
.
main_indicator
=
main_indicator
self
.
is_filter
=
is_filter
self
.
reset
()
self
.
reset
()
def
_normalize_text
(
self
,
text
):
text
=
''
.
join
(
filter
(
lambda
x
:
x
in
(
string
.
digits
+
string
.
ascii_letters
),
text
))
return
text
.
lower
()
def
__call__
(
self
,
pred_label
,
*
args
,
**
kwargs
):
def
__call__
(
self
,
pred_label
,
*
args
,
**
kwargs
):
preds
,
labels
=
pred_label
preds
,
labels
=
pred_label
correct_num
=
0
correct_num
=
0
...
@@ -28,6 +35,9 @@ class RecMetric(object):
...
@@ -28,6 +35,9 @@ class RecMetric(object):
for
(
pred
,
pred_conf
),
(
target
,
_
)
in
zip
(
preds
,
labels
):
for
(
pred
,
pred_conf
),
(
target
,
_
)
in
zip
(
preds
,
labels
):
pred
=
pred
.
replace
(
" "
,
""
)
pred
=
pred
.
replace
(
" "
,
""
)
target
=
target
.
replace
(
" "
,
""
)
target
=
target
.
replace
(
" "
,
""
)
if
self
.
is_filter
:
pred
=
self
.
_normalize_text
(
pred
)
target
=
self
.
_normalize_text
(
target
)
norm_edit_dis
+=
Levenshtein
.
distance
(
pred
,
target
)
/
max
(
norm_edit_dis
+=
Levenshtein
.
distance
(
pred
,
target
)
/
max
(
len
(
pred
),
len
(
target
),
1
)
len
(
pred
),
len
(
target
),
1
)
if
pred
==
target
:
if
pred
==
target
:
...
...
ppocr/metrics/table_metric.py
0 → 100644
View file @
83303bc7
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
class TableMetric(object):
    """Exact-match accuracy of predicted table-structure token sequences.

    A sample counts as correct only when every predicted token equals the
    corresponding label token.
    """

    def __init__(self, main_indicator='acc', **kwargs):
        self.main_indicator = main_indicator
        self.reset()

    def __call__(self, pred, batch, *args, **kwargs):
        """Update the running counters with one batch; return batch accuracy.

        Args:
            pred: dict with 'structure_probs', a tensor-like object whose
                ``.numpy()`` returns shape (batch, seq_len, num_classes).
            batch: sequence whose element 1 holds the structure labels with
                a leading start token that is skipped for comparison.
        """
        structure_probs = pred['structure_probs'].numpy()
        structure_labels = batch[1]
        structure_probs = np.argmax(structure_probs, axis=2)
        # Drop the leading start token from the labels.
        structure_labels = structure_labels[:, 1:]
        batch_size = structure_probs.shape[0]
        all_num = batch_size
        correct_num = sum(
            1 for bno in range(batch_size)
            if (structure_probs[bno] == structure_labels[bno]).all())
        self.correct_num += correct_num
        self.all_num += all_num
        # max(..., 1) guards against division by zero on an empty batch.
        return {'acc': correct_num * 1.0 / max(all_num, 1), }

    def get_metric(self):
        """
        return metrics {
                 'acc': 0,
            }
        """
        # max(..., 1) guards against get_metric() before any update.
        acc = 1.0 * self.correct_num / max(self.all_num, 1)
        self.reset()
        return {'acc': acc}

    def reset(self):
        self.correct_num = 0
        self.all_num = 0
ppocr/modeling/architectures/__init__.py
View file @
83303bc7
...
@@ -13,12 +13,20 @@
...
@@ -13,12 +13,20 @@
# limitations under the License.
# limitations under the License.
import
copy
import
copy
import
importlib
from
.base_model
import
BaseModel
from
.distillation_model
import
DistillationModel
__all__
=
[
'build_model'
]
__all__
=
[
'build_model'
]
def
build_model
(
config
):
def
build_model
(
config
):
from
.base_model
import
BaseModel
config
=
copy
.
deepcopy
(
config
)
config
=
copy
.
deepcopy
(
config
)
module_class
=
BaseModel
(
config
)
if
not
"name"
in
config
:
return
module_class
arch
=
BaseModel
(
config
)
\ No newline at end of file
else
:
name
=
config
.
pop
(
"name"
)
mod
=
importlib
.
import_module
(
__name__
)
arch
=
getattr
(
mod
,
name
)(
config
)
return
arch
ppocr/modeling/architectures/base_model.py
View file @
83303bc7
# Copyright (c) 202
0
PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 202
1
PaddlePaddle Authors. All Rights Reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
...
@@ -14,7 +14,6 @@
...
@@ -14,7 +14,6 @@
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
from
__future__
import
print_function
from
__future__
import
print_function
from
paddle
import
nn
from
paddle
import
nn
from
ppocr.modeling.transforms
import
build_transform
from
ppocr.modeling.transforms
import
build_transform
from
ppocr.modeling.backbones
import
build_backbone
from
ppocr.modeling.backbones
import
build_backbone
...
@@ -32,7 +31,6 @@ class BaseModel(nn.Layer):
...
@@ -32,7 +31,6 @@ class BaseModel(nn.Layer):
config (dict): the super parameters for module.
config (dict): the super parameters for module.
"""
"""
super
(
BaseModel
,
self
).
__init__
()
super
(
BaseModel
,
self
).
__init__
()
in_channels
=
config
.
get
(
'in_channels'
,
3
)
in_channels
=
config
.
get
(
'in_channels'
,
3
)
model_type
=
config
[
'model_type'
]
model_type
=
config
[
'model_type'
]
# build transfrom,
# build transfrom,
...
@@ -68,14 +66,23 @@ class BaseModel(nn.Layer):
...
@@ -68,14 +66,23 @@ class BaseModel(nn.Layer):
config
[
"Head"
][
'in_channels'
]
=
in_channels
config
[
"Head"
][
'in_channels'
]
=
in_channels
self
.
head
=
build_head
(
config
[
"Head"
])
self
.
head
=
build_head
(
config
[
"Head"
])
self
.
return_all_feats
=
config
.
get
(
"return_all_feats"
,
False
)
def
forward
(
self
,
x
,
data
=
None
):
def
forward
(
self
,
x
,
data
=
None
):
y
=
dict
()
if
self
.
use_transform
:
if
self
.
use_transform
:
x
=
self
.
transform
(
x
)
x
=
self
.
transform
(
x
)
x
=
self
.
backbone
(
x
)
x
=
self
.
backbone
(
x
)
y
[
"backbone_out"
]
=
x
if
self
.
use_neck
:
if
self
.
use_neck
:
x
=
self
.
neck
(
x
)
x
=
self
.
neck
(
x
)
if
data
is
None
:
y
[
"neck_out"
]
=
x
x
=
self
.
head
(
x
)
x
=
self
.
head
(
x
,
targets
=
data
)
if
isinstance
(
x
,
dict
):
y
.
update
(
x
)
else
:
y
[
"head_out"
]
=
x
if
self
.
return_all_feats
:
return
y
else
:
else
:
x
=
self
.
head
(
x
,
data
)
return
x
return
x
ppocr/modeling/architectures/distillation_model.py
0 → 100644
View file @
83303bc7
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
paddle
import
nn
from
ppocr.modeling.transforms
import
build_transform
from
ppocr.modeling.backbones
import
build_backbone
from
ppocr.modeling.necks
import
build_neck
from
ppocr.modeling.heads
import
build_head
from
.base_model
import
BaseModel
from
ppocr.utils.save_load
import
init_model
,
load_pretrained_params
__all__
=
[
'DistillationModel'
]
class DistillationModel(nn.Layer):
    def __init__(self, config):
        """
        the module for OCR distillation.
        args:
            config (dict): the super parameters for module.
        """
        super().__init__()
        self.model_list = []
        self.model_name_list = []
        for name, sub_config in config["Models"].items():
            # `pop` with a default matches the original "if present" checks
            # and removes the keys before handing the dict to BaseModel.
            freeze_params = sub_config.pop("freeze_params", False)
            pretrained = sub_config.pop("pretrained", None)

            sub_model = BaseModel(sub_config)
            if pretrained is not None:
                load_pretrained_params(sub_model, pretrained)
            if freeze_params:
                # Frozen sub-models (typically the teacher) do not train.
                for param in sub_model.parameters():
                    param.trainable = False

            self.model_list.append(self.add_sublayer(name, sub_model))
            self.model_name_list.append(name)

    def forward(self, x):
        # Run every sub-model on the same input; key results by model name.
        return {
            name: sub_model(x)
            for name, sub_model in zip(self.model_name_list, self.model_list)
        }
ppocr/modeling/backbones/__init__.py
View file @
83303bc7
...
@@ -12,33 +12,43 @@
...
@@ -12,33 +12,43 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
__all__
=
[
'
build_backbone
'
]
__all__
=
[
"
build_backbone
"
]
def
build_backbone
(
config
,
model_type
):
def
build_backbone
(
config
,
model_type
):
if
model_type
==
'
det
'
:
if
model_type
==
"
det
"
:
from
.det_mobilenet_v3
import
MobileNetV3
from
.det_mobilenet_v3
import
MobileNetV3
from
.det_resnet_vd
import
ResNet
from
.det_resnet_vd
import
ResNet
from
.det_resnet_vd_sast
import
ResNet_SAST
from
.det_resnet_vd_sast
import
ResNet_SAST
support_dict
=
[
'
MobileNetV3
'
,
'
ResNet
'
,
'
ResNet_SAST
'
]
support_dict
=
[
"
MobileNetV3
"
,
"
ResNet
"
,
"
ResNet_SAST
"
]
elif
model_type
==
'
rec
'
or
model_type
==
'
cls
'
:
elif
model_type
==
"
rec
"
or
model_type
==
"
cls
"
:
from
.rec_mobilenet_v3
import
MobileNetV3
from
.rec_mobilenet_v3
import
MobileNetV3
from
.rec_resnet_vd
import
ResNet
from
.rec_resnet_vd
import
ResNet
from
.rec_resnet_fpn
import
ResNetFPN
from
.rec_resnet_fpn
import
ResNetFPN
support_dict
=
[
'MobileNetV3'
,
'ResNet'
,
'ResNetFPN'
]
from
.rec_mv1_enhance
import
MobileNetV1Enhance
elif
model_type
==
'e2e'
:
from
.rec_nrtr_mtb
import
MTB
from
.rec_resnet_31
import
ResNet31
from
.rec_resnet_aster
import
ResNet_ASTER
support_dict
=
[
'MobileNetV1Enhance'
,
'MobileNetV3'
,
'ResNet'
,
'ResNetFPN'
,
'MTB'
,
"ResNet31"
,
"ResNet_ASTER"
]
elif
model_type
==
"e2e"
:
from
.e2e_resnet_vd_pg
import
ResNet
from
.e2e_resnet_vd_pg
import
ResNet
support_dict
=
[
'ResNet'
]
support_dict
=
[
'ResNet'
]
elif
model_type
==
'kie'
:
elif
model_type
==
'kie'
:
from
.kie_unet_sdmgr
import
Kie_backbone
from
.kie_unet_sdmgr
import
Kie_backbone
support_dict
=
[
'Kie_backbone'
]
support_dict
=
[
'Kie_backbone'
]
elif
model_type
==
"table"
:
from
.table_resnet_vd
import
ResNet
from
.table_mobilenet_v3
import
MobileNetV3
support_dict
=
[
"ResNet"
,
"MobileNetV3"
]
else
:
else
:
raise
NotImplementedError
raise
NotImplementedError
module_name
=
config
.
pop
(
'
name
'
)
module_name
=
config
.
pop
(
"
name
"
)
assert
module_name
in
support_dict
,
Exception
(
assert
module_name
in
support_dict
,
Exception
(
'
when model typs is {}, backbone only support {}
'
.
format
(
model_type
,
"
when model typs is {}, backbone only support {}
"
.
format
(
model_type
,
support_dict
))
support_dict
))
module_class
=
eval
(
module_name
)(
**
config
)
module_class
=
eval
(
module_name
)(
**
config
)
return
module_class
return
module_class
ppocr/modeling/backbones/det_mobilenet_v3.py
View file @
83303bc7
...
@@ -102,8 +102,7 @@ class MobileNetV3(nn.Layer):
...
@@ -102,8 +102,7 @@ class MobileNetV3(nn.Layer):
padding
=
1
,
padding
=
1
,
groups
=
1
,
groups
=
1
,
if_act
=
True
,
if_act
=
True
,
act
=
'hardswish'
,
act
=
'hardswish'
)
name
=
'conv1'
)
self
.
stages
=
[]
self
.
stages
=
[]
self
.
out_channels
=
[]
self
.
out_channels
=
[]
...
@@ -125,8 +124,7 @@ class MobileNetV3(nn.Layer):
...
@@ -125,8 +124,7 @@ class MobileNetV3(nn.Layer):
kernel_size
=
k
,
kernel_size
=
k
,
stride
=
s
,
stride
=
s
,
use_se
=
se
,
use_se
=
se
,
act
=
nl
,
act
=
nl
))
name
=
"conv"
+
str
(
i
+
2
)))
inplanes
=
make_divisible
(
scale
*
c
)
inplanes
=
make_divisible
(
scale
*
c
)
i
+=
1
i
+=
1
block_list
.
append
(
block_list
.
append
(
...
@@ -138,8 +136,7 @@ class MobileNetV3(nn.Layer):
...
@@ -138,8 +136,7 @@ class MobileNetV3(nn.Layer):
padding
=
0
,
padding
=
0
,
groups
=
1
,
groups
=
1
,
if_act
=
True
,
if_act
=
True
,
act
=
'hardswish'
,
act
=
'hardswish'
))
name
=
'conv_last'
))
self
.
stages
.
append
(
nn
.
Sequential
(
*
block_list
))
self
.
stages
.
append
(
nn
.
Sequential
(
*
block_list
))
self
.
out_channels
.
append
(
make_divisible
(
scale
*
cls_ch_squeeze
))
self
.
out_channels
.
append
(
make_divisible
(
scale
*
cls_ch_squeeze
))
for
i
,
stage
in
enumerate
(
self
.
stages
):
for
i
,
stage
in
enumerate
(
self
.
stages
):
...
@@ -163,8 +160,7 @@ class ConvBNLayer(nn.Layer):
...
@@ -163,8 +160,7 @@ class ConvBNLayer(nn.Layer):
padding
,
padding
,
groups
=
1
,
groups
=
1
,
if_act
=
True
,
if_act
=
True
,
act
=
None
,
act
=
None
):
name
=
None
):
super
(
ConvBNLayer
,
self
).
__init__
()
super
(
ConvBNLayer
,
self
).
__init__
()
self
.
if_act
=
if_act
self
.
if_act
=
if_act
self
.
act
=
act
self
.
act
=
act
...
@@ -175,16 +171,9 @@ class ConvBNLayer(nn.Layer):
...
@@ -175,16 +171,9 @@ class ConvBNLayer(nn.Layer):
stride
=
stride
,
stride
=
stride
,
padding
=
padding
,
padding
=
padding
,
groups
=
groups
,
groups
=
groups
,
weight_attr
=
ParamAttr
(
name
=
name
+
'_weights'
),
bias_attr
=
False
)
bias_attr
=
False
)
self
.
bn
=
nn
.
BatchNorm
(
self
.
bn
=
nn
.
BatchNorm
(
num_channels
=
out_channels
,
act
=
None
)
num_channels
=
out_channels
,
act
=
None
,
param_attr
=
ParamAttr
(
name
=
name
+
"_bn_scale"
),
bias_attr
=
ParamAttr
(
name
=
name
+
"_bn_offset"
),
moving_mean_name
=
name
+
"_bn_mean"
,
moving_variance_name
=
name
+
"_bn_variance"
)
def
forward
(
self
,
x
):
def
forward
(
self
,
x
):
x
=
self
.
conv
(
x
)
x
=
self
.
conv
(
x
)
...
@@ -209,8 +198,7 @@ class ResidualUnit(nn.Layer):
...
@@ -209,8 +198,7 @@ class ResidualUnit(nn.Layer):
kernel_size
,
kernel_size
,
stride
,
stride
,
use_se
,
use_se
,
act
=
None
,
act
=
None
):
name
=
''
):
super
(
ResidualUnit
,
self
).
__init__
()
super
(
ResidualUnit
,
self
).
__init__
()
self
.
if_shortcut
=
stride
==
1
and
in_channels
==
out_channels
self
.
if_shortcut
=
stride
==
1
and
in_channels
==
out_channels
self
.
if_se
=
use_se
self
.
if_se
=
use_se
...
@@ -222,8 +210,7 @@ class ResidualUnit(nn.Layer):
...
@@ -222,8 +210,7 @@ class ResidualUnit(nn.Layer):
stride
=
1
,
stride
=
1
,
padding
=
0
,
padding
=
0
,
if_act
=
True
,
if_act
=
True
,
act
=
act
,
act
=
act
)
name
=
name
+
"_expand"
)
self
.
bottleneck_conv
=
ConvBNLayer
(
self
.
bottleneck_conv
=
ConvBNLayer
(
in_channels
=
mid_channels
,
in_channels
=
mid_channels
,
out_channels
=
mid_channels
,
out_channels
=
mid_channels
,
...
@@ -232,10 +219,9 @@ class ResidualUnit(nn.Layer):
...
@@ -232,10 +219,9 @@ class ResidualUnit(nn.Layer):
padding
=
int
((
kernel_size
-
1
)
//
2
),
padding
=
int
((
kernel_size
-
1
)
//
2
),
groups
=
mid_channels
,
groups
=
mid_channels
,
if_act
=
True
,
if_act
=
True
,
act
=
act
,
act
=
act
)
name
=
name
+
"_depthwise"
)
if
self
.
if_se
:
if
self
.
if_se
:
self
.
mid_se
=
SEModule
(
mid_channels
,
name
=
name
+
"_se"
)
self
.
mid_se
=
SEModule
(
mid_channels
)
self
.
linear_conv
=
ConvBNLayer
(
self
.
linear_conv
=
ConvBNLayer
(
in_channels
=
mid_channels
,
in_channels
=
mid_channels
,
out_channels
=
out_channels
,
out_channels
=
out_channels
,
...
@@ -243,8 +229,7 @@ class ResidualUnit(nn.Layer):
...
@@ -243,8 +229,7 @@ class ResidualUnit(nn.Layer):
stride
=
1
,
stride
=
1
,
padding
=
0
,
padding
=
0
,
if_act
=
False
,
if_act
=
False
,
act
=
None
,
act
=
None
)
name
=
name
+
"_linear"
)
def
forward
(
self
,
inputs
):
def
forward
(
self
,
inputs
):
x
=
self
.
expand_conv
(
inputs
)
x
=
self
.
expand_conv
(
inputs
)
...
@@ -258,7 +243,7 @@ class ResidualUnit(nn.Layer):
...
@@ -258,7 +243,7 @@ class ResidualUnit(nn.Layer):
class
SEModule
(
nn
.
Layer
):
class
SEModule
(
nn
.
Layer
):
def
__init__
(
self
,
in_channels
,
reduction
=
4
,
name
=
""
):
def
__init__
(
self
,
in_channels
,
reduction
=
4
):
super
(
SEModule
,
self
).
__init__
()
super
(
SEModule
,
self
).
__init__
()
self
.
avg_pool
=
nn
.
AdaptiveAvgPool2D
(
1
)
self
.
avg_pool
=
nn
.
AdaptiveAvgPool2D
(
1
)
self
.
conv1
=
nn
.
Conv2D
(
self
.
conv1
=
nn
.
Conv2D
(
...
@@ -266,17 +251,13 @@ class SEModule(nn.Layer):
...
@@ -266,17 +251,13 @@ class SEModule(nn.Layer):
out_channels
=
in_channels
//
reduction
,
out_channels
=
in_channels
//
reduction
,
kernel_size
=
1
,
kernel_size
=
1
,
stride
=
1
,
stride
=
1
,
padding
=
0
,
padding
=
0
)
weight_attr
=
ParamAttr
(
name
=
name
+
"_1_weights"
),
bias_attr
=
ParamAttr
(
name
=
name
+
"_1_offset"
))
self
.
conv2
=
nn
.
Conv2D
(
self
.
conv2
=
nn
.
Conv2D
(
in_channels
=
in_channels
//
reduction
,
in_channels
=
in_channels
//
reduction
,
out_channels
=
in_channels
,
out_channels
=
in_channels
,
kernel_size
=
1
,
kernel_size
=
1
,
stride
=
1
,
stride
=
1
,
padding
=
0
,
padding
=
0
)
weight_attr
=
ParamAttr
(
name
+
"_2_weights"
),
bias_attr
=
ParamAttr
(
name
=
name
+
"_2_offset"
))
def
forward
(
self
,
inputs
):
def
forward
(
self
,
inputs
):
outputs
=
self
.
avg_pool
(
inputs
)
outputs
=
self
.
avg_pool
(
inputs
)
...
...
ppocr/modeling/backbones/rec_mobilenet_v3.py
View file @
83303bc7
...
@@ -96,8 +96,7 @@ class MobileNetV3(nn.Layer):
...
@@ -96,8 +96,7 @@ class MobileNetV3(nn.Layer):
padding
=
1
,
padding
=
1
,
groups
=
1
,
groups
=
1
,
if_act
=
True
,
if_act
=
True
,
act
=
'hardswish'
,
act
=
'hardswish'
)
name
=
'conv1'
)
i
=
0
i
=
0
block_list
=
[]
block_list
=
[]
inplanes
=
make_divisible
(
inplanes
*
scale
)
inplanes
=
make_divisible
(
inplanes
*
scale
)
...
@@ -110,8 +109,7 @@ class MobileNetV3(nn.Layer):
...
@@ -110,8 +109,7 @@ class MobileNetV3(nn.Layer):
kernel_size
=
k
,
kernel_size
=
k
,
stride
=
s
,
stride
=
s
,
use_se
=
se
,
use_se
=
se
,
act
=
nl
,
act
=
nl
))
name
=
'conv'
+
str
(
i
+
2
)))
inplanes
=
make_divisible
(
scale
*
c
)
inplanes
=
make_divisible
(
scale
*
c
)
i
+=
1
i
+=
1
self
.
blocks
=
nn
.
Sequential
(
*
block_list
)
self
.
blocks
=
nn
.
Sequential
(
*
block_list
)
...
@@ -124,8 +122,7 @@ class MobileNetV3(nn.Layer):
...
@@ -124,8 +122,7 @@ class MobileNetV3(nn.Layer):
padding
=
0
,
padding
=
0
,
groups
=
1
,
groups
=
1
,
if_act
=
True
,
if_act
=
True
,
act
=
'hardswish'
,
act
=
'hardswish'
)
name
=
'conv_last'
)
self
.
pool
=
nn
.
MaxPool2D
(
kernel_size
=
2
,
stride
=
2
,
padding
=
0
)
self
.
pool
=
nn
.
MaxPool2D
(
kernel_size
=
2
,
stride
=
2
,
padding
=
0
)
self
.
out_channels
=
make_divisible
(
scale
*
cls_ch_squeeze
)
self
.
out_channels
=
make_divisible
(
scale
*
cls_ch_squeeze
)
...
...
ppocr/modeling/backbones/rec_mv1_enhance.py
0 → 100644
View file @
83303bc7
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
import
paddle
from
paddle
import
ParamAttr
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
from
paddle.nn
import
Conv2D
,
BatchNorm
,
Linear
,
Dropout
from
paddle.nn
import
AdaptiveAvgPool2D
,
MaxPool2D
,
AvgPool2D
from
paddle.nn.initializer
import
KaimingNormal
import
math
import
numpy
as
np
import
paddle
from
paddle
import
ParamAttr
,
reshape
,
transpose
,
concat
,
split
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
from
paddle.nn
import
Conv2D
,
BatchNorm
,
Linear
,
Dropout
from
paddle.nn
import
AdaptiveAvgPool2D
,
MaxPool2D
,
AvgPool2D
from
paddle.nn.initializer
import
KaimingNormal
import
math
from
paddle.nn.functional
import
hardswish
,
hardsigmoid
from
paddle.regularizer
import
L2Decay
class ConvBNLayer(nn.Layer):
    """Conv2D + BatchNorm fused building block.

    The convolution has no bias (the BatchNorm offset plays that role) and is
    initialized with Kaiming-normal weights.  BatchNorm scale/offset carry a
    zero L2 decay so weight decay only regularizes the conv kernels.

    Args:
        num_channels: number of input channels.
        filter_size: conv kernel size.
        num_filters: number of output channels.
        stride: conv stride.
        padding: conv padding.
        channels: unused; kept for call-site compatibility.
        num_groups: conv groups (== num_channels gives a depthwise conv).
        act: activation name applied inside BatchNorm (e.g. 'hard_swish').
    """

    def __init__(self,
                 num_channels,
                 filter_size,
                 num_filters,
                 stride,
                 padding,
                 channels=None,
                 num_groups=1,
                 act='hard_swish'):
        super(ConvBNLayer, self).__init__()

        self._conv = Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(initializer=KaimingNormal()),
            # No conv bias: BatchNorm's offset subsumes it.
            bias_attr=False)

        self._batch_norm = BatchNorm(
            num_filters,
            act=act,
            # Exclude BN scale/offset from weight decay.
            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))

    def forward(self, inputs):
        """Apply conv then batch-norm (which also applies `act`)."""
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y
class DepthwiseSeparable(nn.Layer):
    """Depthwise-separable convolution: depthwise conv -> (optional SE) -> pointwise conv.

    Channel counts are scaled by `scale` to shrink/grow the whole network
    uniformly.

    Args:
        num_channels: input channels (already scaled by the caller).
        num_filters1: unscaled depthwise output channels.
        num_filters2: unscaled pointwise output channels.
        num_groups: unscaled group count for the depthwise conv.
        stride: stride of the depthwise conv (int or (h, w) tuple).
        scale: width multiplier applied to all channel counts.
        dw_size: depthwise kernel size.
        padding: depthwise padding.
        use_se: insert a squeeze-and-excitation block after the depthwise conv.
    """

    def __init__(self,
                 num_channels,
                 num_filters1,
                 num_filters2,
                 num_groups,
                 stride,
                 scale,
                 dw_size=3,
                 padding=1,
                 use_se=False):
        super(DepthwiseSeparable, self).__init__()
        self.use_se = use_se
        self._depthwise_conv = ConvBNLayer(
            num_channels=num_channels,
            num_filters=int(num_filters1 * scale),
            filter_size=dw_size,
            stride=stride,
            padding=padding,
            num_groups=int(num_groups * scale))
        if use_se:
            self._se = SEModule(int(num_filters1 * scale))
        self._pointwise_conv = ConvBNLayer(
            num_channels=int(num_filters1 * scale),
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            padding=0)

    def forward(self, inputs):
        """Run depthwise conv, optional SE recalibration, then 1x1 pointwise conv."""
        y = self._depthwise_conv(inputs)
        if self.use_se:
            y = self._se(y)
        y = self._pointwise_conv(y)
        return y
class MobileNetV1Enhance(nn.Layer):
    """MobileNetV1 backbone adapted for text recognition.

    Differences from vanilla MobileNetV1: several later stages use
    stride (2, 1) so the feature map is downsampled in height but not width
    (text lines are wide and short), 5x5 depthwise kernels and SE blocks in
    the deepest stages, and a final 2x2 max-pool.

    Args:
        in_channels: channels of the input image (default 3).
        scale: width multiplier applied to every stage's channel count.
        **kwargs: ignored; absorbed for config-driven construction.
    """

    def __init__(self, in_channels=3, scale=0.5, **kwargs):
        super().__init__()
        self.scale = scale
        self.block_list = []

        self.conv1 = ConvBNLayer(
            # Bug fix: was hard-coded to 3, silently ignoring `in_channels`.
            num_channels=in_channels,
            filter_size=3,
            channels=3,
            num_filters=int(32 * scale),
            stride=2,
            padding=1)

        conv2_1 = DepthwiseSeparable(
            num_channels=int(32 * scale),
            num_filters1=32,
            num_filters2=64,
            num_groups=32,
            stride=1,
            scale=scale)
        self.block_list.append(conv2_1)

        conv2_2 = DepthwiseSeparable(
            num_channels=int(64 * scale),
            num_filters1=64,
            num_filters2=128,
            num_groups=64,
            stride=1,
            scale=scale)
        self.block_list.append(conv2_2)

        conv3_1 = DepthwiseSeparable(
            num_channels=int(128 * scale),
            num_filters1=128,
            num_filters2=128,
            num_groups=128,
            stride=1,
            scale=scale)
        self.block_list.append(conv3_1)

        # stride (2, 1): halve height only, preserve width resolution.
        conv3_2 = DepthwiseSeparable(
            num_channels=int(128 * scale),
            num_filters1=128,
            num_filters2=256,
            num_groups=128,
            stride=(2, 1),
            scale=scale)
        self.block_list.append(conv3_2)

        conv4_1 = DepthwiseSeparable(
            num_channels=int(256 * scale),
            num_filters1=256,
            num_filters2=256,
            num_groups=256,
            stride=1,
            scale=scale)
        self.block_list.append(conv4_1)

        conv4_2 = DepthwiseSeparable(
            num_channels=int(256 * scale),
            num_filters1=256,
            num_filters2=512,
            num_groups=256,
            stride=(2, 1),
            scale=scale)
        self.block_list.append(conv4_2)

        # Five identical 512-channel stages with 5x5 depthwise kernels.
        for _ in range(5):
            conv5 = DepthwiseSeparable(
                num_channels=int(512 * scale),
                num_filters1=512,
                num_filters2=512,
                num_groups=512,
                stride=1,
                dw_size=5,
                padding=2,
                scale=scale,
                use_se=False)
            self.block_list.append(conv5)

        conv5_6 = DepthwiseSeparable(
            num_channels=int(512 * scale),
            num_filters1=512,
            num_filters2=1024,
            num_groups=512,
            stride=(2, 1),
            dw_size=5,
            padding=2,
            scale=scale,
            use_se=True)
        self.block_list.append(conv5_6)

        conv6 = DepthwiseSeparable(
            num_channels=int(1024 * scale),
            num_filters1=1024,
            num_filters2=1024,
            num_groups=1024,
            stride=1,
            dw_size=5,
            padding=2,
            use_se=True,
            scale=scale)
        self.block_list.append(conv6)

        self.block_list = nn.Sequential(*self.block_list)
        self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
        # Exposed so downstream heads know the feature dimensionality.
        self.out_channels = int(1024 * scale)

    def forward(self, inputs):
        """Return pooled feature map of shape (N, out_channels, H', W')."""
        y = self.conv1(inputs)
        y = self.block_list(y)
        y = self.pool(y)
        return y
class SEModule(nn.Layer):
    """Squeeze-and-Excitation channel attention block.

    Global-average-pools the input to a per-channel descriptor, passes it
    through a two-layer bottleneck (reduce by `reduction`, then restore),
    and rescales the input channel-wise with the resulting hard-sigmoid gate.

    Args:
        channel: number of input/output channels.
        reduction: bottleneck reduction ratio for the first 1x1 conv.
    """

    def __init__(self, channel, reduction=4):
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2D(1)
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=channel // reduction,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(),
            bias_attr=ParamAttr())
        self.conv2 = Conv2D(
            in_channels=channel // reduction,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(),
            bias_attr=ParamAttr())

    def forward(self, inputs):
        """Return `inputs` rescaled by a learned per-channel gate in [0, 1]."""
        outputs = self.avg_pool(inputs)        # squeeze: (N, C, 1, 1)
        outputs = self.conv1(outputs)          # reduce
        outputs = F.relu(outputs)
        outputs = self.conv2(outputs)          # restore
        outputs = hardsigmoid(outputs)         # gate in [0, 1]
        return paddle.multiply(x=inputs, y=outputs)
ppocr/modeling/backbones/rec_nrtr_mtb.py
0 → 100644
View file @
83303bc7
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle
import
nn
import
paddle
class MTB(nn.Layer):
    """Modality-Transform Block: CNN stem for the NRTR recognizer.

    When `cnn_num == 2`, stacks two (Conv2D stride-2 -> ReLU -> BatchNorm)
    stages that downsample the image 4x and raise channels to 64, then
    flattens the result into a width-major sequence for the transformer.
    For other `cnn_num` values the block is empty and the input passes
    through unchanged.

    Args:
        cnn_num: number of conv stages; only 2 builds any layers.
        in_channels: channels of the input image.
    """

    def __init__(self, cnn_num, in_channels):
        super(MTB, self).__init__()
        self.block = nn.Sequential()
        self.out_channels = in_channels
        self.cnn_num = cnn_num
        if self.cnn_num == 2:
            for i in range(self.cnn_num):
                self.block.add_sublayer(
                    'conv_{}'.format(i),
                    nn.Conv2D(
                        # stage 0 takes the raw image; later stages take 32 * 2**(i-1).
                        in_channels=in_channels if i == 0 else 32 * (2**(i - 1)),
                        out_channels=32 * (2**i),
                        kernel_size=3,
                        stride=2,
                        padding=1))
                self.block.add_sublayer('relu_{}'.format(i), nn.ReLU())
                self.block.add_sublayer('bn_{}'.format(i),
                                        nn.BatchNorm2D(32 * (2**i)))

    def forward(self, images):
        x = self.block(images)
        if self.cnn_num == 2:
            # (b, w, h, c)
            x = paddle.transpose(x, [0, 3, 2, 1])
            x_shape = paddle.shape(x)
            # Flatten (h, c) into one feature axis: (b, w, h*c).
            x = paddle.reshape(
                x, [x_shape[0], x_shape[1], x_shape[2] * x_shape[3]])
        return x
ppocr/modeling/backbones/rec_resnet_31.py
0 → 100644
View file @
83303bc7
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
ParamAttr
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
import
numpy
as
np
__all__
=
[
"ResNet31"
]
def conv3x3(in_channel, out_channel, stride=1):
    """Build a bias-free 3x3 Conv2D with padding 1 (shape-preserving at stride 1)."""
    conv = nn.Conv2D(
        in_channel,
        out_channel,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias_attr=False)
    return conv
class BasicBlock(nn.Layer):
    """Residual basic block (two 3x3 convs) used by ResNet31.

    Args:
        in_channels: input channel count.
        channels: output channel count of both convs.
        stride: stride of the first conv.
        downsample: truthy -> build a 1x1 conv + BN projection for the
            shortcut. NOTE(review): `_make_layer` passes an nn.Sequential
            here; any truthy value is replaced by a freshly built projection,
            so the passed module itself is discarded — confirm this is
            intended.
    """

    # Channel expansion factor of the block's output (1 for a basic block).
    expansion = 1

    def __init__(self, in_channels, channels, stride=1, downsample=False):
        super().__init__()
        self.conv1 = conv3x3(in_channels, channels, stride)
        self.bn1 = nn.BatchNorm2D(channels)
        self.relu = nn.ReLU()
        self.conv2 = conv3x3(channels, channels)
        self.bn2 = nn.BatchNorm2D(channels)
        self.downsample = downsample
        if downsample:
            # Projection shortcut to match channel count / stride.
            self.downsample = nn.Sequential(
                nn.Conv2D(
                    in_channels,
                    channels * self.expansion,
                    1,
                    stride,
                    bias_attr=False),
                nn.BatchNorm2D(channels * self.expansion), )
        else:
            # Identity shortcut (empty Sequential is falsy, so it is skipped
            # in forward).
            self.downsample = nn.Sequential()
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out
class ResNet31(nn.Layer):
    '''ResNet-31 backbone for text recognition.

    Args:
        in_channels (int): Number of channels of input image tensor.
        layers (list[int]): List of BasicBlock number for each stage.
        channels (list[int]): List of out_channels of Conv2d layer.
        out_indices (None | Sequence[int]): Indices of output stages.
        last_stage_pool (bool): If True, add `MaxPool2D` layer to last stage.
    '''

    def __init__(self,
                 in_channels=3,
                 layers=[1, 2, 5, 3],
                 channels=[64, 128, 256, 256, 512, 512, 512],
                 out_indices=None,
                 last_stage_pool=False):
        super(ResNet31, self).__init__()
        assert isinstance(in_channels, int)
        assert isinstance(last_stage_pool, bool)

        self.out_indices = out_indices
        self.last_stage_pool = last_stage_pool

        # conv 1 (Conv Conv)
        self.conv1_1 = nn.Conv2D(
            in_channels, channels[0], kernel_size=3, stride=1, padding=1)
        self.bn1_1 = nn.BatchNorm2D(channels[0])
        self.relu1_1 = nn.ReLU()

        self.conv1_2 = nn.Conv2D(
            channels[0], channels[1], kernel_size=3, stride=1, padding=1)
        self.bn1_2 = nn.BatchNorm2D(channels[1])
        self.relu1_2 = nn.ReLU()

        # conv 2 (Max-pooling, Residual block, Conv)
        self.pool2 = nn.MaxPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block2 = self._make_layer(channels[1], channels[2], layers[0])
        self.conv2 = nn.Conv2D(
            channels[2], channels[2], kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2D(channels[2])
        self.relu2 = nn.ReLU()

        # conv 3 (Max-pooling, Residual block, Conv)
        self.pool3 = nn.MaxPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block3 = self._make_layer(channels[2], channels[3], layers[1])
        self.conv3 = nn.Conv2D(
            channels[3], channels[3], kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2D(channels[3])
        self.relu3 = nn.ReLU()

        # conv 4 (Max-pooling, Residual block, Conv)
        # (2, 1) pooling: downsample height only, keep width for wide text lines.
        self.pool4 = nn.MaxPool2D(
            kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True)
        self.block4 = self._make_layer(channels[3], channels[4], layers[2])
        self.conv4 = nn.Conv2D(
            channels[4], channels[4], kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2D(channels[4])
        self.relu4 = nn.ReLU()

        # conv 5 ((Max-pooling), Residual block, Conv)
        self.pool5 = None
        if self.last_stage_pool:
            self.pool5 = nn.MaxPool2D(
                kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block5 = self._make_layer(channels[4], channels[5], layers[3])
        self.conv5 = nn.Conv2D(
            channels[5], channels[5], kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2D(channels[5])
        self.relu5 = nn.ReLU()

        self.out_channels = channels[-1]

    def _make_layer(self, input_channels, output_channels, blocks):
        """Stack `blocks` BasicBlocks; the first one projects channels if needed."""
        layers = []
        for _ in range(blocks):
            downsample = None
            if input_channels != output_channels:
                downsample = nn.Sequential(
                    nn.Conv2D(
                        input_channels,
                        output_channels,
                        kernel_size=1,
                        stride=1,
                        bias_attr=False),
                    nn.BatchNorm2D(output_channels), )

            layers.append(
                BasicBlock(
                    input_channels, output_channels, downsample=downsample))
            input_channels = output_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1_1(x)
        x = self.bn1_1(x)
        x = self.relu1_1(x)

        x = self.conv1_2(x)
        x = self.bn1_2(x)
        x = self.relu1_2(x)

        outs = []
        # Stages 2..5: optional pool -> residual blocks -> conv -> BN -> ReLU.
        for i in range(4):
            layer_index = i + 2
            pool_layer = getattr(self, f'pool{layer_index}')
            block_layer = getattr(self, f'block{layer_index}')
            conv_layer = getattr(self, f'conv{layer_index}')
            bn_layer = getattr(self, f'bn{layer_index}')
            relu_layer = getattr(self, f'relu{layer_index}')

            if pool_layer is not None:
                x = pool_layer(x)
            x = block_layer(x)
            x = conv_layer(x)
            x = bn_layer(x)
            x = relu_layer(x)

            outs.append(x)

        if self.out_indices is not None:
            # Return the requested intermediate stage outputs.
            return tuple([outs[i] for i in self.out_indices])

        return x
ppocr/modeling/backbones/rec_resnet_aster.py
0 → 100644
View file @
83303bc7
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
paddle
import
paddle.nn
as
nn
import
sys
import
math
def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    layer = nn.Conv2D(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias_attr=False)
    return layer
def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    layer = nn.Conv2D(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias_attr=False)
    return layer
def get_sinusoid_encoding(n_position, feat_dim, wave_length=10000):
    """Build a sinusoidal positional-encoding table.

    Even feature dims get sin, odd dims get cos, with wavelengths forming a
    geometric progression from 2*pi up to `wave_length`*2*pi (the scheme of
    "Attention Is All You Need").

    Args:
        n_position: number of positions (rows of the table).
        feat_dim: encoding dimensionality (columns).
        wave_length: base of the wavelength progression.

    Returns:
        float32 tensor of shape [n_position, feat_dim].
    """
    # [n_position]
    positions = paddle.arange(0, n_position)
    # [feat_dim]
    dim_range = paddle.arange(0, feat_dim)
    # wave_length ** (2 * (i // 2) / feat_dim): pairs of dims share a frequency.
    dim_range = paddle.pow(wave_length, 2 * (dim_range // 2) / feat_dim)
    # [n_position, feat_dim]
    angles = paddle.unsqueeze(
        positions, axis=1) / paddle.unsqueeze(
            dim_range, axis=0)
    angles = paddle.cast(angles, "float32")
    # In-place: sin on even columns, cos on odd columns.
    angles[:, 0::2] = paddle.sin(angles[:, 0::2])
    angles[:, 1::2] = paddle.cos(angles[:, 1::2])
    return angles
class AsterBlock(nn.Layer):
    """Residual block for ResNet_ASTER: 1x1 conv then 3x3 conv.

    Args:
        inplanes: input channel count.
        planes: output channel count.
        stride: stride of the 1x1 conv (and the downsample projection).
        downsample: optional module applied to the shortcut when shape/
            channels change; None means identity shortcut.
    """

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(AsterBlock, self).__init__()
        self.conv1 = conv1x1(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2D(planes)
        self.relu = nn.ReLU()
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2D(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out
class ResNet_ASTER(nn.Layer):
    """For aster or crnn

    Five residual stages progressively collapse the image height to 1 while
    keeping width resolution from stage 3 on (strides [2,1]); the resulting
    width-major sequence optionally goes through a 2-layer BiLSTM.

    Args:
        with_lstm: append a bidirectional 2-layer LSTM over the sequence.
        n_group: stored but not used in this implementation.
        in_channels: channels of the input image.
    """

    def __init__(self, with_lstm=True, n_group=1, in_channels=3):
        super(ResNet_ASTER, self).__init__()
        self.with_lstm = with_lstm
        self.n_group = n_group

        self.layer0 = nn.Sequential(
            nn.Conv2D(
                in_channels,
                32,
                kernel_size=(3, 3),
                stride=1,
                padding=1,
                bias_attr=False),
            nn.BatchNorm2D(32),
            nn.ReLU())

        # Comments give the feature-map (H, W) for a 32x100 input.
        self.inplanes = 32
        self.layer1 = self._make_layer(32, 3, [2, 2])  # [16, 50]
        self.layer2 = self._make_layer(64, 4, [2, 2])  # [8, 25]
        self.layer3 = self._make_layer(128, 6, [2, 1])  # [4, 25]
        self.layer4 = self._make_layer(256, 6, [2, 1])  # [2, 25]
        self.layer5 = self._make_layer(512, 3, [2, 1])  # [1, 25]

        if with_lstm:
            self.rnn = nn.LSTM(512, 256, direction="bidirect", num_layers=2)
            self.out_channels = 2 * 256
        else:
            self.out_channels = 512

    def _make_layer(self, planes, blocks, stride):
        """Stack `blocks` AsterBlocks; only the first strides/projects."""
        downsample = None
        if stride != [1, 1] or self.inplanes != planes:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes, stride),
                nn.BatchNorm2D(planes))

        layers = []
        layers.append(AsterBlock(self.inplanes, planes, stride, downsample))
        self.inplanes = planes
        for _ in range(1, blocks):
            layers.append(AsterBlock(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        x0 = self.layer0(x)
        x1 = self.layer1(x0)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)
        x5 = self.layer5(x4)

        # Height is 1 after layer5; drop it to get a sequence.
        cnn_feat = x5.squeeze(2)  # [N, c, w]
        cnn_feat = paddle.transpose(cnn_feat, perm=[0, 2, 1])
        if self.with_lstm:
            rnn_feat, _ = self.rnn(cnn_feat)
            return rnn_feat
        else:
            return cnn_feat
Prev
1
…
12
13
14
15
16
17
18
19
20
…
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment