wangsen / paddle_dbnet — Commit 19eb7eb8
Authored Sep 03, 2021 by Leif

    Merge remote-tracking branch 'origin/dygraph' into dy1

Parents: 0afe6c32, 03b7daa5
Changes: 364 in the full commit — this page shows 20 changed files with 1325 additions and 81 deletions (+1325 / -81).
Files shown on this page:

ppocr/metrics/eval_det_iou.py                        +4    -3
ppocr/metrics/rec_metric.py                          +1    -0
ppocr/metrics/table_metric.py                        +50   -0
ppocr/modeling/architectures/__init__.py             +12   -4
ppocr/modeling/architectures/base_model.py           +14   -7
ppocr/modeling/architectures/distillation_model.py   +60   -0
ppocr/modeling/backbones/__init__.py                 +18   -7
ppocr/modeling/backbones/det_mobilenet_v3.py         +13   -32
ppocr/modeling/backbones/e2e_resnet_vd_pg.py         +265  -0
ppocr/modeling/backbones/rec_mobilenet_v3.py         +3    -6
ppocr/modeling/backbones/rec_mv1_enhance.py          +256  -0
ppocr/modeling/backbones/rec_nrtr_mtb.py             +46   -0
ppocr/modeling/backbones/rec_resnet_vd.py            +1    -1
ppocr/modeling/backbones/table_mobilenet_v3.py       +287  -0
ppocr/modeling/backbones/table_resnet_vd.py          +280  -0
ppocr/modeling/heads/__init__.py                     +6    -1
ppocr/modeling/heads/cls_head.py                     +1    -1
ppocr/modeling/heads/det_db_head.py                  +6    -17
ppocr/modeling/heads/det_east_head.py                +1    -1
ppocr/modeling/heads/det_sast_head.py                +1    -1
ppocr/metrics/eval_det_iou.py  (+4 / -3) — View file @ 19eb7eb8

@@ -150,7 +150,7 @@ class DetectionIoUEvaluator(object):
                        pairs.append({'gt': gtNum, 'det': detNum})
                        detMatchedNums.append(detNum)
                        evaluationLog += "Match GT #" + \
                            str(gtNum) + " with Det #" + str(detNum) + "\n"

        numGtCare = (len(gtPols) - len(gtDontCarePolsNum))
        numDetCare = (len(detPols) - len(detDontCarePolsNum))

@@ -162,7 +162,7 @@ class DetectionIoUEvaluator(object):
        precision = 0 if numDetCare == 0 else float(detMatched) / numDetCare
        hmean = 0 if (precision + recall) == 0 else 2.0 * \
            precision * recall / (precision + recall)

        matchedSum += detMatched
        numGlobalCareGt += numGtCare

@@ -200,7 +200,8 @@ class DetectionIoUEvaluator(object):
        methodPrecision = 0 if numGlobalCareDet == 0 else float(
            matchedSum) / numGlobalCareDet
        methodHmean = 0 if methodRecall + methodPrecision == 0 else 2 * \
            methodRecall * methodPrecision / (methodRecall + methodPrecision)
        # print(methodRecall, methodPrecision, methodHmean)
        # sys.exit(-1)
        methodMetrics = {

(The page shows the re-wrapped continuation lines of these hunks twice, old and
new; the two versions are identical apart from line wrapping, so each hunk is
rendered once in its post-change form.)
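A quick sanity check of the guarded harmonic-mean formula above (the numbers are illustrative, not from the diff):

    # hmean is the harmonic mean of precision and recall, guarded against 0/0.
    recall, precision = 0.8, 0.6
    hmean = 0 if (precision + recall) == 0 else 2.0 * \
        precision * recall / (precision + recall)
    print(round(hmean, 4))  # 0.6857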
ppocr/metrics/rec_metric.py  (+1 / -0) — View file @ 19eb7eb8

@@ -57,3 +57,4 @@ class RecMetric(object):
         self.correct_num = 0
         self.all_num = 0
+        self.norm_edit_dis = 0
ppocr/metrics/table_metric.py  (new file, mode 100644, +50) — View file @ 19eb7eb8

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np


class TableMetric(object):
    def __init__(self, main_indicator='acc', **kwargs):
        self.main_indicator = main_indicator
        self.reset()

    def __call__(self, pred, batch, *args, **kwargs):
        structure_probs = pred['structure_probs'].numpy()
        structure_labels = batch[1]
        correct_num = 0
        all_num = 0
        structure_probs = np.argmax(structure_probs, axis=2)
        structure_labels = structure_labels[:, 1:]
        batch_size = structure_probs.shape[0]
        for bno in range(batch_size):
            all_num += 1
            if (structure_probs[bno] == structure_labels[bno]).all():
                correct_num += 1
        self.correct_num += correct_num
        self.all_num += all_num
        return {'acc': correct_num * 1.0 / all_num, }

    def get_metric(self):
        """
        return metrics {
                 'acc': 0,
            }
        """
        acc = 1.0 * self.correct_num / self.all_num
        self.reset()
        return {'acc': acc}

    def reset(self):
        self.correct_num = 0
        self.all_num = 0
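A minimal usage sketch of TableMetric (the shapes and dummy data below are my own illustrative assumptions, not part of the commit):

    import numpy as np
    import paddle
    from ppocr.metrics.table_metric import TableMetric

    metric = TableMetric(main_indicator='acc')

    # Assumed shapes: batch_size=2, seq_len=4, num_structure_tokens=5.
    probs = paddle.to_tensor(np.random.rand(2, 4, 5).astype('float32'))
    # batch[1] holds the label sequence; __call__ drops position 0
    # (a start token) via structure_labels[:, 1:].
    labels = np.concatenate(
        [np.zeros((2, 1), dtype='int64'),
         np.argmax(probs.numpy(), axis=2)], axis=1)

    print(metric({'structure_probs': probs}, [None, labels]))  # {'acc': 1.0}
    print(metric.get_metric())                                 # {'acc': 1.0}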
ppocr/modeling/architectures/__init__.py  (+12 / -4) — View file @ 19eb7eb8

@@ -13,12 +13,20 @@
 # limitations under the License.

 import copy
+import importlib

+from .base_model import BaseModel
+from .distillation_model import DistillationModel

 __all__ = ['build_model']


 def build_model(config):
-    from .base_model import BaseModel
-
     config = copy.deepcopy(config)
-    module_class = BaseModel(config)
-    return module_class
\ No newline at end of file
+    if not "name" in config:
+        arch = BaseModel(config)
+    else:
+        name = config.pop("name")
+        mod = importlib.import_module(__name__)
+        arch = getattr(mod, name)(config)
+    return arch
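The new build_model keeps the old behaviour when the architecture config has no top-level "name", and otherwise resolves the named class from this module. A hedged sketch of the dispatch (the config keys below are illustrative):

    from ppocr.modeling.architectures import build_model

    # Without "name", the config is wrapped in a plain BaseModel as before.
    # With "name": "DistillationModel", getattr on this module picks the
    # class and the remaining config is forwarded to its constructor.
    config = {
        "name": "DistillationModel",
        "Models": {
            # "Teacher": {...}, "Student": {...}  # per-model BaseModel configs
        },
    }
    # model = build_model(config)  # -> DistillationModel instance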
ppocr/modeling/architectures/base_model.py  (+14 / -7) — View file @ 19eb7eb8

@@ -1,4 +1,4 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.

@@ -14,7 +14,6 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
-
 from paddle import nn
 from ppocr.modeling.transforms import build_transform
 from ppocr.modeling.backbones import build_backbone

@@ -32,7 +31,6 @@ class BaseModel(nn.Layer):
             config (dict): the super parameters for module.
         """
         super(BaseModel, self).__init__()
-
         in_channels = config.get('in_channels', 3)
         model_type = config['model_type']
         # build transfrom,

@@ -68,14 +66,23 @@ class BaseModel(nn.Layer):
             config["Head"]['in_channels'] = in_channels
             self.head = build_head(config["Head"])

+        self.return_all_feats = config.get("return_all_feats", False)
+
     def forward(self, x, data=None):
+        y = dict()
         if self.use_transform:
             x = self.transform(x)
         x = self.backbone(x)
+        y["backbone_out"] = x
         if self.use_neck:
             x = self.neck(x)
-        if data is None:
-            x = self.head(x)
+        y["neck_out"] = x
+        x = self.head(x, targets=data)
+        if isinstance(x, dict):
+            y.update(x)
         else:
-            x = self.head(x, data)
-        return x
+            y["head_out"] = x
+        if self.return_all_feats:
+            return y
+        else:
+            return x
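A toy illustration (mine, not from the diff) of the new forward() contract: with return_all_feats=True the model hands back the dict of intermediate features instead of only the head output, which is what distillation needs:

    import paddle
    from paddle import nn

    class ToyModel(nn.Layer):
        def __init__(self, return_all_feats=False):
            super().__init__()
            self.backbone = nn.Linear(8, 4)
            self.head = nn.Linear(4, 2)
            self.return_all_feats = return_all_feats

        def forward(self, x):
            y = dict()
            x = self.backbone(x)
            y["backbone_out"] = x
            x = self.head(x)
            y["head_out"] = x
            # Distillation needs every intermediate feature; plain training
            # only needs the head output.
            return y if self.return_all_feats else x

    feats = ToyModel(return_all_feats=True)(paddle.rand([1, 8]))
    print(sorted(feats.keys()))  # ['backbone_out', 'head_out']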
ppocr/modeling/architectures/distillation_model.py  (new file, mode 100644, +60) — View file @ 19eb7eb8

# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import nn
from ppocr.modeling.transforms import build_transform
from ppocr.modeling.backbones import build_backbone
from ppocr.modeling.necks import build_neck
from ppocr.modeling.heads import build_head
from .base_model import BaseModel
from ppocr.utils.save_load import init_model, load_pretrained_params

__all__ = ['DistillationModel']


class DistillationModel(nn.Layer):
    def __init__(self, config):
        """
        the module for OCR distillation.
        args:
            config (dict): the super parameters for module.
        """
        super().__init__()
        self.model_list = []
        self.model_name_list = []
        for key in config["Models"]:
            model_config = config["Models"][key]
            freeze_params = False
            pretrained = None
            if "freeze_params" in model_config:
                freeze_params = model_config.pop("freeze_params")
            if "pretrained" in model_config:
                pretrained = model_config.pop("pretrained")
            model = BaseModel(model_config)
            if pretrained is not None:
                load_pretrained_params(model, pretrained)
            if freeze_params:
                for param in model.parameters():
                    param.trainable = False
            self.model_list.append(self.add_sublayer(key, model))
            self.model_name_list.append(key)

    def forward(self, x):
        result_dict = dict()
        for idx, model_name in enumerate(self.model_name_list):
            result_dict[model_name] = self.model_list[idx](x)
        return result_dict
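A hedged sketch of the config shape DistillationModel consumes (the sub-model names and the pretrained path are illustrative, not from the commit):

    config = {
        "Models": {
            "Teacher": {
                "freeze_params": True,       # popped; teacher weights frozen
                "pretrained": "./teacher",   # popped; loaded when not None
                "model_type": "rec",
                # Backbone / Neck / Head sub-configs, as for BaseModel ...
            },
            "Student": {
                "freeze_params": False,
                "pretrained": None,
                "model_type": "rec",
                # ...
            },
        },
    }
    # model = DistillationModel(config)
    # model(images) -> {"Teacher": ..., "Student": ...}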
ppocr/modeling/backbones/__init__.py  (+18 / -7) — View file @ 19eb7eb8

@@ -12,26 +12,37 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__all__ = ['build_backbone']
+__all__ = ["build_backbone"]


 def build_backbone(config, model_type):
-    if model_type == 'det':
+    if model_type == "det":
         from .det_mobilenet_v3 import MobileNetV3
         from .det_resnet_vd import ResNet
         from .det_resnet_vd_sast import ResNet_SAST
-        support_dict = ['MobileNetV3', 'ResNet', 'ResNet_SAST']
-    elif model_type == 'rec' or model_type == 'cls':
+        support_dict = ["MobileNetV3", "ResNet", "ResNet_SAST"]
+    elif model_type == "rec" or model_type == "cls":
         from .rec_mobilenet_v3 import MobileNetV3
         from .rec_resnet_vd import ResNet
         from .rec_resnet_fpn import ResNetFPN
-        support_dict = ['MobileNetV3', 'ResNet', 'ResNetFPN']
+        from .rec_mv1_enhance import MobileNetV1Enhance
+        from .rec_nrtr_mtb import MTB
+        support_dict = [
+            'MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB'
+        ]
+    elif model_type == "e2e":
+        from .e2e_resnet_vd_pg import ResNet
+        support_dict = ["ResNet"]
+    elif model_type == "table":
+        from .table_resnet_vd import ResNet
+        from .table_mobilenet_v3 import MobileNetV3
+        support_dict = ["ResNet", "MobileNetV3"]
     else:
         raise NotImplementedError

-    module_name = config.pop('name')
+    module_name = config.pop("name")
     assert module_name in support_dict, Exception(
-        'when model typs is {}, backbone only support {}'.format(model_type,
+        "when model typs is {}, backbone only support {}".format(model_type,
                                                                  support_dict))
     module_class = eval(module_name)(**config)
     return module_class
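A usage sketch (mine) for the new "table" branch; the argument values are illustrative:

    from ppocr.modeling.backbones import build_backbone

    config = {"name": "MobileNetV3", "model_name": "large", "scale": 0.5}
    backbone = build_backbone(config, model_type="table")
    print(backbone.out_channels)  # per-stage channel list consumed by the neck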
ppocr/modeling/backbones/det_mobilenet_v3.py  (+13 / -32) — View file @ 19eb7eb8

@@ -102,8 +102,7 @@ class MobileNetV3(nn.Layer):
             padding=1,
             groups=1,
             if_act=True,
-            act='hardswish',
-            name='conv1')
+            act='hardswish')

         self.stages = []
         self.out_channels = []

@@ -125,8 +124,7 @@ class MobileNetV3(nn.Layer):
                         kernel_size=k,
                         stride=s,
                         use_se=se,
-                        act=nl,
-                        name="conv" + str(i + 2)))
+                        act=nl))
                 inplanes = make_divisible(scale * c)
                 i += 1
         block_list.append(

@@ -138,8 +136,7 @@ class MobileNetV3(nn.Layer):
                 padding=0,
                 groups=1,
                 if_act=True,
-                act='hardswish',
-                name='conv_last'))
+                act='hardswish'))
         self.stages.append(nn.Sequential(*block_list))
         self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
         for i, stage in enumerate(self.stages):

@@ -163,8 +160,7 @@ class ConvBNLayer(nn.Layer):
                 padding,
                 groups=1,
                 if_act=True,
-                 act=None,
-                 name=None):
+                 act=None):
         super(ConvBNLayer, self).__init__()
         self.if_act = if_act
         self.act = act

@@ -175,16 +171,9 @@ class ConvBNLayer(nn.Layer):
             stride=stride,
             padding=padding,
             groups=groups,
-            weight_attr=ParamAttr(name=name + '_weights'),
             bias_attr=False)

-        self.bn = nn.BatchNorm(
-            num_channels=out_channels,
-            act=None,
-            param_attr=ParamAttr(name=name + "_bn_scale"),
-            bias_attr=ParamAttr(name=name + "_bn_offset"),
-            moving_mean_name=name + "_bn_mean",
-            moving_variance_name=name + "_bn_variance")
+        self.bn = nn.BatchNorm(num_channels=out_channels, act=None)

     def forward(self, x):
         x = self.conv(x)

@@ -209,8 +198,7 @@ class ResidualUnit(nn.Layer):
                 kernel_size,
                 stride,
                 use_se,
-                 act=None,
-                 name=''):
+                 act=None):
         super(ResidualUnit, self).__init__()
         self.if_shortcut = stride == 1 and in_channels == out_channels
         self.if_se = use_se

@@ -222,8 +210,7 @@ class ResidualUnit(nn.Layer):
             stride=1,
             padding=0,
             if_act=True,
-            act=act,
-            name=name + "_expand")
+            act=act)
         self.bottleneck_conv = ConvBNLayer(
             in_channels=mid_channels,
             out_channels=mid_channels,

@@ -232,10 +219,9 @@ class ResidualUnit(nn.Layer):
             padding=int((kernel_size - 1) // 2),
             groups=mid_channels,
             if_act=True,
-            act=act,
-            name=name + "_depthwise")
+            act=act)
         if self.if_se:
-            self.mid_se = SEModule(mid_channels, name=name + "_se")
+            self.mid_se = SEModule(mid_channels)
         self.linear_conv = ConvBNLayer(
             in_channels=mid_channels,
             out_channels=out_channels,

@@ -243,8 +229,7 @@ class ResidualUnit(nn.Layer):
             stride=1,
             padding=0,
             if_act=False,
-            act=None,
-            name=name + "_linear")
+            act=None)

     def forward(self, inputs):
         x = self.expand_conv(inputs)

@@ -258,7 +243,7 @@ class SEModule(nn.Layer):

 class SEModule(nn.Layer):
-    def __init__(self, in_channels, reduction=4, name=""):
+    def __init__(self, in_channels, reduction=4):
         super(SEModule, self).__init__()
         self.avg_pool = nn.AdaptiveAvgPool2D(1)
         self.conv1 = nn.Conv2D(

@@ -266,17 +251,13 @@ class SEModule(nn.Layer):
             out_channels=in_channels // reduction,
             kernel_size=1,
             stride=1,
-            padding=0,
-            weight_attr=ParamAttr(name=name + "_1_weights"),
-            bias_attr=ParamAttr(name=name + "_1_offset"))
+            padding=0)
         self.conv2 = nn.Conv2D(
             in_channels=in_channels // reduction,
             out_channels=in_channels,
             kernel_size=1,
             stride=1,
-            padding=0,
-            weight_attr=ParamAttr(name + "_2_weights"),
-            bias_attr=ParamAttr(name=name + "_2_offset"))
+            padding=0)

     def forward(self, inputs):
         outputs = self.avg_pool(inputs)
ppocr/modeling/backbones/e2e_resnet_vd_pg.py  (new file, mode 100644, +265) — View file @ 19eb7eb8

# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F

__all__ = ["ResNet"]


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 is_vd_mode=False,
                 act=None,
                 name=None, ):
        super(ConvBNLayer, self).__init__()

        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self._conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self._batch_norm = nn.BatchNorm(
            out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y


class BottleneckBlock(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super(BottleneckBlock, self).__init__()

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels * 4,
            kernel_size=1,
            act=None,
            name=name + "_branch2c")

        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels * 4,
                kernel_size=1,
                stride=stride,
                is_vd_mode=False if if_first else True,
                name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv2)
        y = F.relu(y)
        return y


class BasicBlock(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super(BasicBlock, self).__init__()
        self.stride = stride
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")

        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                is_vd_mode=False if if_first else True,
                name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv1)
        y = F.relu(y)
        return y


class ResNet(nn.Layer):
    def __init__(self, in_channels=3, layers=50, **kwargs):
        super(ResNet, self).__init__()

        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            # depth = [3, 4, 6, 3]
            depth = [3, 4, 6, 3, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]
        num_channels = [64, 256, 512, 1024,
                        2048] if layers >= 50 else [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512, 512]

        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=64,
            kernel_size=7,
            stride=2,
            act='relu',
            name="conv1_1")
        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

        self.stages = []
        self.out_channels = [3, 64]
        # num_filters = [64, 128, 256, 512, 512]
        if layers >= 50:
            for block in range(len(depth)):
                block_list = []
                shortcut = False
                for i in range(depth[block]):
                    if layers in [101, 152] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    bottleneck_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BottleneckBlock(
                            in_channels=num_channels[block]
                            if i == 0 else num_filters[block] * 4,
                            out_channels=num_filters[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            name=conv_name))
                    shortcut = True
                    block_list.append(bottleneck_block)
                self.out_channels.append(num_filters[block] * 4)
                self.stages.append(nn.Sequential(*block_list))
        else:
            for block in range(len(depth)):
                block_list = []
                shortcut = False
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    basic_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BasicBlock(
                            in_channels=num_channels[block]
                            if i == 0 else num_filters[block],
                            out_channels=num_filters[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            name=conv_name))
                    shortcut = True
                    block_list.append(basic_block)
                self.out_channels.append(num_filters[block])
                self.stages.append(nn.Sequential(*block_list))

    def forward(self, inputs):
        out = [inputs]
        y = self.conv1_1(inputs)
        out.append(y)
        y = self.pool2d_max(y)
        for block in self.stages:
            y = block(y)
            out.append(y)
        return out
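A hedged smoke test (mine): the PGNet backbone returns the raw input plus the stem and every stage output, so the e2e neck can fuse all scales:

    import paddle
    from ppocr.modeling.backbones.e2e_resnet_vd_pg import ResNet

    net = ResNet(in_channels=3, layers=50)
    feats = net(paddle.rand([1, 3, 64, 64]))
    print(len(feats), [f.shape[1] for f in feats])
    # 7 tensors for layers=50: input, conv1_1, then the five stages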
ppocr/modeling/backbones/rec_mobilenet_v3.py  (+3 / -6) — View file @ 19eb7eb8

@@ -96,8 +96,7 @@ class MobileNetV3(nn.Layer):
             padding=1,
             groups=1,
             if_act=True,
-            act='hardswish',
-            name='conv1')
+            act='hardswish')
         i = 0
         block_list = []
         inplanes = make_divisible(inplanes * scale)

@@ -110,8 +109,7 @@ class MobileNetV3(nn.Layer):
                     kernel_size=k,
                     stride=s,
                     use_se=se,
-                    act=nl,
-                    name='conv' + str(i + 2)))
+                    act=nl))
             inplanes = make_divisible(scale * c)
             i += 1
         self.blocks = nn.Sequential(*block_list)

@@ -124,8 +122,7 @@ class MobileNetV3(nn.Layer):
             padding=0,
             groups=1,
             if_act=True,
-            act='hardswish',
-            name='conv_last')
+            act='hardswish')
         self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
         self.out_channels = make_divisible(scale * cls_ch_squeeze)
ppocr/modeling/backbones/rec_mv1_enhance.py  (new file, mode 100644, +256) — View file @ 19eb7eb8

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np
import paddle
from paddle import ParamAttr, reshape, transpose, concat, split
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.functional import hardswish, hardsigmoid
from paddle.nn.initializer import KaimingNormal
from paddle.regularizer import L2Decay


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 num_channels,
                 filter_size,
                 num_filters,
                 stride,
                 padding,
                 channels=None,
                 num_groups=1,
                 act='hard_swish'):
        super(ConvBNLayer, self).__init__()

        self._conv = Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(initializer=KaimingNormal()),
            bias_attr=False)

        self._batch_norm = BatchNorm(
            num_filters,
            act=act,
            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y


class DepthwiseSeparable(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_filters1,
                 num_filters2,
                 num_groups,
                 stride,
                 scale,
                 dw_size=3,
                 padding=1,
                 use_se=False):
        super(DepthwiseSeparable, self).__init__()
        self.use_se = use_se
        self._depthwise_conv = ConvBNLayer(
            num_channels=num_channels,
            num_filters=int(num_filters1 * scale),
            filter_size=dw_size,
            stride=stride,
            padding=padding,
            num_groups=int(num_groups * scale))
        if use_se:
            self._se = SEModule(int(num_filters1 * scale))
        self._pointwise_conv = ConvBNLayer(
            num_channels=int(num_filters1 * scale),
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            padding=0)

    def forward(self, inputs):
        y = self._depthwise_conv(inputs)
        if self.use_se:
            y = self._se(y)
        y = self._pointwise_conv(y)
        return y


class MobileNetV1Enhance(nn.Layer):
    def __init__(self, in_channels=3, scale=0.5, **kwargs):
        super().__init__()
        self.scale = scale
        self.block_list = []

        self.conv1 = ConvBNLayer(
            num_channels=3,
            filter_size=3,
            channels=3,
            num_filters=int(32 * scale),
            stride=2,
            padding=1)

        conv2_1 = DepthwiseSeparable(
            num_channels=int(32 * scale),
            num_filters1=32,
            num_filters2=64,
            num_groups=32,
            stride=1,
            scale=scale)
        self.block_list.append(conv2_1)

        conv2_2 = DepthwiseSeparable(
            num_channels=int(64 * scale),
            num_filters1=64,
            num_filters2=128,
            num_groups=64,
            stride=1,
            scale=scale)
        self.block_list.append(conv2_2)

        conv3_1 = DepthwiseSeparable(
            num_channels=int(128 * scale),
            num_filters1=128,
            num_filters2=128,
            num_groups=128,
            stride=1,
            scale=scale)
        self.block_list.append(conv3_1)

        conv3_2 = DepthwiseSeparable(
            num_channels=int(128 * scale),
            num_filters1=128,
            num_filters2=256,
            num_groups=128,
            stride=(2, 1),
            scale=scale)
        self.block_list.append(conv3_2)

        conv4_1 = DepthwiseSeparable(
            num_channels=int(256 * scale),
            num_filters1=256,
            num_filters2=256,
            num_groups=256,
            stride=1,
            scale=scale)
        self.block_list.append(conv4_1)

        conv4_2 = DepthwiseSeparable(
            num_channels=int(256 * scale),
            num_filters1=256,
            num_filters2=512,
            num_groups=256,
            stride=(2, 1),
            scale=scale)
        self.block_list.append(conv4_2)

        for _ in range(5):
            conv5 = DepthwiseSeparable(
                num_channels=int(512 * scale),
                num_filters1=512,
                num_filters2=512,
                num_groups=512,
                stride=1,
                dw_size=5,
                padding=2,
                scale=scale,
                use_se=False)
            self.block_list.append(conv5)

        conv5_6 = DepthwiseSeparable(
            num_channels=int(512 * scale),
            num_filters1=512,
            num_filters2=1024,
            num_groups=512,
            stride=(2, 1),
            dw_size=5,
            padding=2,
            scale=scale,
            use_se=True)
        self.block_list.append(conv5_6)

        conv6 = DepthwiseSeparable(
            num_channels=int(1024 * scale),
            num_filters1=1024,
            num_filters2=1024,
            num_groups=1024,
            stride=1,
            dw_size=5,
            padding=2,
            use_se=True,
            scale=scale)
        self.block_list.append(conv6)

        self.block_list = nn.Sequential(*self.block_list)

        self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
        self.out_channels = int(1024 * scale)

    def forward(self, inputs):
        y = self.conv1(inputs)
        y = self.block_list(y)
        y = self.pool(y)
        return y


class SEModule(nn.Layer):
    def __init__(self, channel, reduction=4):
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2D(1)
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=channel // reduction,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(),
            bias_attr=ParamAttr())
        self.conv2 = Conv2D(
            in_channels=channel // reduction,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(),
            bias_attr=ParamAttr())

    def forward(self, inputs):
        outputs = self.avg_pool(inputs)
        outputs = self.conv1(outputs)
        outputs = F.relu(outputs)
        outputs = self.conv2(outputs)
        outputs = hardsigmoid(outputs)
        return paddle.multiply(x=inputs, y=outputs)
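A quick shape check (mine): the (2, 1) strides in the later stages shrink height faster than width, which suits long text-line crops:

    import paddle
    from ppocr.modeling.backbones.rec_mv1_enhance import MobileNetV1Enhance

    net = MobileNetV1Enhance(scale=0.5)
    y = net(paddle.rand([1, 3, 32, 320]))
    print(y.shape, net.out_channels)  # [1, 512, 1, 80], 512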
ppocr/modeling/backbones/rec_nrtr_mtb.py  (new file, mode 100644, +46) — View file @ 19eb7eb8

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle import nn


class MTB(nn.Layer):
    def __init__(self, cnn_num, in_channels):
        super(MTB, self).__init__()
        self.block = nn.Sequential()
        self.out_channels = in_channels
        self.cnn_num = cnn_num
        if self.cnn_num == 2:
            for i in range(self.cnn_num):
                self.block.add_sublayer(
                    'conv_{}'.format(i),
                    nn.Conv2D(
                        in_channels=in_channels
                        if i == 0 else 32 * (2**(i - 1)),
                        out_channels=32 * (2**i),
                        kernel_size=3,
                        stride=2,
                        padding=1))
                self.block.add_sublayer('relu_{}'.format(i), nn.ReLU())
                self.block.add_sublayer('bn_{}'.format(i),
                                        nn.BatchNorm2D(32 * (2**i)))

    def forward(self, images):
        x = self.block(images)
        if self.cnn_num == 2:
            # (b, w, h, c)
            x = x.transpose([0, 3, 2, 1])
            x_shape = x.shape
            x = x.reshape([x_shape[0], x_shape[1], x_shape[2] * x_shape[3]])
        return x
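A hedged shape trace (mine) for the cnn_num == 2 path: two stride-2 convs, then a (b, w, h*c) flatten that feeds the NRTR transformer:

    import paddle
    from ppocr.modeling.backbones.rec_nrtr_mtb import MTB

    mtb = MTB(cnn_num=2, in_channels=1)
    seq = mtb(paddle.rand([1, 1, 32, 100]))
    print(seq.shape)  # [1, 25, 512]: width 25 tokens, each 8 (h) * 64 (c)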
ppocr/modeling/backbones/rec_resnet_vd.py  (+1 / -1) — View file @ 19eb7eb8

@@ -249,7 +249,7 @@ class ResNet(nn.Layer):
                             name=conv_name))
                     shortcut = True
                     self.block_list.append(bottleneck_block)
-                self.out_channels = num_filters[block]
+                self.out_channels = num_filters[block] * 4
         else:
             for block in range(len(depth)):
                 shortcut = False
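The one-line fix above matters because a bottleneck block's last 1x1 conv expands to four times the block width. A hedged check (mine) that the reported out_channels now matches the output tensor:

    import paddle
    from ppocr.modeling.backbones.rec_resnet_vd import ResNet

    net = ResNet(in_channels=3, layers=50)
    feat = net(paddle.rand([1, 3, 32, 100]))
    print(feat.shape[1] == net.out_channels)  # True (2048 for layers=50)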
ppocr/modeling/backbones/table_mobilenet_v3.py  (new file, mode 100644, +287) — View file @ 19eb7eb8

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr

__all__ = ['MobileNetV3']


def make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


class MobileNetV3(nn.Layer):
    def __init__(self,
                 in_channels=3,
                 model_name='large',
                 scale=0.5,
                 disable_se=False,
                 **kwargs):
        """
        the MobilenetV3 backbone network for detection module.
        Args:
            params(dict): the super parameters for build network
        """
        super(MobileNetV3, self).__init__()

        self.disable_se = disable_se

        if model_name == "large":
            cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, 'relu', 1],
                [3, 64, 24, False, 'relu', 2],
                [3, 72, 24, False, 'relu', 1],
                [5, 72, 40, True, 'relu', 2],
                [5, 120, 40, True, 'relu', 1],
                [5, 120, 40, True, 'relu', 1],
                [3, 240, 80, False, 'hardswish', 2],
                [3, 200, 80, False, 'hardswish', 1],
                [3, 184, 80, False, 'hardswish', 1],
                [3, 184, 80, False, 'hardswish', 1],
                [3, 480, 112, True, 'hardswish', 1],
                [3, 672, 112, True, 'hardswish', 1],
                [5, 672, 160, True, 'hardswish', 2],
                [5, 960, 160, True, 'hardswish', 1],
                [5, 960, 160, True, 'hardswish', 1],
            ]
            cls_ch_squeeze = 960
        elif model_name == "small":
            cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, 'relu', 2],
                [3, 72, 24, False, 'relu', 2],
                [3, 88, 24, False, 'relu', 1],
                [5, 96, 40, True, 'hardswish', 2],
                [5, 240, 40, True, 'hardswish', 1],
                [5, 240, 40, True, 'hardswish', 1],
                [5, 120, 48, True, 'hardswish', 1],
                [5, 144, 48, True, 'hardswish', 1],
                [5, 288, 96, True, 'hardswish', 2],
                [5, 576, 96, True, 'hardswish', 1],
                [5, 576, 96, True, 'hardswish', 1],
            ]
            cls_ch_squeeze = 576
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert scale in supported_scale, \
            "supported scale are {} but input scale is {}".format(
                supported_scale, scale)
        inplanes = 16
        # conv1
        self.conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=make_divisible(inplanes * scale),
            kernel_size=3,
            stride=2,
            padding=1,
            groups=1,
            if_act=True,
            act='hardswish',
            name='conv1')

        self.stages = []
        self.out_channels = []
        block_list = []
        i = 0
        inplanes = make_divisible(inplanes * scale)
        for (k, exp, c, se, nl, s) in cfg:
            se = se and not self.disable_se
            start_idx = 2 if model_name == 'large' else 0
            if s == 2 and i > start_idx:
                self.out_channels.append(inplanes)
                self.stages.append(nn.Sequential(*block_list))
                block_list = []
            block_list.append(
                ResidualUnit(
                    in_channels=inplanes,
                    mid_channels=make_divisible(scale * exp),
                    out_channels=make_divisible(scale * c),
                    kernel_size=k,
                    stride=s,
                    use_se=se,
                    act=nl,
                    name="conv" + str(i + 2)))
            inplanes = make_divisible(scale * c)
            i += 1
        block_list.append(
            ConvBNLayer(
                in_channels=inplanes,
                out_channels=make_divisible(scale * cls_ch_squeeze),
                kernel_size=1,
                stride=1,
                padding=0,
                groups=1,
                if_act=True,
                act='hardswish',
                name='conv_last'))
        self.stages.append(nn.Sequential(*block_list))
        self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
        for i, stage in enumerate(self.stages):
            self.add_sublayer(sublayer=stage, name="stage{}".format(i))

    def forward(self, x):
        x = self.conv(x)
        out_list = []
        for stage in self.stages:
            x = stage(x)
            out_list.append(x)
        return out_list


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)

        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=None,
            param_attr=ParamAttr(name=name + "_bn_scale"),
            bias_attr=ParamAttr(name=name + "_bn_offset"),
            moving_mean_name=name + "_bn_mean",
            moving_variance_name=name + "_bn_variance")

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        if self.if_act:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "hardswish":
                x = F.hardswish(x)
            else:
                print("The activation function({}) is selected incorrectly.".
                      format(self.act))
                exit()
        return x


class ResidualUnit(nn.Layer):
    def __init__(self,
                 in_channels,
                 mid_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 use_se,
                 act=None,
                 name=''):
        super(ResidualUnit, self).__init__()
        self.if_shortcut = stride == 1 and in_channels == out_channels
        self.if_se = use_se

        self.expand_conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=mid_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
            name=name + "_expand")
        self.bottleneck_conv = ConvBNLayer(
            in_channels=mid_channels,
            out_channels=mid_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=int((kernel_size - 1) // 2),
            groups=mid_channels,
            if_act=True,
            act=act,
            name=name + "_depthwise")
        if self.if_se:
            self.mid_se = SEModule(mid_channels, name=name + "_se")
        self.linear_conv = ConvBNLayer(
            in_channels=mid_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None,
            name=name + "_linear")

    def forward(self, inputs):
        x = self.expand_conv(inputs)
        x = self.bottleneck_conv(x)
        if self.if_se:
            x = self.mid_se(x)
        x = self.linear_conv(x)
        if self.if_shortcut:
            x = paddle.add(inputs, x)
        return x


class SEModule(nn.Layer):
    def __init__(self, in_channels, reduction=4, name=""):
        super(SEModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2D(1)
        self.conv1 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=in_channels // reduction,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name=name + "_1_weights"),
            bias_attr=ParamAttr(name=name + "_1_offset"))
        self.conv2 = nn.Conv2D(
            in_channels=in_channels // reduction,
            out_channels=in_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name + "_2_weights"),
            bias_attr=ParamAttr(name=name + "_2_offset"))

    def forward(self, inputs):
        outputs = self.avg_pool(inputs)
        outputs = self.conv1(outputs)
        outputs = F.relu(outputs)
        outputs = self.conv2(outputs)
        outputs = F.hardsigmoid(outputs, slope=0.2, offset=0.5)
        return inputs * outputs
ppocr/modeling/backbones/table_resnet_vd.py  (new file, mode 100644, +280) — View file @ 19eb7eb8

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F

__all__ = ["ResNet"]


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 is_vd_mode=False,
                 act=None,
                 name=None, ):
        super(ConvBNLayer, self).__init__()

        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self._conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self._batch_norm = nn.BatchNorm(
            out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def forward(self, inputs):
        if self.is_vd_mode:
            inputs = self._pool2d_avg(inputs)
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y


class BottleneckBlock(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super(BottleneckBlock, self).__init__()

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels * 4,
            kernel_size=1,
            act=None,
            name=name + "_branch2c")

        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels * 4,
                kernel_size=1,
                stride=1,
                is_vd_mode=False if if_first else True,
                name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv2)
        y = F.relu(y)
        return y


class BasicBlock(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super(BasicBlock, self).__init__()
        self.stride = stride
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")

        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                is_vd_mode=False if if_first else True,
                name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv1)
        y = F.relu(y)
        return y


class ResNet(nn.Layer):
    def __init__(self, in_channels=3, layers=50, **kwargs):
        super(ResNet, self).__init__()

        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]
        num_channels = [64, 256, 512,
                        1024] if layers >= 50 else [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512]

        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=32,
            kernel_size=3,
            stride=2,
            act='relu',
            name="conv1_1")
        self.conv1_2 = ConvBNLayer(
            in_channels=32,
            out_channels=32,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_2")
        self.conv1_3 = ConvBNLayer(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_3")
        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

        self.stages = []
        self.out_channels = []
        if layers >= 50:
            for block in range(len(depth)):
                block_list = []
                shortcut = False
                for i in range(depth[block]):
                    if layers in [101, 152] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    bottleneck_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BottleneckBlock(
                            in_channels=num_channels[block]
                            if i == 0 else num_filters[block] * 4,
                            out_channels=num_filters[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            name=conv_name))
                    shortcut = True
                    block_list.append(bottleneck_block)
                self.out_channels.append(num_filters[block] * 4)
                self.stages.append(nn.Sequential(*block_list))
        else:
            for block in range(len(depth)):
                block_list = []
                shortcut = False
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    basic_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BasicBlock(
                            in_channels=num_channels[block]
                            if i == 0 else num_filters[block],
                            out_channels=num_filters[block],
                            stride=2 if i == 0 and block != 0 else 1,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            name=conv_name))
                    shortcut = True
                    block_list.append(basic_block)
                self.out_channels.append(num_filters[block])
                self.stages.append(nn.Sequential(*block_list))

    def forward(self, inputs):
        y = self.conv1_1(inputs)
        y = self.conv1_2(y)
        y = self.conv1_3(y)
        y = self.pool2d_max(y)
        out = []
        for block in self.stages:
            y = block(y)
            out.append(y)
        return out
ppocr/modeling/heads/__init__.py  (+6 / -1) — View file @ 19eb7eb8

@@ -20,19 +20,24 @@ def build_head(config):
     from .det_db_head import DBHead
     from .det_east_head import EASTHead
     from .det_sast_head import SASTHead
+    from .e2e_pg_head import PGHead

     # rec head
     from .rec_ctc_head import CTCHead
     from .rec_att_head import AttentionHead
     from .rec_srn_head import SRNHead
+    from .rec_nrtr_head import Transformer

     # cls head
     from .cls_head import ClsHead
     support_dict = [
         'DBHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead', 'AttentionHead',
-        'SRNHead'
+        'SRNHead', 'PGHead', 'Transformer', 'TableAttentionHead'
     ]

+    #table head
+    from .table_att_head import TableAttentionHead
+
     module_name = config.pop('name')
     assert module_name in support_dict, Exception('head only support {}'.format(
         support_dict))
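A registry sketch (mine): config["name"] selects the head class and the remaining keys become constructor kwargs; the values below are illustrative:

    from ppocr.modeling.heads import build_head

    head = build_head({"name": "DBHead", "in_channels": 256, "k": 50})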
ppocr/modeling/heads/cls_head.py  (+1 / -1) — View file @ 19eb7eb8

@@ -43,7 +43,7 @@ class ClsHead(nn.Layer):
                 initializer=nn.initializer.Uniform(-stdv, stdv)),
             bias_attr=ParamAttr(name="fc_0.b_0"), )

-    def forward(self, x):
+    def forward(self, x, targets=None):
         x = self.pool(x)
         x = paddle.reshape(x, shape=[x.shape[0], x.shape[1]])
         x = self.fc(x)
ppocr/modeling/heads/det_db_head.py  (+6 / -17) — View file @ 19eb7eb8

@@ -23,10 +23,10 @@ import paddle.nn.functional as F
 from paddle import ParamAttr


-def get_bias_attr(k, name):
+def get_bias_attr(k):
     stdv = 1.0 / math.sqrt(k * 1.0)
     initializer = paddle.nn.initializer.Uniform(-stdv, stdv)
-    bias_attr = ParamAttr(initializer=initializer, name=name + "_b_attr")
+    bias_attr = ParamAttr(initializer=initializer)
     return bias_attr

@@ -38,18 +38,14 @@ class Head(nn.Layer):
             out_channels=in_channels // 4,
             kernel_size=3,
             padding=1,
-            weight_attr=ParamAttr(name=name_list[0] + '.w_0'),
+            weight_attr=ParamAttr(),
             bias_attr=False)
         self.conv_bn1 = nn.BatchNorm(
             num_channels=in_channels // 4,
             param_attr=ParamAttr(
-                name=name_list[1] + '.w_0',
                 initializer=paddle.nn.initializer.Constant(value=1.0)),
             bias_attr=ParamAttr(
-                name=name_list[1] + '.b_0',
                 initializer=paddle.nn.initializer.Constant(value=1e-4)),
-            moving_mean_name=name_list[1] + '.w_1',
-            moving_variance_name=name_list[1] + '.w_2',
             act='relu')
         self.conv2 = nn.Conv2DTranspose(
             in_channels=in_channels // 4,

@@ -57,19 +53,14 @@ class Head(nn.Layer):
             kernel_size=2,
             stride=2,
             weight_attr=ParamAttr(
-                name=name_list[2] + '.w_0',
                 initializer=paddle.nn.initializer.KaimingUniform()),
-            bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv2"))
+            bias_attr=get_bias_attr(in_channels // 4))
         self.conv_bn2 = nn.BatchNorm(
             num_channels=in_channels // 4,
             param_attr=ParamAttr(
-                name=name_list[3] + '.w_0',
                 initializer=paddle.nn.initializer.Constant(value=1.0)),
             bias_attr=ParamAttr(
-                name=name_list[3] + '.b_0',
                 initializer=paddle.nn.initializer.Constant(value=1e-4)),
-            moving_mean_name=name_list[3] + '.w_1',
-            moving_variance_name=name_list[3] + '.w_2',
             act="relu")
         self.conv3 = nn.Conv2DTranspose(
             in_channels=in_channels // 4,

@@ -77,10 +68,8 @@ class Head(nn.Layer):
             kernel_size=2,
             stride=2,
             weight_attr=ParamAttr(
-                name=name_list[4] + '.w_0',
                 initializer=paddle.nn.initializer.KaimingUniform()),
-            bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv3"),
-        )
+            bias_attr=get_bias_attr(in_channels // 4), )

     def forward(self, x):
         x = self.conv1(x)

@@ -117,7 +106,7 @@ class DBHead(nn.Layer):
     def step_function(self, x, y):
         return paddle.reciprocal(1 + paddle.exp(-self.k * (x - y)))

-    def forward(self, x):
+    def forward(self, x, targets=None):
         shrink_maps = self.binarize(x)
         if not self.training:
             return {'maps': shrink_maps}
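An aside (mine): step_function is DB's differentiable binarization, 1 / (1 + exp(-k(x - y))) — a steep sigmoid around the learned threshold map y, with k = 50 by default:

    import paddle

    k = 50.0
    x = paddle.to_tensor([0.4, 0.5, 0.6])  # shrink-map probabilities
    y = paddle.to_tensor([0.5, 0.5, 0.5])  # threshold map
    print(paddle.reciprocal(1 + paddle.exp(-k * (x - y))))
    # ~[0.0067, 0.5, 0.9933]: near-binary away from the threshold, smooth at it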
ppocr/modeling/heads/det_east_head.py  (+1 / -1) — View file @ 19eb7eb8

@@ -109,7 +109,7 @@ class EASTHead(nn.Layer):
             act=None,
             name="f_geo")

-    def forward(self, x):
+    def forward(self, x, targets=None):
         f_det = self.det_conv1(x)
         f_det = self.det_conv2(f_det)
         f_score = self.score_conv(f_det)
ppocr/modeling/heads/det_sast_head.py  (+1 / -1) — View file @ 19eb7eb8

@@ -116,7 +116,7 @@ class SASTHead(nn.Layer):
         self.head1 = SAST_Header1(in_channels)
         self.head2 = SAST_Header2(in_channels)

-    def forward(self, x):
+    def forward(self, x, targets=None):
         f_score, f_border = self.head1(x)
         f_tvo, f_tco = self.head2(x)