Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
adc62fcd
Unverified
Commit
adc62fcd
authored
Aug 17, 2021
by
topduke
Committed by
GitHub
Aug 17, 2021
Browse files
Merge branch 'dygraph' into dygraph
parents
8227ad1b
a81b88a0
Changes
152
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1191 additions
and
44 deletions
+1191
-44
ppocr/metrics/__init__.py
ppocr/metrics/__init__.py
+2
-2
ppocr/metrics/det_metric.py
ppocr/metrics/det_metric.py
+1
-0
ppocr/metrics/distillation_metric.py
ppocr/metrics/distillation_metric.py
+4
-7
ppocr/metrics/table_metric.py
ppocr/metrics/table_metric.py
+50
-0
ppocr/modeling/architectures/base_model.py
ppocr/modeling/architectures/base_model.py
+5
-5
ppocr/modeling/architectures/distillation_model.py
ppocr/modeling/architectures/distillation_model.py
+2
-2
ppocr/modeling/backbones/__init__.py
ppocr/modeling/backbones/__init__.py
+14
-10
ppocr/modeling/backbones/rec_mv1_enhance.py
ppocr/modeling/backbones/rec_mv1_enhance.py
+256
-0
ppocr/modeling/backbones/table_mobilenet_v3.py
ppocr/modeling/backbones/table_mobilenet_v3.py
+287
-0
ppocr/modeling/backbones/table_resnet_vd.py
ppocr/modeling/backbones/table_resnet_vd.py
+280
-0
ppocr/modeling/heads/__init__.py
ppocr/modeling/heads/__init__.py
+5
-1
ppocr/modeling/heads/cls_head.py
ppocr/modeling/heads/cls_head.py
+1
-1
ppocr/modeling/heads/det_db_head.py
ppocr/modeling/heads/det_db_head.py
+1
-1
ppocr/modeling/heads/det_east_head.py
ppocr/modeling/heads/det_east_head.py
+1
-1
ppocr/modeling/heads/det_sast_head.py
ppocr/modeling/heads/det_sast_head.py
+1
-1
ppocr/modeling/heads/e2e_pg_head.py
ppocr/modeling/heads/e2e_pg_head.py
+1
-1
ppocr/modeling/heads/rec_ctc_head.py
ppocr/modeling/heads/rec_ctc_head.py
+38
-10
ppocr/modeling/heads/rec_srn_head.py
ppocr/modeling/heads/rec_srn_head.py
+2
-1
ppocr/modeling/heads/table_att_head.py
ppocr/modeling/heads/table_att_head.py
+238
-0
ppocr/modeling/necks/__init__.py
ppocr/modeling/necks/__init__.py
+2
-1
No files found.
ppocr/metrics/__init__.py
View file @
adc62fcd
...
@@ -26,11 +26,11 @@ from .rec_metric import RecMetric
...
@@ -26,11 +26,11 @@ from .rec_metric import RecMetric
from
.cls_metric
import
ClsMetric
from
.cls_metric
import
ClsMetric
from
.e2e_metric
import
E2EMetric
from
.e2e_metric
import
E2EMetric
from
.distillation_metric
import
DistillationMetric
from
.distillation_metric
import
DistillationMetric
from
.table_metric
import
TableMetric
def
build_metric
(
config
):
def
build_metric
(
config
):
support_dict
=
[
support_dict
=
[
"DetMetric"
,
"RecMetric"
,
"ClsMetric"
,
"E2EMetric"
,
"DistillationMetric"
"DetMetric"
,
"RecMetric"
,
"ClsMetric"
,
"E2EMetric"
,
"DistillationMetric"
,
"TableMetric"
]
]
config
=
copy
.
deepcopy
(
config
)
config
=
copy
.
deepcopy
(
config
)
...
...
ppocr/metrics/det_metric.py
View file @
adc62fcd
...
@@ -55,6 +55,7 @@ class DetMetric(object):
...
@@ -55,6 +55,7 @@ class DetMetric(object):
result
=
self
.
evaluator
.
evaluate_image
(
gt_info_list
,
det_info_list
)
result
=
self
.
evaluator
.
evaluate_image
(
gt_info_list
,
det_info_list
)
self
.
results
.
append
(
result
)
self
.
results
.
append
(
result
)
def
get_metric
(
self
):
def
get_metric
(
self
):
"""
"""
return metrics {
return metrics {
...
...
ppocr/metrics/distillation_metric.py
View file @
adc62fcd
...
@@ -24,8 +24,8 @@ from .cls_metric import ClsMetric
...
@@ -24,8 +24,8 @@ from .cls_metric import ClsMetric
class
DistillationMetric
(
object
):
class
DistillationMetric
(
object
):
def
__init__
(
self
,
def
__init__
(
self
,
key
=
None
,
key
=
None
,
base_metric_name
=
"RecMetric"
,
base_metric_name
=
None
,
main_indicator
=
'acc'
,
main_indicator
=
None
,
**
kwargs
):
**
kwargs
):
self
.
main_indicator
=
main_indicator
self
.
main_indicator
=
main_indicator
self
.
key
=
key
self
.
key
=
key
...
@@ -42,16 +42,13 @@ class DistillationMetric(object):
...
@@ -42,16 +42,13 @@ class DistillationMetric(object):
main_indicator
=
self
.
main_indicator
,
**
self
.
kwargs
)
main_indicator
=
self
.
main_indicator
,
**
self
.
kwargs
)
self
.
metrics
[
key
].
reset
()
self
.
metrics
[
key
].
reset
()
def
__call__
(
self
,
preds
,
*
args
,
**
kwargs
):
def
__call__
(
self
,
preds
,
batch
,
**
kwargs
):
assert
isinstance
(
preds
,
dict
)
assert
isinstance
(
preds
,
dict
)
if
self
.
metrics
is
None
:
if
self
.
metrics
is
None
:
self
.
_init_metrcis
(
preds
)
self
.
_init_metrcis
(
preds
)
output
=
dict
()
output
=
dict
()
for
key
in
preds
:
for
key
in
preds
:
metric
=
self
.
metrics
[
key
].
__call__
(
preds
[
key
],
*
args
,
**
kwargs
)
self
.
metrics
[
key
].
__call__
(
preds
[
key
],
batch
,
**
kwargs
)
for
sub_key
in
metric
:
output
[
"{}_{}"
.
format
(
key
,
sub_key
)]
=
metric
[
sub_key
]
return
output
def
get_metric
(
self
):
def
get_metric
(
self
):
"""
"""
...
...
ppocr/metrics/table_metric.py
0 → 100644
View file @
adc62fcd
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
class TableMetric(object):
    """Exact-match accuracy metric for table-structure predictions.

    A sample counts as correct only when every predicted structure token
    equals the corresponding label token.  Counts accumulate across calls
    until ``get_metric`` (or ``reset``) clears them.
    """

    def __init__(self, main_indicator='acc', **kwargs):
        """Remember the main indicator name and zero the counters.

        Args:
            main_indicator: key of the metric treated as the primary one.
            **kwargs: accepted and ignored.
        """
        self.main_indicator = main_indicator
        self.reset()

    def __call__(self, pred, batch, *args, **kwargs):
        """Score one batch and add it to the running totals.

        Args:
            pred: mapping whose ``'structure_probs'`` entry exposes
                ``.numpy()``; the result is arg-maxed over axis 2.
            batch: indexable whose element 1 holds the structure labels.
                The first label column is dropped before comparison
                (presumably a start token -- confirm against the label
                encoder).

        Returns:
            dict: ``{'acc': accuracy of this batch}``.  An empty batch
            yields ``0.0`` instead of raising ``ZeroDivisionError``.
        """
        predicted = np.argmax(pred['structure_probs'].numpy(), axis=2)
        labels = batch[1][:, 1:]

        batch_size = predicted.shape[0]
        correct_num = 0
        for bno in range(batch_size):
            # A sample is correct only if the whole token sequence matches.
            if (predicted[bno] == labels[bno]).all():
                correct_num += 1

        self.correct_num += correct_num
        self.all_num += batch_size

        # Guard the division: an empty batch has no accuracy to report.
        acc = correct_num * 1.0 / batch_size if batch_size else 0.0
        return {'acc': acc, }

    def get_metric(self):
        """Return the accumulated accuracy and reset the counters.

        Returns:
            dict: ``{'acc': correct / total}`` over every sample seen since
            the last reset, or ``{'acc': 0.0}`` when no sample was seen.
        """
        # Guard the division: evaluating before any batch was scored used
        # to raise ZeroDivisionError.
        acc = 1.0 * self.correct_num / self.all_num if self.all_num else 0.0
        self.reset()
        return {'acc': acc}

    def reset(self):
        """Zero the running correct/total sample counters."""
        self.correct_num = 0
        self.all_num = 0
ppocr/modeling/architectures/base_model.py
View file @
adc62fcd
# Copyright (c) 202
0
PaddlePaddle Authors. All Rights Reserved.
# Copyright (c) 202
1
PaddlePaddle Authors. All Rights Reserved.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
...
@@ -77,11 +77,11 @@ class BaseModel(nn.Layer):
...
@@ -77,11 +77,11 @@ class BaseModel(nn.Layer):
if
self
.
use_neck
:
if
self
.
use_neck
:
x
=
self
.
neck
(
x
)
x
=
self
.
neck
(
x
)
y
[
"neck_out"
]
=
x
y
[
"neck_out"
]
=
x
if
data
is
None
:
x
=
self
.
head
(
x
,
targets
=
data
)
x
=
self
.
head
(
x
)
if
isinstance
(
x
,
dict
):
y
.
update
(
x
)
else
:
else
:
x
=
self
.
head
(
x
,
data
)
y
[
"head_out"
]
=
x
y
[
"head_out"
]
=
x
if
self
.
return_all_feats
:
if
self
.
return_all_feats
:
return
y
return
y
else
:
else
:
...
...
ppocr/modeling/architectures/distillation_model.py
View file @
adc62fcd
...
@@ -21,7 +21,7 @@ from ppocr.modeling.backbones import build_backbone
...
@@ -21,7 +21,7 @@ from ppocr.modeling.backbones import build_backbone
from
ppocr.modeling.necks
import
build_neck
from
ppocr.modeling.necks
import
build_neck
from
ppocr.modeling.heads
import
build_head
from
ppocr.modeling.heads
import
build_head
from
.base_model
import
BaseModel
from
.base_model
import
BaseModel
from
ppocr.utils.save_load
import
init_model
from
ppocr.utils.save_load
import
init_model
,
load_pretrained_params
__all__
=
[
'DistillationModel'
]
__all__
=
[
'DistillationModel'
]
...
@@ -46,7 +46,7 @@ class DistillationModel(nn.Layer):
...
@@ -46,7 +46,7 @@ class DistillationModel(nn.Layer):
pretrained
=
model_config
.
pop
(
"pretrained"
)
pretrained
=
model_config
.
pop
(
"pretrained"
)
model
=
BaseModel
(
model_config
)
model
=
BaseModel
(
model_config
)
if
pretrained
is
not
None
:
if
pretrained
is
not
None
:
init_model
(
model
,
path
=
pretrained
)
load_pretrained_params
(
model
,
pretrained
)
if
freeze_params
:
if
freeze_params
:
for
param
in
model
.
parameters
():
for
param
in
model
.
parameters
():
param
.
trainable
=
False
param
.
trainable
=
False
...
...
ppocr/modeling/backbones/__init__.py
View file @
adc62fcd
...
@@ -12,32 +12,36 @@
...
@@ -12,32 +12,36 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
__all__
=
[
'
build_backbone
'
]
__all__
=
[
"
build_backbone
"
]
def
build_backbone
(
config
,
model_type
):
def
build_backbone
(
config
,
model_type
):
if
model_type
==
'
det
'
:
if
model_type
==
"
det
"
:
from
.det_mobilenet_v3
import
MobileNetV3
from
.det_mobilenet_v3
import
MobileNetV3
from
.det_resnet_vd
import
ResNet
from
.det_resnet_vd
import
ResNet
from
.det_resnet_vd_sast
import
ResNet_SAST
from
.det_resnet_vd_sast
import
ResNet_SAST
support_dict
=
[
'
MobileNetV3
'
,
'
ResNet
'
,
'
ResNet_SAST
'
]
support_dict
=
[
"
MobileNetV3
"
,
"
ResNet
"
,
"
ResNet_SAST
"
]
elif
model_type
==
'
rec
'
or
model_type
==
'
cls
'
:
elif
model_type
==
"
rec
"
or
model_type
==
"
cls
"
:
from
.rec_mobilenet_v3
import
MobileNetV3
from
.rec_mobilenet_v3
import
MobileNetV3
from
.rec_resnet_vd
import
ResNet
from
.rec_resnet_vd
import
ResNet
from
.rec_resnet_fpn
import
ResNetFPN
from
.rec_resnet_fpn
import
ResNetFPN
from
.rec_mv1_enhance
import
MobileNetV1Enhance
from
.rec_nrtr_mtb
import
MTB
from
.rec_nrtr_mtb
import
MTB
from
.rec_swin
import
SwinTransformer
from
.rec_swin
import
SwinTransformer
support_dict
=
[
'MobileNetV3'
,
'ResNet'
,
'ResNetFPN'
,
'MTB'
,
'SwinTransformer'
]
support_dict
=
[
'MobileNetV1Enhance'
,
'MobileNetV3'
,
'ResNet'
,
'ResNetFPN'
,
'MTB'
,
'SwinTransformer'
]
elif
model_type
==
"e2e"
:
elif
model_type
==
'e2e'
:
from
.e2e_resnet_vd_pg
import
ResNet
from
.e2e_resnet_vd_pg
import
ResNet
support_dict
=
[
'ResNet'
]
support_dict
=
[
"ResNet"
]
elif
model_type
==
"table"
:
from
.table_resnet_vd
import
ResNet
from
.table_mobilenet_v3
import
MobileNetV3
support_dict
=
[
"ResNet"
,
"MobileNetV3"
]
else
:
else
:
raise
NotImplementedError
raise
NotImplementedError
module_name
=
config
.
pop
(
'
name
'
)
module_name
=
config
.
pop
(
"
name
"
)
assert
module_name
in
support_dict
,
Exception
(
assert
module_name
in
support_dict
,
Exception
(
'
when model typs is {}, backbone only support {}
'
.
format
(
model_type
,
"
when model typs is {}, backbone only support {}
"
.
format
(
model_type
,
support_dict
))
support_dict
))
module_class
=
eval
(
module_name
)(
**
config
)
module_class
=
eval
(
module_name
)(
**
config
)
return
module_class
return
module_class
ppocr/modeling/backbones/rec_mv1_enhance.py
0 → 100644
View file @
adc62fcd
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
import
paddle
from
paddle
import
ParamAttr
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
from
paddle.nn
import
Conv2D
,
BatchNorm
,
Linear
,
Dropout
from
paddle.nn
import
AdaptiveAvgPool2D
,
MaxPool2D
,
AvgPool2D
from
paddle.nn.initializer
import
KaimingNormal
import
math
import
numpy
as
np
import
paddle
from
paddle
import
ParamAttr
,
reshape
,
transpose
,
concat
,
split
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
from
paddle.nn
import
Conv2D
,
BatchNorm
,
Linear
,
Dropout
from
paddle.nn
import
AdaptiveAvgPool2D
,
MaxPool2D
,
AvgPool2D
from
paddle.nn.initializer
import
KaimingNormal
import
math
from
paddle.nn.functional
import
hardswish
,
hardsigmoid
from
paddle.regularizer
import
L2Decay
class ConvBNLayer(nn.Layer):
    """Bias-free 2-D convolution followed by batch normalisation.

    The activation named by ``act`` is handed to ``BatchNorm`` rather than
    applied as a separate step.
    """

    def __init__(self,
                 num_channels,
                 filter_size,
                 num_filters,
                 stride,
                 padding,
                 channels=None,
                 num_groups=1,
                 act='hard_swish'):
        """Build the convolution and its batch-norm.

        Args:
            num_channels: input channels of the convolution.
            filter_size: convolution kernel size.
            num_filters: output channels of the convolution and width of
                the batch-norm.
            stride: convolution stride.
            padding: convolution padding.
            channels: accepted for call compatibility; not used here.
            num_groups: convolution groups.
            act: activation name forwarded to ``BatchNorm``.
        """
        super(ConvBNLayer, self).__init__()

        conv_options = dict(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(initializer=KaimingNormal()),
            bias_attr=False)
        self._conv = Conv2D(**conv_options)

        # Both batch-norm parameters get an L2 regulariser with coefficient 0.
        bn_scale_attr = ParamAttr(regularizer=L2Decay(0.0))
        bn_offset_attr = ParamAttr(regularizer=L2Decay(0.0))
        self._batch_norm = BatchNorm(
            num_filters,
            act=act,
            param_attr=bn_scale_attr,
            bias_attr=bn_offset_attr)

    def forward(self, inputs):
        """Apply the convolution, then the batch-norm (with activation)."""
        return self._batch_norm(self._conv(inputs))
class DepthwiseSeparable(nn.Layer):
    """Grouped conv, optional squeeze-excitation, then a 1x1 conv.

    All channel counts given to the constructor are multiplied by ``scale``
    and truncated to ``int`` before use, except ``num_channels`` which is
    used as passed.
    """

    def __init__(self,
                 num_channels,
                 num_filters1,
                 num_filters2,
                 num_groups,
                 stride,
                 scale,
                 dw_size=3,
                 padding=1,
                 use_se=False):
        """Build the sub-layers.

        Args:
            num_channels: input channels of the first convolution.
            num_filters1: unscaled output channels of the first convolution.
            num_filters2: unscaled output channels of the 1x1 convolution.
            num_groups: unscaled group count of the first convolution.
            stride: stride of the first convolution.
            scale: width multiplier.
            dw_size: kernel size of the first convolution.
            padding: padding of the first convolution.
            use_se: insert an ``SEModule`` between the two convolutions.
        """
        super(DepthwiseSeparable, self).__init__()
        self.use_se = use_se

        mid_channels = int(num_filters1 * scale)

        self._depthwise_conv = ConvBNLayer(
            num_channels=num_channels,
            num_filters=mid_channels,
            filter_size=dw_size,
            stride=stride,
            padding=padding,
            num_groups=int(num_groups * scale))

        if use_se:
            self._se = SEModule(mid_channels)

        self._pointwise_conv = ConvBNLayer(
            num_channels=mid_channels,
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            padding=0)

    def forward(self, inputs):
        """Run the first conv, the optional SE module, then the 1x1 conv."""
        features = self._depthwise_conv(inputs)
        if self.use_se:
            features = self._se(features)
        return self._pointwise_conv(features)
class MobileNetV1Enhance(nn.Layer):
    """MobileNetV1-style backbone built from ``DepthwiseSeparable`` blocks.

    Structure: a stride-2 stem convolution, thirteen depthwise-separable
    blocks (the last two with squeeze-excitation), and a 2x2 max-pool.
    ``out_channels`` is ``int(1024 * scale)``.
    """

    def __init__(self, in_channels=3, scale=0.5, **kwargs):
        """Build the backbone.

        Args:
            in_channels: channel count of the input image.  The stem
                convolution now uses this value; it was previously
                hard-coded to 3, so the argument was silently ignored.
            scale: width multiplier applied to every block's channel counts.
            **kwargs: accepted and ignored.
        """
        super().__init__()
        self.scale = scale

        self.conv1 = ConvBNLayer(
            num_channels=in_channels,
            filter_size=3,
            num_filters=int(32 * scale),
            stride=2,
            padding=1)

        # (filters1, filters2, stride, dw_size, padding, use_se) per block,
        # in the order the blocks are applied.  Strides of (2, 1) apply 2 to
        # the first spatial axis and 1 to the second.
        block_specs = [
            (32, 64, 1, 3, 1, False),
            (64, 128, 1, 3, 1, False),
            (128, 128, 1, 3, 1, False),
            (128, 256, (2, 1), 3, 1, False),
            (256, 256, 1, 3, 1, False),
            (256, 512, (2, 1), 3, 1, False),
        ]
        block_specs += [(512, 512, 1, 5, 2, False)] * 5
        block_specs += [
            (512, 1024, (2, 1), 5, 2, True),
            (1024, 1024, 1, 5, 2, True),
        ]

        blocks = []
        for filters1, filters2, stride, dw_size, padding, use_se in block_specs:
            blocks.append(
                DepthwiseSeparable(
                    # Each block consumes the scaled width of its first conv
                    # and uses one group per unscaled filter.
                    num_channels=int(filters1 * scale),
                    num_filters1=filters1,
                    num_filters2=filters2,
                    num_groups=filters1,
                    stride=stride,
                    dw_size=dw_size,
                    padding=padding,
                    scale=scale,
                    use_se=use_se))

        self.block_list = nn.Sequential(*blocks)

        self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
        self.out_channels = int(1024 * scale)

    def forward(self, inputs):
        """Apply the stem, the block stack and the final max-pool."""
        y = self.conv1(inputs)
        y = self.block_list(y)
        y = self.pool(y)
        return y
class SEModule(nn.Layer):
    """Squeeze-and-excitation gate: rescales the input channel-wise."""

    def __init__(self, channel, reduction=4):
        """Build the pooling layer and the two 1x1 convolutions.

        Args:
            channel: channel count of the input (and of the gate).
            reduction: divisor giving the width between the two convolutions.
        """
        super(SEModule, self).__init__()
        squeezed = channel // reduction

        self.avg_pool = AdaptiveAvgPool2D(1)
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=squeezed,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(),
            bias_attr=ParamAttr())
        self.conv2 = Conv2D(
            in_channels=squeezed,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(),
            bias_attr=ParamAttr())

    def forward(self, inputs):
        """Return ``inputs`` multiplied by the computed gate."""
        gate = self.avg_pool(inputs)
        gate = F.relu(self.conv1(gate))
        gate = hardsigmoid(self.conv2(gate))
        return paddle.multiply(x=inputs, y=gate)
ppocr/modeling/backbones/table_mobilenet_v3.py
0 → 100644
View file @
adc62fcd
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
nn
import
paddle.nn.functional
as
F
from
paddle
import
ParamAttr
__all__
=
[
'MobileNetV3'
]
def make_divisible(v, divisor=8, min_value=None):
    """Round ``v`` to a multiple of ``divisor``, bounded below.

    Args:
        v: value to round.
        divisor: the result is a multiple of this (unless ``min_value``
            wins and is not itself a multiple).
        min_value: lower bound for the result; defaults to ``divisor``.

    Returns:
        The rounded value, bumped up by one ``divisor`` when rounding would
        otherwise fall below 90% of ``v``.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest = int(v + divisor / 2) // divisor * divisor
    result = max(lower_bound, nearest)
    # Never shrink the requested value by more than 10%.
    if result < 0.9 * v:
        return result + divisor
    return result
class MobileNetV3(nn.Layer):
    """MobileNetV3 backbone that returns one feature map per stage."""

    def __init__(self,
                 in_channels=3,
                 model_name='large',
                 scale=0.5,
                 disable_se=False,
                 **kwargs):
        """Build the stem convolution and the residual stages.

        Args:
            in_channels: channel count of the input to the stem convolution.
            model_name: ``'large'`` or ``'small'``; selects the block table.
            scale: width multiplier; must be one of 0.35, 0.5, 0.75, 1.0, 1.25.
            disable_se: when true, no residual unit uses squeeze-excitation.
            **kwargs: accepted and ignored.

        Raises:
            NotImplementedError: if ``model_name`` is neither variant.
            AssertionError: if ``scale`` is not a supported value.
        """
        super(MobileNetV3, self).__init__()

        self.disable_se = disable_se

        # Row layout: kernel, expansion width, output width, use SE,
        # activation, stride.
        if model_name == "large":
            cfg = (
                (3, 16, 16, False, 'relu', 1),
                (3, 64, 24, False, 'relu', 2),
                (3, 72, 24, False, 'relu', 1),
                (5, 72, 40, True, 'relu', 2),
                (5, 120, 40, True, 'relu', 1),
                (5, 120, 40, True, 'relu', 1),
                (3, 240, 80, False, 'hardswish', 2),
                (3, 200, 80, False, 'hardswish', 1),
                (3, 184, 80, False, 'hardswish', 1),
                (3, 184, 80, False, 'hardswish', 1),
                (3, 480, 112, True, 'hardswish', 1),
                (3, 672, 112, True, 'hardswish', 1),
                (5, 672, 160, True, 'hardswish', 2),
                (5, 960, 160, True, 'hardswish', 1),
                (5, 960, 160, True, 'hardswish', 1),
            )
            cls_ch_squeeze = 960
        elif model_name == "small":
            cfg = (
                (3, 16, 16, True, 'relu', 2),
                (3, 72, 24, False, 'relu', 2),
                (3, 88, 24, False, 'relu', 1),
                (5, 96, 40, True, 'hardswish', 2),
                (5, 240, 40, True, 'hardswish', 1),
                (5, 240, 40, True, 'hardswish', 1),
                (5, 120, 48, True, 'hardswish', 1),
                (5, 144, 48, True, 'hardswish', 1),
                (5, 288, 96, True, 'hardswish', 2),
                (5, 576, 96, True, 'hardswish', 1),
                (5, 576, 96, True, 'hardswish', 1),
            )
            cls_ch_squeeze = 576
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert scale in supported_scale, \
            "supported scale are {} but input scale is {}".format(supported_scale, scale)

        stem_channels = make_divisible(16 * scale)
        self.conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=stem_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            groups=1,
            if_act=True,
            act='hardswish',
            name='conv1')

        self.stages = []
        self.out_channels = []

        # A stride-2 row starts a new stage only past this row index.
        start_idx = 2 if model_name == 'large' else 0

        current_channels = stem_channels
        pending_units = []
        for idx, (kernel, exp_width, out_width, se, act_name,
                  stride) in enumerate(cfg):
            if stride == 2 and idx > start_idx:
                # Close the stage collected so far and record its width.
                self.out_channels.append(current_channels)
                self.stages.append(nn.Sequential(*pending_units))
                pending_units = []

            unit_out = make_divisible(scale * out_width)
            pending_units.append(
                ResidualUnit(
                    in_channels=current_channels,
                    mid_channels=make_divisible(scale * exp_width),
                    out_channels=unit_out,
                    kernel_size=kernel,
                    stride=stride,
                    use_se=se and not self.disable_se,
                    act=act_name,
                    name="conv" + str(idx + 2)))
            current_channels = unit_out

        # The last stage ends with a 1x1 convolution.
        last_channels = make_divisible(scale * cls_ch_squeeze)
        pending_units.append(
            ConvBNLayer(
                in_channels=current_channels,
                out_channels=last_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                groups=1,
                if_act=True,
                act='hardswish',
                name='conv_last'))
        self.stages.append(nn.Sequential(*pending_units))
        self.out_channels.append(last_channels)

        # ``self.stages`` is a plain list, so register each stage explicitly.
        for stage_no, stage in enumerate(self.stages):
            self.add_sublayer(sublayer=stage, name="stage{}".format(stage_no))

    def forward(self, x):
        """Return the list of stage outputs, in stage order."""
        feature = self.conv(x)
        outputs = []
        for stage in self.stages:
            feature = stage(feature)
            outputs.append(feature)
        return outputs
class ConvBNLayer(nn.Layer):
    """Bias-free 2-D convolution, batch-norm and an optional activation."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        """Build the convolution and its batch-norm.

        Args:
            in_channels: input channels of the convolution.
            out_channels: output channels of the convolution.
            kernel_size: convolution kernel size.
            stride: convolution stride.
            padding: convolution padding.
            groups: convolution groups.
            if_act: apply ``act`` after the batch-norm in ``forward``.
            act: ``"relu"`` or ``"hardswish"``; only read when ``if_act``.
            name: prefix for the explicit parameter names.  With the default
                ``None`` no explicit names are set (this used to fail with a
                ``TypeError`` on ``None + str``).
        """
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act

        def _named(suffix):
            # No prefix -> pass None so the framework picks the name.
            return None if name is None else name + suffix

        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=_named('_weights')),
            bias_attr=False)

        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=None,
            param_attr=ParamAttr(name=_named("_bn_scale")),
            bias_attr=ParamAttr(name=_named("_bn_offset")),
            moving_mean_name=_named("_bn_mean"),
            moving_variance_name=_named("_bn_variance"))

    def forward(self, x):
        """Apply conv, batch-norm and, if enabled, the activation.

        Raises:
            ValueError: if ``if_act`` is set and ``act`` is not a supported
                activation name.
        """
        x = self.conv(x)
        x = self.bn(x)
        if self.if_act:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "hardswish":
                x = F.hardswish(x)
            else:
                # Previously this printed the message and called exit(),
                # which ended the whole process with a success status.
                raise ValueError(
                    "The activation function({}) is selected incorrectly.".
                    format(self.act))
        return x
class ResidualUnit(nn.Layer):
    """Inverted-residual unit: expand, grouped conv, optional SE, project."""

    def __init__(self,
                 in_channels,
                 mid_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 use_se,
                 act=None,
                 name=''):
        """Build the sub-layers.

        Args:
            in_channels: input channels of the unit.
            mid_channels: width after the 1x1 expansion.
            out_channels: output channels of the unit.
            kernel_size: kernel size of the middle convolution.
            stride: stride of the middle convolution.
            use_se: insert an ``SEModule`` after the middle convolution.
            act: activation name for the first two convolutions.
            name: prefix for the sub-layers' parameter names.
        """
        super(ResidualUnit, self).__init__()
        # The skip connection is only valid when the shape is unchanged.
        self.if_shortcut = stride == 1 and in_channels == out_channels
        self.if_se = use_se

        self.expand_conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=mid_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
            name=name + "_expand")

        same_padding = int((kernel_size - 1) // 2)
        self.bottleneck_conv = ConvBNLayer(
            in_channels=mid_channels,
            out_channels=mid_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=same_padding,
            groups=mid_channels,
            if_act=True,
            act=act,
            name=name + "_depthwise")

        if self.if_se:
            self.mid_se = SEModule(mid_channels, name=name + "_se")

        # The projection has no activation.
        self.linear_conv = ConvBNLayer(
            in_channels=mid_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None,
            name=name + "_linear")

    def forward(self, inputs):
        """Run the unit and add ``inputs`` back when the shortcut applies."""
        out = self.bottleneck_conv(self.expand_conv(inputs))
        if self.if_se:
            out = self.mid_se(out)
        out = self.linear_conv(out)
        if not self.if_shortcut:
            return out
        return paddle.add(inputs, out)
class SEModule(nn.Layer):
    """Squeeze-and-excitation gate with explicitly named parameters."""

    def __init__(self, in_channels, reduction=4, name=""):
        """Build the pooling layer and the two 1x1 convolutions.

        Args:
            in_channels: channel count of the input (and of the gate).
            reduction: divisor giving the width between the two convolutions.
            name: prefix for the parameter names.
        """
        super(SEModule, self).__init__()
        squeezed = in_channels // reduction

        self.avg_pool = nn.AdaptiveAvgPool2D(1)
        self.conv1 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=squeezed,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name=name + "_1_weights"),
            bias_attr=ParamAttr(name=name + "_1_offset"))
        self.conv2 = nn.Conv2D(
            in_channels=squeezed,
            out_channels=in_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name=name + "_2_weights"),
            bias_attr=ParamAttr(name=name + "_2_offset"))

    def forward(self, inputs):
        """Return ``inputs`` multiplied by the computed gate."""
        gate = self.avg_pool(inputs)
        gate = F.relu(self.conv1(gate))
        gate = self.conv2(gate)
        gate = F.hardsigmoid(gate, slope=0.2, offset=0.5)
        return inputs * gate
\ No newline at end of file
ppocr/modeling/backbones/table_resnet_vd.py
0 → 100644
View file @
adc62fcd
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
ParamAttr
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
__all__
=
[
"ResNet"
]
class ConvBNLayer(nn.Layer):
    """Optional 2x2 average-pool, bias-free convolution, then batch-norm."""

    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=1,
            groups=1,
            is_vd_mode=False,
            act=None,
            name=None, ):
        """Build the pooling, convolution and batch-norm layers.

        Args:
            in_channels: input channels of the convolution.
            out_channels: output channels of the convolution.
            kernel_size: convolution kernel size; padding is derived from it
                as ``(kernel_size - 1) // 2``.
            stride: convolution stride.
            groups: convolution groups.
            is_vd_mode: average-pool the input before the convolution.
            act: activation name forwarded to the batch-norm.
            name: prefix for the parameter names.
        """
        super(ConvBNLayer, self).__init__()

        self.is_vd_mode = is_vd_mode
        # Built unconditionally; only applied in forward when is_vd_mode.
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)

        conv_weight_attr = ParamAttr(name=name + "_weights")
        self._conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=conv_weight_attr,
            bias_attr=False)

        # Batch-norm names: "conv1" -> "bn_conv1"; any other name has its
        # first three characters replaced by "bn".
        bn_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
        self._batch_norm = nn.BatchNorm(
            out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(name=bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def forward(self, inputs):
        """Apply the optional pooling, the convolution and the batch-norm."""
        features = self._pool2d_avg(inputs) if self.is_vd_mode else inputs
        return self._batch_norm(self._conv(features))
class BottleneckBlock(nn.Layer):
    """Residual block of 1x1, 3x3 and 1x1 convolutions (4x output width)."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        """Build the three convolutions and, if needed, the projection.

        Args:
            in_channels: input channels of the block.
            out_channels: width of the first two convolutions; the block
                outputs ``out_channels * 4`` channels.
            stride: stride of the 3x3 convolution.
            shortcut: when true the input is added back unchanged; when
                false a 1x1 projection is applied to it first.
            if_first: disables the average-pool in the projection.
            name: prefix for the sub-layers' parameter names.
        """
        super(BottleneckBlock, self).__init__()
        expanded = out_channels * 4

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=expanded,
            kernel_size=1,
            act=None,
            name=name + "_branch2c")

        if not shortcut:
            # The projection pools first, except in the very first block.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=expanded,
                kernel_size=1,
                stride=1,
                is_vd_mode=not if_first,
                name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        """Return ``relu(residual + main_branch(inputs))``."""
        main = self.conv2(self.conv1(self.conv0(inputs)))
        residual = inputs if self.shortcut else self.short(inputs)
        return F.relu(paddle.add(x=residual, y=main))
class BasicBlock(nn.Layer):
    """Residual block of two 3x3 convolutions."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        """Build the two convolutions and, if needed, the projection.

        Args:
            in_channels: input channels of the block.
            out_channels: output channels of the block.
            stride: stride of the first convolution.
            shortcut: when true the input is added back unchanged; when
                false a 1x1 projection is applied to it first.
            if_first: disables the average-pool in the projection.
            name: prefix for the sub-layers' parameter names.
        """
        super(BasicBlock, self).__init__()
        self.stride = stride

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")

        if not shortcut:
            # The projection pools first, except in the very first block.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                is_vd_mode=not if_first,
                name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        """Return ``relu(residual + main_branch(inputs))``."""
        main = self.conv1(self.conv0(inputs))
        residual = inputs if self.shortcut else self.short(inputs)
        return F.relu(paddle.add(x=residual, y=main))
class ResNet(nn.Layer):
    """ResNet backbone that returns one feature map per stage.

    Depths of 50 and above use ``BottleneckBlock``; 18 and 34 use
    ``BasicBlock``.
    """

    def __init__(self, in_channels=3, layers=50, **kwargs):
        """Build the three-convolution stem and the four stages.

        Args:
            in_channels: channel count of the input to the stem.
            layers: network depth; one of 18, 34, 50, 101, 152, 200.
            **kwargs: accepted and ignored.

        Raises:
            AssertionError: if ``layers`` is not a supported depth.
        """
        super(ResNet, self).__init__()

        self.layers = layers
        # Number of residual blocks in each of the four stages, per depth.
        depth_by_layers = {
            18: [2, 2, 2, 2],
            34: [3, 4, 6, 3],
            50: [3, 4, 6, 3],
            101: [3, 4, 23, 3],
            152: [3, 8, 36, 3],
            200: [3, 12, 48, 3],
        }
        supported_layers = list(depth_by_layers)
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)
        depth = depth_by_layers[layers]

        uses_bottleneck = layers >= 50
        # Input width of the first block of each stage.
        num_channels = [64, 256, 512, 1024] if uses_bottleneck else [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512]

        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=32,
            kernel_size=3,
            stride=2,
            act='relu',
            name="conv1_1")
        self.conv1_2 = ConvBNLayer(
            in_channels=32,
            out_channels=32,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_2")
        self.conv1_3 = ConvBNLayer(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_3")
        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

        self.stages = []
        self.out_channels = []

        block_cls = BottleneckBlock if uses_bottleneck else BasicBlock
        # Bottleneck blocks output four times their nominal width.
        expansion = 4 if uses_bottleneck else 1

        for block, block_count in enumerate(depth):
            stage_width = num_filters[block] * expansion
            stage_blocks = []
            for i in range(block_count):
                # Depths 101/152 name the third stage "a", "b1", "b2", ...;
                # everything else uses consecutive letters from "a".
                if layers in [101, 152] and block == 2:
                    suffix = "a" if i == 0 else "b" + str(i)
                else:
                    suffix = chr(97 + i)
                conv_name = "res" + str(block + 2) + suffix

                residual_block = self.add_sublayer(
                    'bb_%d_%d' % (block, i),
                    block_cls(
                        in_channels=num_channels[block]
                        if i == 0 else stage_width,
                        out_channels=num_filters[block],
                        # Every stage but the first downsamples in its
                        # first block.
                        stride=2 if i == 0 and block != 0 else 1,
                        # Only the first block of a stage projects its input.
                        shortcut=i != 0,
                        if_first=block == i == 0,
                        name=conv_name))
                stage_blocks.append(residual_block)

            self.out_channels.append(stage_width)
            self.stages.append(nn.Sequential(*stage_blocks))

    def forward(self, inputs):
        """Return the list of stage outputs, in stage order."""
        y = self.pool2d_max(self.conv1_3(self.conv1_2(self.conv1_1(inputs))))
        outputs = []
        for stage in self.stages:
            y = stage(y)
            outputs.append(y)
        return outputs
ppocr/modeling/heads/__init__.py
View file @
adc62fcd
...
@@ -32,8 +32,12 @@ def build_head(config):
...
@@ -32,8 +32,12 @@ def build_head(config):
from
.cls_head
import
ClsHead
from
.cls_head
import
ClsHead
support_dict
=
[
support_dict
=
[
'DBHead'
,
'EASTHead'
,
'SASTHead'
,
'CTCHead'
,
'ClsHead'
,
'AttentionHead'
,
'DBHead'
,
'EASTHead'
,
'SASTHead'
,
'CTCHead'
,
'ClsHead'
,
'AttentionHead'
,
'SRNHead'
,
'PGHead'
,
'TransformerOptim'
]
'SRNHead'
,
'PGHead'
,
'TransformerOptim'
,
'TableAttentionHead'
]
#table head
from
.table_att_head
import
TableAttentionHead
module_name
=
config
.
pop
(
'name'
)
module_name
=
config
.
pop
(
'name'
)
assert
module_name
in
support_dict
,
Exception
(
'head only support {}'
.
format
(
assert
module_name
in
support_dict
,
Exception
(
'head only support {}'
.
format
(
...
...
ppocr/modeling/heads/cls_head.py
View file @
adc62fcd
...
@@ -43,7 +43,7 @@ class ClsHead(nn.Layer):
...
@@ -43,7 +43,7 @@ class ClsHead(nn.Layer):
initializer
=
nn
.
initializer
.
Uniform
(
-
stdv
,
stdv
)),
initializer
=
nn
.
initializer
.
Uniform
(
-
stdv
,
stdv
)),
bias_attr
=
ParamAttr
(
name
=
"fc_0.b_0"
),
)
bias_attr
=
ParamAttr
(
name
=
"fc_0.b_0"
),
)
def
forward
(
self
,
x
):
def
forward
(
self
,
x
,
targets
=
None
):
x
=
self
.
pool
(
x
)
x
=
self
.
pool
(
x
)
x
=
paddle
.
reshape
(
x
,
shape
=
[
x
.
shape
[
0
],
x
.
shape
[
1
]])
x
=
paddle
.
reshape
(
x
,
shape
=
[
x
.
shape
[
0
],
x
.
shape
[
1
]])
x
=
self
.
fc
(
x
)
x
=
self
.
fc
(
x
)
...
...
ppocr/modeling/heads/det_db_head.py
View file @
adc62fcd
...
@@ -106,7 +106,7 @@ class DBHead(nn.Layer):
...
@@ -106,7 +106,7 @@ class DBHead(nn.Layer):
def
step_function
(
self
,
x
,
y
):
def
step_function
(
self
,
x
,
y
):
return
paddle
.
reciprocal
(
1
+
paddle
.
exp
(
-
self
.
k
*
(
x
-
y
)))
return
paddle
.
reciprocal
(
1
+
paddle
.
exp
(
-
self
.
k
*
(
x
-
y
)))
def
forward
(
self
,
x
):
def
forward
(
self
,
x
,
targets
=
None
):
shrink_maps
=
self
.
binarize
(
x
)
shrink_maps
=
self
.
binarize
(
x
)
if
not
self
.
training
:
if
not
self
.
training
:
return
{
'maps'
:
shrink_maps
}
return
{
'maps'
:
shrink_maps
}
...
...
ppocr/modeling/heads/det_east_head.py
View file @
adc62fcd
...
@@ -109,7 +109,7 @@ class EASTHead(nn.Layer):
...
@@ -109,7 +109,7 @@ class EASTHead(nn.Layer):
act
=
None
,
act
=
None
,
name
=
"f_geo"
)
name
=
"f_geo"
)
def
forward
(
self
,
x
):
def
forward
(
self
,
x
,
targets
=
None
):
f_det
=
self
.
det_conv1
(
x
)
f_det
=
self
.
det_conv1
(
x
)
f_det
=
self
.
det_conv2
(
f_det
)
f_det
=
self
.
det_conv2
(
f_det
)
f_score
=
self
.
score_conv
(
f_det
)
f_score
=
self
.
score_conv
(
f_det
)
...
...
ppocr/modeling/heads/det_sast_head.py
View file @
adc62fcd
...
@@ -116,7 +116,7 @@ class SASTHead(nn.Layer):
...
@@ -116,7 +116,7 @@ class SASTHead(nn.Layer):
self
.
head1
=
SAST_Header1
(
in_channels
)
self
.
head1
=
SAST_Header1
(
in_channels
)
self
.
head2
=
SAST_Header2
(
in_channels
)
self
.
head2
=
SAST_Header2
(
in_channels
)
def
forward
(
self
,
x
):
def
forward
(
self
,
x
,
targets
=
None
):
f_score
,
f_border
=
self
.
head1
(
x
)
f_score
,
f_border
=
self
.
head1
(
x
)
f_tvo
,
f_tco
=
self
.
head2
(
x
)
f_tvo
,
f_tco
=
self
.
head2
(
x
)
...
...
ppocr/modeling/heads/e2e_pg_head.py
View file @
adc62fcd
...
@@ -220,7 +220,7 @@ class PGHead(nn.Layer):
...
@@ -220,7 +220,7 @@ class PGHead(nn.Layer):
weight_attr
=
ParamAttr
(
name
=
"conv_f_direc{}"
.
format
(
4
)),
weight_attr
=
ParamAttr
(
name
=
"conv_f_direc{}"
.
format
(
4
)),
bias_attr
=
False
)
bias_attr
=
False
)
def
forward
(
self
,
x
):
def
forward
(
self
,
x
,
targets
=
None
):
f_score
=
self
.
conv_f_score1
(
x
)
f_score
=
self
.
conv_f_score1
(
x
)
f_score
=
self
.
conv_f_score2
(
f_score
)
f_score
=
self
.
conv_f_score2
(
f_score
)
f_score
=
self
.
conv_f_score3
(
f_score
)
f_score
=
self
.
conv_f_score3
(
f_score
)
...
...
ppocr/modeling/heads/rec_ctc_head.py
View file @
adc62fcd
...
@@ -33,19 +33,47 @@ def get_para_bias_attr(l2_decay, k):
...
@@ -33,19 +33,47 @@ def get_para_bias_attr(l2_decay, k):
class
CTCHead
(
nn
.
Layer
):
class
CTCHead
(
nn
.
Layer
):
def
__init__
(
self
,
in_channels
,
out_channels
,
fc_decay
=
0.0004
,
**
kwargs
):
def
__init__
(
self
,
in_channels
,
out_channels
,
fc_decay
=
0.0004
,
mid_channels
=
None
,
**
kwargs
):
super
(
CTCHead
,
self
).
__init__
()
super
(
CTCHead
,
self
).
__init__
()
weight_attr
,
bias_attr
=
get_para_bias_attr
(
if
mid_channels
is
None
:
l2_decay
=
fc_decay
,
k
=
in_channels
)
weight_attr
,
bias_attr
=
get_para_bias_attr
(
self
.
fc
=
nn
.
Linear
(
l2_decay
=
fc_decay
,
k
=
in_channels
)
in_channels
,
self
.
fc
=
nn
.
Linear
(
out_channels
,
in_channels
,
weight_attr
=
weight_attr
,
out_channels
,
bias_attr
=
bias_attr
)
weight_attr
=
weight_attr
,
bias_attr
=
bias_attr
)
else
:
weight_attr1
,
bias_attr1
=
get_para_bias_attr
(
l2_decay
=
fc_decay
,
k
=
in_channels
)
self
.
fc1
=
nn
.
Linear
(
in_channels
,
mid_channels
,
weight_attr
=
weight_attr1
,
bias_attr
=
bias_attr1
)
weight_attr2
,
bias_attr2
=
get_para_bias_attr
(
l2_decay
=
fc_decay
,
k
=
mid_channels
)
self
.
fc2
=
nn
.
Linear
(
mid_channels
,
out_channels
,
weight_attr
=
weight_attr2
,
bias_attr
=
bias_attr2
)
self
.
out_channels
=
out_channels
self
.
out_channels
=
out_channels
self
.
mid_channels
=
mid_channels
def
forward
(
self
,
x
,
labels
=
None
):
def
forward
(
self
,
x
,
targets
=
None
):
predicts
=
self
.
fc
(
x
)
if
self
.
mid_channels
is
None
:
predicts
=
self
.
fc
(
x
)
else
:
predicts
=
self
.
fc1
(
x
)
predicts
=
self
.
fc2
(
predicts
)
if
not
self
.
training
:
if
not
self
.
training
:
predicts
=
F
.
softmax
(
predicts
,
axis
=
2
)
predicts
=
F
.
softmax
(
predicts
,
axis
=
2
)
return
predicts
return
predicts
ppocr/modeling/heads/rec_srn_head.py
View file @
adc62fcd
...
@@ -250,7 +250,8 @@ class SRNHead(nn.Layer):
...
@@ -250,7 +250,8 @@ class SRNHead(nn.Layer):
self
.
gsrm
.
wrap_encoder1
.
prepare_decoder
.
emb0
=
self
.
gsrm
.
wrap_encoder0
.
prepare_decoder
.
emb0
self
.
gsrm
.
wrap_encoder1
.
prepare_decoder
.
emb0
=
self
.
gsrm
.
wrap_encoder0
.
prepare_decoder
.
emb0
def
forward
(
self
,
inputs
,
others
):
def
forward
(
self
,
inputs
,
targets
=
None
):
others
=
targets
[
-
4
:]
encoder_word_pos
=
others
[
0
]
encoder_word_pos
=
others
[
0
]
gsrm_word_pos
=
others
[
1
]
gsrm_word_pos
=
others
[
1
]
gsrm_slf_attn_bias1
=
others
[
2
]
gsrm_slf_attn_bias1
=
others
[
2
]
...
...
ppocr/modeling/heads/table_att_head.py
0 → 100644
View file @
adc62fcd
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
import
paddle.nn
as
nn
import
paddle.nn.functional
as
F
import
numpy
as
np
class TableAttentionHead(nn.Layer):
    """Attention-based head that predicts table structure tokens and cell boxes.

    At every decoding step an ``AttentionGRUCell`` attends over the last
    feature map and produces a hidden output; the stacked outputs are mapped
    to ``elem_num`` structure classes and to 4 location values per step.
    """

    def __init__(self,
                 in_channels,
                 hidden_size,
                 loc_type,
                 in_max_len=488,
                 **kwargs):
        """Build the attention cell and the structure / location projections.

        Args:
            in_channels: Indexable collection; only its last entry is used
                as the feature channel count.
            hidden_size: Hidden size of the attention cell and the input
                width of the structure projection.
            loc_type: ``1`` selects a location projection fed by the decoder
                output only; any other value adds ``loc_fea_trans`` and
                feeds the location projection with decoder output
                concatenated with the transformed features.
            in_max_len: Selects the input width of ``loc_fea_trans``
                (640 -> 400, 800 -> 625, anything else -> 256). Only used
                when ``loc_type != 1``.
            **kwargs: Accepted and ignored.
        """
        super(TableAttentionHead, self).__init__()
        self.input_size = in_channels[-1]
        self.hidden_size = hidden_size
        # Number of structure classes (one-hot width and generator output).
        self.elem_num = 30
        # Not read anywhere inside this class.
        self.max_text_length = 100
        # Decoding runs for max_elem_length + 1 steps.
        self.max_elem_length = 500
        # Not read anywhere inside this class.
        self.max_cell_num = 500

        # NOTE: AttentionGRUCell accepts use_gru but does not read it.
        self.structure_attention_cell = AttentionGRUCell(
            self.input_size, hidden_size, self.elem_num, use_gru=False)
        self.structure_generator = nn.Linear(hidden_size, self.elem_num)
        self.loc_type = loc_type
        self.in_max_len = in_max_len

        if self.loc_type == 1:
            self.loc_generator = nn.Linear(hidden_size, 4)
        else:
            # The in-features must equal the flattened spatial size of the
            # feature map; presumably (in_max_len / 32) ** 2 -- TODO confirm.
            if self.in_max_len == 640:
                self.loc_fea_trans = nn.Linear(400, self.max_elem_length + 1)
            elif self.in_max_len == 800:
                self.loc_fea_trans = nn.Linear(625, self.max_elem_length + 1)
            else:
                self.loc_fea_trans = nn.Linear(256, self.max_elem_length + 1)
            self.loc_generator = nn.Linear(self.input_size + hidden_size, 4)

    def _char_to_onehot(self, input_char, onehot_dim):
        """Return ``F.one_hot(input_char, onehot_dim)``."""
        input_ont_hot = F.one_hot(input_char, onehot_dim)
        return input_ont_hot

    def forward(self, inputs, targets=None):
        """Decode ``max_elem_length + 1`` steps and predict structure and boxes.

        Args:
            inputs: Indexable collection of feature maps; only ``inputs[-1]``
                is used.
            targets: When the layer is in training mode and this is not
                ``None``, ``targets[0][:, i]`` is fed as the previous token
                at step ``i`` (teacher forcing). Otherwise decoding is
                greedy, starting from token 0.

        Returns:
            dict: ``'structure_probs'`` -- output of ``structure_generator``
            over all steps (softmax is applied only on the greedy path);
            ``'loc_preds'`` -- sigmoid of ``loc_generator`` over all steps.
        """
        # if and else branch are both needed when you want to assign a variable
        # if you modify the var in just one branch, then the modification will not work.
        # NOTE(review): the explicit `pass` branch and the `None`
        # pre-declarations below look like dynamic-to-static workarounds --
        # confirm before simplifying.
        fea = inputs[-1]
        if len(fea.shape) == 3:
            pass
        else:
            # Flatten every dimension after the second into one.
            last_shape = int(np.prod(fea.shape[2:]))  # gry added
            fea = paddle.reshape(fea, [fea.shape[0], fea.shape[1], last_shape])
            fea = fea.transpose([0, 2, 1])  # (NTC)(batch, width, channels)
        batch_size = fea.shape[0]

        hidden = paddle.zeros((batch_size, self.hidden_size))
        output_hiddens = []
        if self.training and targets is not None:
            # Teacher forcing: the ground-truth token drives each step.
            structure = targets[0]
            for i in range(self.max_elem_length + 1):
                elem_onehots = self._char_to_onehot(
                    structure[:, i], onehot_dim=self.elem_num)
                (outputs, hidden), alpha = self.structure_attention_cell(
                    hidden, fea, elem_onehots)
                output_hiddens.append(paddle.unsqueeze(outputs, axis=1))
            output = paddle.concat(output_hiddens, axis=1)
            # No softmax here: the training path returns the raw projection.
            structure_probs = self.structure_generator(output)
            if self.loc_type == 1:
                loc_preds = self.loc_generator(output)
                loc_preds = F.sigmoid(loc_preds)
            else:
                # Map the flattened spatial axis to one row per decoding
                # step so it can be concatenated with `output` on axis 2.
                loc_fea = fea.transpose([0, 2, 1])
                loc_fea = self.loc_fea_trans(loc_fea)
                loc_fea = loc_fea.transpose([0, 2, 1])
                loc_concat = paddle.concat([output, loc_fea], axis=2)
                loc_preds = self.loc_generator(loc_concat)
                loc_preds = F.sigmoid(loc_preds)
        else:
            # Greedy decoding: start from token 0 and feed back the argmax.
            temp_elem = paddle.zeros(shape=[batch_size], dtype="int32")
            structure_probs = None
            loc_preds = None
            elem_onehots = None
            outputs = None
            alpha = None
            # The loop bound is a tensor, not a Python int.
            max_elem_length = paddle.to_tensor(self.max_elem_length)
            i = 0
            while i < max_elem_length + 1:
                elem_onehots = self._char_to_onehot(
                    temp_elem, onehot_dim=self.elem_num)
                (outputs, hidden), alpha = self.structure_attention_cell(
                    hidden, fea, elem_onehots)
                output_hiddens.append(paddle.unsqueeze(outputs, axis=1))
                structure_probs_step = self.structure_generator(outputs)
                temp_elem = structure_probs_step.argmax(axis=1, dtype="int32")
                i += 1

            output = paddle.concat(output_hiddens, axis=1)
            structure_probs = self.structure_generator(output)
            structure_probs = F.softmax(structure_probs)
            if self.loc_type == 1:
                loc_preds = self.loc_generator(output)
                loc_preds = F.sigmoid(loc_preds)
            else:
                # Same location path as in the teacher-forcing branch.
                loc_fea = fea.transpose([0, 2, 1])
                loc_fea = self.loc_fea_trans(loc_fea)
                loc_fea = loc_fea.transpose([0, 2, 1])
                loc_concat = paddle.concat([output, loc_fea], axis=2)
                loc_preds = self.loc_generator(loc_concat)
                loc_preds = F.sigmoid(loc_preds)
        return {'structure_probs': structure_probs, 'loc_preds': loc_preds}
class AttentionGRUCell(nn.Layer):
    """One decoding step: additive attention over features, then a GRU cell."""

    def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False):
        """Create the attention projections and the recurrent cell.

        Args:
            input_size: Width of the last axis of the attended features.
            hidden_size: Width of the recurrent state and attention space.
            num_embeddings: Width of the one-hot vector appended to the
                attention context before it enters the recurrent cell.
            use_gru: Accepted but not read; the cell is always ``nn.GRUCell``.
        """
        super(AttentionGRUCell, self).__init__()
        rnn_input_size = input_size + num_embeddings

        self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.score = nn.Linear(hidden_size, 1, bias_attr=False)
        self.rnn = nn.GRUCell(
            input_size=rnn_input_size, hidden_size=hidden_size)
        self.hidden_size = hidden_size

    def forward(self, prev_hidden, batch_H, char_onehots):
        """Attend over ``batch_H`` and advance the recurrent cell by one step.

        Args:
            prev_hidden: Previous recurrent state.
            batch_H: Features to attend over; attention is normalised along
                axis 1.
            char_onehots: One-hot vector concatenated to the context on
                axis 1.

        Returns:
            tuple: ``(result of self.rnn, alpha)``, where ``alpha`` holds the
            attention weights after swapping axes 1 and 2.
        """
        projected_feats = self.i2h(batch_H)
        # Insert an axis at position 1 so the state broadcasts over the features.
        projected_state = paddle.unsqueeze(self.h2h(prev_hidden), axis=1)
        energy = self.score(
            paddle.tanh(paddle.add(projected_feats, projected_state)))

        alpha = paddle.transpose(F.softmax(energy, axis=1), [0, 2, 1])
        # Weighted sum of the features; drop the singleton axis afterwards.
        context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1)
        rnn_input = paddle.concat([context, char_onehots], 1)

        return self.rnn(rnn_input, prev_hidden), alpha
class AttentionLSTM(nn.Layer):
    """Attention decoder that unrolls an ``AttentionLSTMCell`` step by step."""

    def __init__(self, in_channels, out_channels, hidden_size, **kwargs):
        """Create the attention cell and the output projection.

        Args:
            in_channels: Feature width handed to the attention cell.
            out_channels: Number of classes (one-hot width and output width).
            hidden_size: Hidden size of the recurrent state.
            **kwargs: Accepted and ignored.
        """
        super(AttentionLSTM, self).__init__()
        self.input_size = in_channels
        self.hidden_size = hidden_size
        self.num_classes = out_channels

        self.attention_cell = AttentionLSTMCell(
            in_channels, hidden_size, out_channels, use_gru=False)
        self.generator = nn.Linear(hidden_size, out_channels)

    def _char_to_onehot(self, input_char, onehot_dim):
        """Return ``F.one_hot(input_char, onehot_dim)``."""
        return F.one_hot(input_char, onehot_dim)

    def forward(self, inputs, targets=None, batch_max_length=25):
        """Decode ``batch_max_length`` steps.

        Args:
            inputs: Features attended at every step; ``inputs.shape[0]`` is
                taken as the batch size.
            targets: If given, ``targets[:, i]`` is fed at step ``i``
                (teacher forcing). If ``None``, decoding is greedy and
                starts from class 0.
            batch_max_length: Number of decoding steps.

        Returns:
            Output of ``self.generator`` for every step, stacked on axis 1.
            On the greedy path this is ``None`` when ``batch_max_length``
            is 0.
        """
        batch_size = inputs.shape[0]
        num_steps = batch_max_length
        state_shape = (batch_size, self.hidden_size)
        state = (paddle.zeros(state_shape), paddle.zeros(state_shape))

        if targets is not None:
            step_states = []
            for step in range(num_steps):
                # One-hot vector for the step-th ground-truth character.
                onehot = self._char_to_onehot(
                    targets[:, step], onehot_dim=self.num_classes)
                cell_out, alpha = self.attention_cell(state, inputs, onehot)
                # Keep only the cell's new state pair for the next step.
                state = (cell_out[1][0], cell_out[1][1])
                step_states.append(paddle.unsqueeze(state[0], axis=1))
            stacked = paddle.concat(step_states, axis=1)
            return self.generator(stacked)

        prev_ids = paddle.zeros(shape=[batch_size], dtype="int32")
        probs = None
        for step in range(num_steps):
            onehot = self._char_to_onehot(
                prev_ids, onehot_dim=self.num_classes)
            cell_out, alpha = self.attention_cell(state, inputs, onehot)
            probs_step = self.generator(cell_out[0])
            state = (cell_out[1][0], cell_out[1][1])

            expanded = paddle.unsqueeze(probs_step, axis=1)
            if probs is None:
                probs = expanded
            else:
                probs = paddle.concat([probs, expanded], axis=1)
            # Feed the most likely class back in as the next input.
            prev_ids = probs_step.argmax(axis=1)
        return probs
class AttentionLSTMCell(nn.Layer):
    """One decoding step: additive attention over features, then an RNN cell."""

    def __init__(self, input_size, hidden_size, num_embeddings, use_gru=False):
        """Create the attention projections and the recurrent cell.

        Args:
            input_size: Width of the last axis of the attended features.
            hidden_size: Width of the recurrent state and attention space.
            num_embeddings: Width of the one-hot vector appended to the
                attention context before it enters the recurrent cell.
            use_gru: When true the cell is ``nn.GRUCell``, otherwise
                ``nn.LSTMCell``.
        """
        super(AttentionLSTMCell, self).__init__()
        self.i2h = nn.Linear(input_size, hidden_size, bias_attr=False)
        self.h2h = nn.Linear(hidden_size, hidden_size)
        self.score = nn.Linear(hidden_size, 1, bias_attr=False)

        cell_cls = nn.GRUCell if use_gru else nn.LSTMCell
        self.rnn = cell_cls(
            input_size=input_size + num_embeddings, hidden_size=hidden_size)

        self.hidden_size = hidden_size

    def forward(self, prev_hidden, batch_H, char_onehots):
        """Attend over ``batch_H`` and advance the recurrent cell by one step.

        Args:
            prev_hidden: Previous state; element 0 drives the attention
                query and the whole object is passed to ``self.rnn``.
            batch_H: Features to attend over; attention is normalised along
                axis 1.
            char_onehots: One-hot vector concatenated to the context on
                axis 1.

        Returns:
            tuple: ``(result of self.rnn, alpha)``, where ``alpha`` holds the
            attention weights after swapping axes 1 and 2.
        """
        projected_feats = self.i2h(batch_H)
        # Only the first element of the state pair forms the query.
        projected_state = paddle.unsqueeze(self.h2h(prev_hidden[0]), axis=1)
        energy = self.score(
            paddle.tanh(paddle.add(projected_feats, projected_state)))

        alpha = paddle.transpose(F.softmax(energy, axis=1), [0, 2, 1])
        # Weighted sum of the features; drop the singleton axis afterwards.
        context = paddle.squeeze(paddle.mm(alpha, batch_H), axis=1)
        rnn_input = paddle.concat([context, char_onehots], 1)

        return self.rnn(rnn_input, prev_hidden), alpha
ppocr/modeling/necks/__init__.py
View file @
adc62fcd
...
@@ -21,7 +21,8 @@ def build_neck(config):
...
@@ -21,7 +21,8 @@ def build_neck(config):
from
.sast_fpn
import
SASTFPN
from
.sast_fpn
import
SASTFPN
from
.rnn
import
SequenceEncoder
from
.rnn
import
SequenceEncoder
from
.pg_fpn
import
PGFPN
from
.pg_fpn
import
PGFPN
support_dict
=
[
'DBFPN'
,
'EASTFPN'
,
'SASTFPN'
,
'SequenceEncoder'
,
'PGFPN'
]
from
.table_fpn
import
TableFPN
support_dict
=
[
'DBFPN'
,
'EASTFPN'
,
'SASTFPN'
,
'SequenceEncoder'
,
'PGFPN'
,
'TableFPN'
]
module_name
=
config
.
pop
(
'name'
)
module_name
=
config
.
pop
(
'name'
)
assert
module_name
in
support_dict
,
Exception
(
'neck only support {}'
.
format
(
assert
module_name
in
support_dict
,
Exception
(
'neck only support {}'
.
format
(
...
...
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment