ModelZoo / PaddleOCR_paddle_onnxruntime / Commits

Commit f1506916, authored May 18, 2023 by sugon_cxj
    first commit
Parent: 55c28ed5
Pipeline #266 canceled with stages
Changes: 432

Showing 20 changed files with 2201 additions and 0 deletions (+2201 -0)
ppocr/modeling/backbones/rec_mobilenet_v3.py  +138  -0
ppocr/modeling/backbones/rec_mv1_enhance.py  +256  -0
ppocr/modeling/backbones/rec_nrtr_mtb.py  +48  -0
ppocr/modeling/backbones/rec_resnet_31.py  +210  -0
ppocr/modeling/backbones/rec_resnet_aster.py  +143  -0
ppocr/modeling/backbones/rec_resnet_fpn.py  +306  -0
ppocr/modeling/backbones/rec_resnet_vd.py  +286  -0
ppocr/modeling/backbones/rec_svtrnet.py  +584  -0
ppocr/modeling/backbones/vqa_layoutlm.py  +172  -0
ppocr/modeling/heads/__init__.py  +58  -0
ppocr/modeling/heads/__pycache__/__init__.cpython-37.pyc  +0  -0
ppocr/modeling/heads/__pycache__/cls_head.cpython-37.pyc  +0  -0
ppocr/modeling/heads/__pycache__/det_db_head.cpython-37.pyc  +0  -0
ppocr/modeling/heads/__pycache__/det_east_head.cpython-37.pyc  +0  -0
ppocr/modeling/heads/__pycache__/det_fce_head.cpython-37.pyc  +0  -0
ppocr/modeling/heads/__pycache__/det_pse_head.cpython-37.pyc  +0  -0
ppocr/modeling/heads/__pycache__/det_sast_head.cpython-37.pyc  +0  -0
ppocr/modeling/heads/__pycache__/e2e_pg_head.cpython-37.pyc  +0  -0
ppocr/modeling/heads/__pycache__/kie_sdmgr_head.cpython-37.pyc  +0  -0
ppocr/modeling/heads/__pycache__/multiheadAttention.cpython-37.pyc  +0  -0
ppocr/modeling/backbones/rec_mobilenet_v3.py (new file, mode 100755)

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle import nn
from ppocr.modeling.backbones.det_mobilenet_v3 import ResidualUnit, ConvBNLayer, make_divisible

__all__ = ['MobileNetV3']


class MobileNetV3(nn.Layer):
    def __init__(self,
                 in_channels=3,
                 model_name='small',
                 scale=0.5,
                 large_stride=None,
                 small_stride=None,
                 disable_se=False,
                 **kwargs):
        super(MobileNetV3, self).__init__()
        self.disable_se = disable_se
        if small_stride is None:
            small_stride = [2, 2, 2, 2]
        if large_stride is None:
            large_stride = [1, 2, 2, 2]

        assert isinstance(large_stride, list), "large_stride type must " \
            "be list but got {}".format(type(large_stride))
        assert isinstance(small_stride, list), "small_stride type must " \
            "be list but got {}".format(type(small_stride))
        assert len(large_stride) == 4, "large_stride length must be " \
            "4 but got {}".format(len(large_stride))
        assert len(small_stride) == 4, "small_stride length must be " \
            "4 but got {}".format(len(small_stride))

        if model_name == "large":
            cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, 'relu', large_stride[0]],
                [3, 64, 24, False, 'relu', (large_stride[1], 1)],
                [3, 72, 24, False, 'relu', 1],
                [5, 72, 40, True, 'relu', (large_stride[2], 1)],
                [5, 120, 40, True, 'relu', 1],
                [5, 120, 40, True, 'relu', 1],
                [3, 240, 80, False, 'hardswish', 1],
                [3, 200, 80, False, 'hardswish', 1],
                [3, 184, 80, False, 'hardswish', 1],
                [3, 184, 80, False, 'hardswish', 1],
                [3, 480, 112, True, 'hardswish', 1],
                [3, 672, 112, True, 'hardswish', 1],
                [5, 672, 160, True, 'hardswish', (large_stride[3], 1)],
                [5, 960, 160, True, 'hardswish', 1],
                [5, 960, 160, True, 'hardswish', 1],
            ]
            cls_ch_squeeze = 960
        elif model_name == "small":
            cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, 'relu', (small_stride[0], 1)],
                [3, 72, 24, False, 'relu', (small_stride[1], 1)],
                [3, 88, 24, False, 'relu', 1],
                [5, 96, 40, True, 'hardswish', (small_stride[2], 1)],
                [5, 240, 40, True, 'hardswish', 1],
                [5, 240, 40, True, 'hardswish', 1],
                [5, 120, 48, True, 'hardswish', 1],
                [5, 144, 48, True, 'hardswish', 1],
                [5, 288, 96, True, 'hardswish', (small_stride[3], 1)],
                [5, 576, 96, True, 'hardswish', 1],
                [5, 576, 96, True, 'hardswish', 1],
            ]
            cls_ch_squeeze = 576
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert scale in supported_scale, \
            "supported scales are {} but input scale is {}".format(
                supported_scale, scale)

        inplanes = 16
        # conv1
        self.conv1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=make_divisible(inplanes * scale),
            kernel_size=3,
            stride=2,
            padding=1,
            groups=1,
            if_act=True,
            act='hardswish')
        i = 0
        block_list = []
        inplanes = make_divisible(inplanes * scale)
        for (k, exp, c, se, nl, s) in cfg:
            se = se and not self.disable_se
            block_list.append(
                ResidualUnit(
                    in_channels=inplanes,
                    mid_channels=make_divisible(scale * exp),
                    out_channels=make_divisible(scale * c),
                    kernel_size=k,
                    stride=s,
                    use_se=se,
                    act=nl))
            inplanes = make_divisible(scale * c)
            i += 1
        self.blocks = nn.Sequential(*block_list)

        self.conv2 = ConvBNLayer(
            in_channels=inplanes,
            out_channels=make_divisible(scale * cls_ch_squeeze),
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            if_act=True,
            act='hardswish')

        self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
        self.out_channels = make_divisible(scale * cls_ch_squeeze)

    def forward(self, x):
        x = self.conv1(x)
        x = self.blocks(x)
        x = self.conv2(x)
        x = self.pool(x)
        return x
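As a quick orientation for this backbone, a minimal smoke-test sketch; it assumes PaddleOCR's package root is importable, and the 64-pixel-high input is an illustrative choice that keeps every stride-2 halving integral through the final 2x2 max-pool:

import paddle
from ppocr.modeling.backbones.rec_mobilenet_v3 import MobileNetV3

# The recognition variant applies its stage strides as (s, 1), so height
# shrinks aggressively while width (the character axis) is mostly preserved.
backbone = MobileNetV3(in_channels=3, model_name='small', scale=0.5)
x = paddle.randn([1, 3, 64, 320])  # (N, C, H, W) text-line crop, shape illustrative
feat = backbone(x)
print(feat.shape)  # channel dim should equal backbone.out_channels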
ppocr/modeling/backbones/rec_mv1_enhance.py (new file, mode 100755)

# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This code is refer from: https://github.com/PaddlePaddle/PaddleClas/blob/develop/ppcls/arch/backbone/legendary_models/pp_lcnet.py

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import numpy as np
import paddle
from paddle import ParamAttr, reshape, transpose
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import Conv2D, BatchNorm, Linear, Dropout
from paddle.nn import AdaptiveAvgPool2D, MaxPool2D, AvgPool2D
from paddle.nn.initializer import KaimingNormal
from paddle.regularizer import L2Decay
from paddle.nn.functional import hardswish, hardsigmoid


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 num_channels,
                 filter_size,
                 num_filters,
                 stride,
                 padding,
                 channels=None,
                 num_groups=1,
                 act='hard_swish'):
        super(ConvBNLayer, self).__init__()

        self._conv = Conv2D(
            in_channels=num_channels,
            out_channels=num_filters,
            kernel_size=filter_size,
            stride=stride,
            padding=padding,
            groups=num_groups,
            weight_attr=ParamAttr(initializer=KaimingNormal()),
            bias_attr=False)

        self._batch_norm = BatchNorm(
            num_filters,
            act=act,
            param_attr=ParamAttr(regularizer=L2Decay(0.0)),
            bias_attr=ParamAttr(regularizer=L2Decay(0.0)))

    def forward(self, inputs):
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y


class DepthwiseSeparable(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_filters1,
                 num_filters2,
                 num_groups,
                 stride,
                 scale,
                 dw_size=3,
                 padding=1,
                 use_se=False):
        super(DepthwiseSeparable, self).__init__()
        self.use_se = use_se
        self._depthwise_conv = ConvBNLayer(
            num_channels=num_channels,
            num_filters=int(num_filters1 * scale),
            filter_size=dw_size,
            stride=stride,
            padding=padding,
            num_groups=int(num_groups * scale))
        if use_se:
            self._se = SEModule(int(num_filters1 * scale))
        self._pointwise_conv = ConvBNLayer(
            num_channels=int(num_filters1 * scale),
            filter_size=1,
            num_filters=int(num_filters2 * scale),
            stride=1,
            padding=0)

    def forward(self, inputs):
        y = self._depthwise_conv(inputs)
        if self.use_se:
            y = self._se(y)
        y = self._pointwise_conv(y)
        return y


class MobileNetV1Enhance(nn.Layer):
    def __init__(self,
                 in_channels=3,
                 scale=0.5,
                 last_conv_stride=1,
                 last_pool_type='max',
                 **kwargs):
        super().__init__()
        self.scale = scale
        self.block_list = []

        self.conv1 = ConvBNLayer(
            num_channels=3,
            filter_size=3,
            channels=3,
            num_filters=int(32 * scale),
            stride=2,
            padding=1)

        conv2_1 = DepthwiseSeparable(
            num_channels=int(32 * scale),
            num_filters1=32,
            num_filters2=64,
            num_groups=32,
            stride=1,
            scale=scale)
        self.block_list.append(conv2_1)

        conv2_2 = DepthwiseSeparable(
            num_channels=int(64 * scale),
            num_filters1=64,
            num_filters2=128,
            num_groups=64,
            stride=1,
            scale=scale)
        self.block_list.append(conv2_2)

        conv3_1 = DepthwiseSeparable(
            num_channels=int(128 * scale),
            num_filters1=128,
            num_filters2=128,
            num_groups=128,
            stride=1,
            scale=scale)
        self.block_list.append(conv3_1)

        conv3_2 = DepthwiseSeparable(
            num_channels=int(128 * scale),
            num_filters1=128,
            num_filters2=256,
            num_groups=128,
            stride=(2, 1),
            scale=scale)
        self.block_list.append(conv3_2)

        conv4_1 = DepthwiseSeparable(
            num_channels=int(256 * scale),
            num_filters1=256,
            num_filters2=256,
            num_groups=256,
            stride=1,
            scale=scale)
        self.block_list.append(conv4_1)

        conv4_2 = DepthwiseSeparable(
            num_channels=int(256 * scale),
            num_filters1=256,
            num_filters2=512,
            num_groups=256,
            stride=(2, 1),
            scale=scale)
        self.block_list.append(conv4_2)

        for _ in range(5):
            conv5 = DepthwiseSeparable(
                num_channels=int(512 * scale),
                num_filters1=512,
                num_filters2=512,
                num_groups=512,
                stride=1,
                dw_size=5,
                padding=2,
                scale=scale,
                use_se=False)
            self.block_list.append(conv5)

        conv5_6 = DepthwiseSeparable(
            num_channels=int(512 * scale),
            num_filters1=512,
            num_filters2=1024,
            num_groups=512,
            stride=(2, 1),
            dw_size=5,
            padding=2,
            scale=scale,
            use_se=True)
        self.block_list.append(conv5_6)

        conv6 = DepthwiseSeparable(
            num_channels=int(1024 * scale),
            num_filters1=1024,
            num_filters2=1024,
            num_groups=1024,
            stride=last_conv_stride,
            dw_size=5,
            padding=2,
            use_se=True,
            scale=scale)
        self.block_list.append(conv6)

        self.block_list = nn.Sequential(*self.block_list)
        if last_pool_type == 'avg':
            self.pool = nn.AvgPool2D(kernel_size=2, stride=2, padding=0)
        else:
            self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
        self.out_channels = int(1024 * scale)

    def forward(self, inputs):
        y = self.conv1(inputs)
        y = self.block_list(y)
        y = self.pool(y)
        return y


class SEModule(nn.Layer):
    def __init__(self, channel, reduction=4):
        super(SEModule, self).__init__()
        self.avg_pool = AdaptiveAvgPool2D(1)
        self.conv1 = Conv2D(
            in_channels=channel,
            out_channels=channel // reduction,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(),
            bias_attr=ParamAttr())
        self.conv2 = Conv2D(
            in_channels=channel // reduction,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(),
            bias_attr=ParamAttr())

    def forward(self, inputs):
        outputs = self.avg_pool(inputs)
        outputs = self.conv1(outputs)
        outputs = F.relu(outputs)
        outputs = self.conv2(outputs)
        outputs = hardsigmoid(outputs)
        return paddle.multiply(x=inputs, y=outputs)
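The SEModule above implements squeeze-and-excitation gating: a global average pool squeezes each channel to a scalar, two 1x1 convs form a bottleneck, and the hardsigmoid output rescales the input channelwise. A tiny standalone illustration of that final gating step (the tensors here are made-up stand-ins, not the module's real activations):

import paddle
import paddle.nn.functional as F

# Per-channel gating as in SEModule: pooled stats -> bottleneck -> hardsigmoid -> multiply.
feat = paddle.randn([1, 8, 4, 16])                 # (N, C, H, W) feature map
gate = F.hardsigmoid(paddle.randn([1, 8, 1, 1]))   # stand-in for conv2's output, one scalar per channel
out = paddle.multiply(x=feat, y=gate)              # broadcasts the gate over H and W
print(out.shape)  # [1, 8, 4, 16]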
ppocr/modeling/backbones/rec_nrtr_mtb.py (new file, mode 100755)

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from paddle import nn
import paddle


class MTB(nn.Layer):
    def __init__(self, cnn_num, in_channels):
        super(MTB, self).__init__()
        self.block = nn.Sequential()
        self.out_channels = in_channels
        self.cnn_num = cnn_num
        if self.cnn_num == 2:
            for i in range(self.cnn_num):
                self.block.add_sublayer(
                    'conv_{}'.format(i),
                    nn.Conv2D(
                        in_channels=in_channels
                        if i == 0 else 32 * (2**(i - 1)),
                        out_channels=32 * (2**i),
                        kernel_size=3,
                        stride=2,
                        padding=1))
                self.block.add_sublayer('relu_{}'.format(i), nn.ReLU())
                self.block.add_sublayer('bn_{}'.format(i),
                                        nn.BatchNorm2D(32 * (2**i)))

    def forward(self, images):
        x = self.block(images)
        if self.cnn_num == 2:
            # (b, w, h, c)
            x = paddle.transpose(x, [0, 3, 2, 1])
            x_shape = paddle.shape(x)
            x = paddle.reshape(
                x, [x_shape[0], x_shape[1], x_shape[2] * x_shape[3]])
        return x
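MTB's forward flattens the 4-D feature map into the (batch, width, height*channels) sequence the NRTR encoder consumes. A minimal sketch, assuming the module is importable from this path and using an illustrative 32x100 grayscale crop:

import paddle
from ppocr.modeling.backbones.rec_nrtr_mtb import MTB

# With cnn_num=2, two stride-2 convs (32 then 64 channels) reduce H and W
# by 4x, and the map is flattened along width into a sequence.
mtb = MTB(cnn_num=2, in_channels=1)
x = paddle.randn([2, 1, 32, 100])
seq = mtb(x)
print(seq.shape)  # [2, 25, 512]: width 100/4, features 8 (height) * 64 (channels)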
ppocr/modeling/backbones/rec_resnet_31.py (new file, mode 100755)

# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/layers/conv_layer.py
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/backbones/resnet31_ocr.py
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
import numpy as np

__all__ = ["ResNet31"]


def conv3x3(in_channel, out_channel, stride=1):
    return nn.Conv2D(
        in_channel,
        out_channel,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias_attr=False)


class BasicBlock(nn.Layer):
    expansion = 1

    def __init__(self, in_channels, channels, stride=1, downsample=False):
        super().__init__()
        self.conv1 = conv3x3(in_channels, channels, stride)
        self.bn1 = nn.BatchNorm2D(channels)
        self.relu = nn.ReLU()
        self.conv2 = conv3x3(channels, channels)
        self.bn2 = nn.BatchNorm2D(channels)
        self.downsample = downsample
        if downsample:
            self.downsample = nn.Sequential(
                nn.Conv2D(
                    in_channels,
                    channels * self.expansion,
                    1,
                    stride,
                    bias_attr=False),
                nn.BatchNorm2D(channels * self.expansion), )
        else:
            self.downsample = nn.Sequential()
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        if self.downsample:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out


class ResNet31(nn.Layer):
    '''
    Args:
        in_channels (int): Number of channels of input image tensor.
        layers (list[int]): List of BasicBlock number for each stage.
        channels (list[int]): List of out_channels of Conv2d layer.
        out_indices (None | Sequence[int]): Indices of output stages.
        last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage.
    '''

    def __init__(self,
                 in_channels=3,
                 layers=[1, 2, 5, 3],
                 channels=[64, 128, 256, 256, 512, 512, 512],
                 out_indices=None,
                 last_stage_pool=False):
        super(ResNet31, self).__init__()
        assert isinstance(in_channels, int)
        assert isinstance(last_stage_pool, bool)

        self.out_indices = out_indices
        self.last_stage_pool = last_stage_pool

        # conv 1 (Conv Conv)
        self.conv1_1 = nn.Conv2D(
            in_channels, channels[0], kernel_size=3, stride=1, padding=1)
        self.bn1_1 = nn.BatchNorm2D(channels[0])
        self.relu1_1 = nn.ReLU()

        self.conv1_2 = nn.Conv2D(
            channels[0], channels[1], kernel_size=3, stride=1, padding=1)
        self.bn1_2 = nn.BatchNorm2D(channels[1])
        self.relu1_2 = nn.ReLU()

        # conv 2 (Max-pooling, Residual block, Conv)
        self.pool2 = nn.MaxPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block2 = self._make_layer(channels[1], channels[2], layers[0])
        self.conv2 = nn.Conv2D(
            channels[2], channels[2], kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2D(channels[2])
        self.relu2 = nn.ReLU()

        # conv 3 (Max-pooling, Residual block, Conv)
        self.pool3 = nn.MaxPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block3 = self._make_layer(channels[2], channels[3], layers[1])
        self.conv3 = nn.Conv2D(
            channels[3], channels[3], kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2D(channels[3])
        self.relu3 = nn.ReLU()

        # conv 4 (Max-pooling, Residual block, Conv)
        self.pool4 = nn.MaxPool2D(
            kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True)
        self.block4 = self._make_layer(channels[3], channels[4], layers[2])
        self.conv4 = nn.Conv2D(
            channels[4], channels[4], kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2D(channels[4])
        self.relu4 = nn.ReLU()

        # conv 5 ((Max-pooling), Residual block, Conv)
        self.pool5 = None
        if self.last_stage_pool:
            self.pool5 = nn.MaxPool2D(
                kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block5 = self._make_layer(channels[4], channels[5], layers[3])
        self.conv5 = nn.Conv2D(
            channels[5], channels[5], kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2D(channels[5])
        self.relu5 = nn.ReLU()

        self.out_channels = channels[-1]

    def _make_layer(self, input_channels, output_channels, blocks):
        layers = []
        for _ in range(blocks):
            downsample = None
            if input_channels != output_channels:
                downsample = nn.Sequential(
                    nn.Conv2D(
                        input_channels,
                        output_channels,
                        kernel_size=1,
                        stride=1,
                        bias_attr=False),
                    nn.BatchNorm2D(output_channels), )
            layers.append(
                BasicBlock(
                    input_channels, output_channels, downsample=downsample))
            input_channels = output_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1_1(x)
        x = self.bn1_1(x)
        x = self.relu1_1(x)

        x = self.conv1_2(x)
        x = self.bn1_2(x)
        x = self.relu1_2(x)

        outs = []
        for i in range(4):
            layer_index = i + 2
            pool_layer = getattr(self, f'pool{layer_index}')
            block_layer = getattr(self, f'block{layer_index}')
            conv_layer = getattr(self, f'conv{layer_index}')
            bn_layer = getattr(self, f'bn{layer_index}')
            relu_layer = getattr(self, f'relu{layer_index}')

            if pool_layer is not None:
                x = pool_layer(x)
            x = block_layer(x)
            x = conv_layer(x)
            x = bn_layer(x)
            x = relu_layer(x)

            outs.append(x)

        if self.out_indices is not None:
            return tuple([outs[i] for i in self.out_indices])

        return x
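A short usage sketch for ResNet31's out_indices switch (illustrative 32x100 input; when out_indices is set, forward returns a tuple of intermediate stage outputs instead of only the last map):

import paddle
from ppocr.modeling.backbones.rec_resnet_31 import ResNet31

# Indices address the list of four stage outputs collected in forward.
backbone = ResNet31(in_channels=3, out_indices=[2, 3])
x = paddle.randn([1, 3, 32, 100])
feats = backbone(x)
print([f.shape for f in feats])  # two 512-channel maps with the default channels list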
ppocr/modeling/backbones/rec_resnet_aster.py (new file, mode 100755)

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/resnet_aster.py
"""

import paddle
import paddle.nn as nn

import sys
import math


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2D(
        in_planes,
        out_planes,
        kernel_size=3,
        stride=stride,
        padding=1,
        bias_attr=False)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2D(
        in_planes, out_planes, kernel_size=1, stride=stride, bias_attr=False)


def get_sinusoid_encoding(n_position, feat_dim, wave_length=10000):
    # [n_position]
    positions = paddle.arange(0, n_position)
    # [feat_dim]
    dim_range = paddle.arange(0, feat_dim)
    dim_range = paddle.pow(wave_length, 2 * (dim_range // 2) / feat_dim)
    # [n_position, feat_dim]
    angles = paddle.unsqueeze(
        positions, axis=1) / paddle.unsqueeze(
            dim_range, axis=0)
    angles = paddle.cast(angles, "float32")
    angles[:, 0::2] = paddle.sin(angles[:, 0::2])
    angles[:, 1::2] = paddle.cos(angles[:, 1::2])
    return angles


class AsterBlock(nn.Layer):
    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(AsterBlock, self).__init__()
        self.conv1 = conv1x1(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2D(planes)
        self.relu = nn.ReLU()
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2D(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            residual = self.downsample(x)
        out += residual
        out = self.relu(out)
        return out


class ResNet_ASTER(nn.Layer):
    """For aster or crnn"""

    def __init__(self, with_lstm=True, n_group=1, in_channels=3):
        super(ResNet_ASTER, self).__init__()
        self.with_lstm = with_lstm
        self.n_group = n_group

        self.layer0 = nn.Sequential(
            nn.Conv2D(
                in_channels,
                32,
                kernel_size=(3, 3),
                stride=1,
                padding=1,
                bias_attr=False),
            nn.BatchNorm2D(32),
            nn.ReLU())

        self.inplanes = 32
        self.layer1 = self._make_layer(32, 3, [2, 2])  # [16, 50]
        self.layer2 = self._make_layer(64, 4, [2, 2])  # [8, 25]
        self.layer3 = self._make_layer(128, 6, [2, 1])  # [4, 25]
        self.layer4 = self._make_layer(256, 6, [2, 1])  # [2, 25]
        self.layer5 = self._make_layer(512, 3, [2, 1])  # [1, 25]

        if with_lstm:
            self.rnn = nn.LSTM(512, 256, direction="bidirect", num_layers=2)
            self.out_channels = 2 * 256
        else:
            self.out_channels = 512

    def _make_layer(self, planes, blocks, stride):
        downsample = None
        if stride != [1, 1] or self.inplanes != planes:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes, stride),
                nn.BatchNorm2D(planes))

        layers = []
        layers.append(AsterBlock(self.inplanes, planes, stride, downsample))
        self.inplanes = planes
        for _ in range(1, blocks):
            layers.append(AsterBlock(self.inplanes, planes))
        return nn.Sequential(*layers)

    def forward(self, x):
        x0 = self.layer0(x)
        x1 = self.layer1(x0)
        x2 = self.layer2(x1)
        x3 = self.layer3(x2)
        x4 = self.layer4(x3)
        x5 = self.layer5(x4)

        cnn_feat = x5.squeeze(2)  # [N, c, w]
        cnn_feat = paddle.transpose(cnn_feat, perm=[0, 2, 1])
        if self.with_lstm:
            rnn_feat, _ = self.rnn(cnn_feat)
            return rnn_feat
        else:
            return cnn_feat
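A minimal sketch of the with_lstm toggle (illustrative 32x100 input; the widths below follow the per-stage strides annotated in __init__):

import paddle
from ppocr.modeling.backbones.rec_resnet_aster import ResNet_ASTER

# with_lstm=True appends a 2-layer BiLSTM, so the sequence feature stays
# 512-dim (2 directions x 256); with_lstm=False returns the raw CNN sequence.
model = ResNet_ASTER(with_lstm=True, in_channels=3)
x = paddle.randn([2, 3, 32, 100])
feat = model(x)
print(feat.shape, model.out_channels)  # [2, 25, 512], 512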
ppocr/modeling/backbones/rec_resnet_fpn.py (new file, mode 100755)

#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from paddle import nn, ParamAttr
from paddle.nn import functional as F
import paddle
import numpy as np

__all__ = ["ResNetFPN"]


class ResNetFPN(nn.Layer):
    def __init__(self, in_channels=1, layers=50, **kwargs):
        super(ResNetFPN, self).__init__()
        supported_layers = {
            18: {
                'depth': [2, 2, 2, 2],
                'block_class': BasicBlock
            },
            34: {
                'depth': [3, 4, 6, 3],
                'block_class': BasicBlock
            },
            50: {
                'depth': [3, 4, 6, 3],
                'block_class': BottleneckBlock
            },
            101: {
                'depth': [3, 4, 23, 3],
                'block_class': BottleneckBlock
            },
            152: {
                'depth': [3, 8, 36, 3],
                'block_class': BottleneckBlock
            }
        }
        stride_list = [(2, 2), (2, 2), (1, 1), (1, 1)]
        num_filters = [64, 128, 256, 512]
        self.depth = supported_layers[layers]['depth']
        self.F = []
        self.conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=64,
            kernel_size=7,
            stride=2,
            act="relu",
            name="conv1")
        self.block_list = []
        in_ch = 64
        if layers >= 50:
            for block in range(len(self.depth)):
                for i in range(self.depth[block]):
                    if layers in [101, 152] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)
                    block_list = self.add_sublayer(
                        "bottleneckBlock_{}_{}".format(block, i),
                        BottleneckBlock(
                            in_channels=in_ch,
                            out_channels=num_filters[block],
                            stride=stride_list[block] if i == 0 else 1,
                            name=conv_name))
                    in_ch = num_filters[block] * 4
                    self.block_list.append(block_list)
                self.F.append(block_list)
        else:
            for block in range(len(self.depth)):
                for i in range(self.depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    if i == 0 and block != 0:
                        stride = (2, 1)
                    else:
                        stride = (1, 1)
                    basic_block = self.add_sublayer(
                        conv_name,
                        BasicBlock(
                            in_channels=in_ch,
                            out_channels=num_filters[block],
                            stride=stride_list[block] if i == 0 else 1,
                            is_first=block == i == 0,
                            name=conv_name))
                    in_ch = basic_block.out_channels
                    self.block_list.append(basic_block)
        out_ch_list = [in_ch // 4, in_ch // 2, in_ch]
        self.base_block = []
        self.conv_trans = []
        self.bn_block = []
        for i in [-2, -3]:
            in_channels = out_ch_list[i + 1] + out_ch_list[i]

            self.base_block.append(
                self.add_sublayer(
                    "F_{}_base_block_0".format(i),
                    nn.Conv2D(
                        in_channels=in_channels,
                        out_channels=out_ch_list[i],
                        kernel_size=1,
                        weight_attr=ParamAttr(trainable=True),
                        bias_attr=ParamAttr(trainable=True))))
            self.base_block.append(
                self.add_sublayer(
                    "F_{}_base_block_1".format(i),
                    nn.Conv2D(
                        in_channels=out_ch_list[i],
                        out_channels=out_ch_list[i],
                        kernel_size=3,
                        padding=1,
                        weight_attr=ParamAttr(trainable=True),
                        bias_attr=ParamAttr(trainable=True))))
            self.base_block.append(
                self.add_sublayer(
                    "F_{}_base_block_2".format(i),
                    nn.BatchNorm(
                        num_channels=out_ch_list[i],
                        act="relu",
                        param_attr=ParamAttr(trainable=True),
                        bias_attr=ParamAttr(trainable=True))))
        self.base_block.append(
            self.add_sublayer(
                "F_{}_base_block_3".format(i),
                nn.Conv2D(
                    in_channels=out_ch_list[i],
                    out_channels=512,
                    kernel_size=1,
                    bias_attr=ParamAttr(trainable=True),
                    weight_attr=ParamAttr(trainable=True))))
        self.out_channels = 512

    def __call__(self, x):
        x = self.conv(x)
        fpn_list = []
        F = []
        for i in range(len(self.depth)):
            fpn_list.append(np.sum(self.depth[:i + 1]))

        for i, block in enumerate(self.block_list):
            x = block(x)
            for number in fpn_list:
                if i + 1 == number:
                    F.append(x)
        base = F[-1]

        j = 0
        for i, block in enumerate(self.base_block):
            if i % 3 == 0 and i < 6:
                j = j + 1
                b, c, w, h = F[-j - 1].shape
                if [w, h] == list(base.shape[2:]):
                    base = base
                else:
                    base = self.conv_trans[j - 1](base)
                    base = self.bn_block[j - 1](base)
                base = paddle.concat([base, F[-j - 1]], axis=1)
            base = block(base)
        return base


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act=None,
                 name=None):
        super(ConvBNLayer, self).__init__()
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=2 if stride == (1, 1) else kernel_size,
            dilation=2 if stride == (1, 1) else 1,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + '.conv2d.output.1.w_0'),
            bias_attr=False, )

        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=name + '.output.1.w_0'),
            bias_attr=ParamAttr(name=name + '.output.1.b_0'),
            moving_mean_name=bn_name + "_mean",
            moving_variance_name=bn_name + "_variance")

    def __call__(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class ShortCut(nn.Layer):
    def __init__(self, in_channels, out_channels, stride, name, is_first=False):
        super(ShortCut, self).__init__()
        self.use_conv = True

        if in_channels != out_channels or stride != 1 or is_first == True:
            if stride == (1, 1):
                self.conv = ConvBNLayer(
                    in_channels, out_channels, 1, 1, name=name)
            else:  # stride==(2,2)
                self.conv = ConvBNLayer(
                    in_channels, out_channels, 1, stride, name=name)
        else:
            self.use_conv = False

    def forward(self, x):
        if self.use_conv:
            x = self.conv(x)
        return x


class BottleneckBlock(nn.Layer):
    def __init__(self, in_channels, out_channels, stride, name):
        super(BottleneckBlock, self).__init__()
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels * 4,
            kernel_size=1,
            act=None,
            name=name + "_branch2c")
        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=out_channels * 4,
            stride=stride,
            is_first=False,
            name=name + "_branch1")
        self.out_channels = out_channels * 4

    def forward(self, x):
        y = self.conv0(x)
        y = self.conv1(y)
        y = self.conv2(y)
        y = y + self.short(x)
        y = F.relu(y)
        return y


class BasicBlock(nn.Layer):
    def __init__(self, in_channels, out_channels, stride, name, is_first):
        super(BasicBlock, self).__init__()
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")
        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=out_channels,
            stride=stride,
            is_first=is_first,
            name=name + "_branch1")
        self.out_channels = out_channels

    def forward(self, x):
        y = self.conv0(x)
        y = self.conv1(y)
        y = y + self.short(x)
        return F.relu(y)
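A usage sketch for the FPN merge path (illustrative SRN-style 64x256 grayscale input; with the stride list above, the last three stage maps share one spatial size, so only the concat branch of __call__ is exercised and the output lands at 512 channels):

import paddle
from ppocr.modeling.backbones.rec_resnet_fpn import ResNetFPN

# Stage outputs are collected into F; the deepest map is fused with the two
# preceding ones via the 1x1 / 3x3 / BN base_block stack built in __init__.
fpn = ResNetFPN(in_channels=1, layers=50)
x = paddle.randn([1, 1, 64, 256])
out = fpn(x)
print(out.shape, fpn.out_channels)  # expect [1, 512, 8, 32], 512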
ppocr/modeling/backbones/rec_resnet_vd.py (new file, mode 100755)

# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F

__all__ = ["ResNet"]


class ConvBNLayer(nn.Layer):
    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=1,
            groups=1,
            is_vd_mode=False,
            act=None,
            name=None, ):
        super(ConvBNLayer, self).__init__()

        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=stride, stride=stride, padding=0, ceil_mode=True)
        self._conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1 if is_vd_mode else stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self._batch_norm = nn.BatchNorm(
            out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def forward(self, inputs):
        if self.is_vd_mode:
            inputs = self._pool2d_avg(inputs)
        y = self._conv(inputs)
        y = self._batch_norm(y)
        return y


class BottleneckBlock(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super(BottleneckBlock, self).__init__()

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels * 4,
            kernel_size=1,
            act=None,
            name=name + "_branch2c")

        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels * 4,
                kernel_size=1,
                stride=stride,
                is_vd_mode=not if_first and stride[0] != 1,
                name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)
        conv2 = self.conv2(conv1)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv2)
        y = F.relu(y)
        return y


class BasicBlock(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        super(BasicBlock, self).__init__()
        self.stride = stride
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")

        if not shortcut:
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=stride,
                is_vd_mode=not if_first and stride[0] != 1,
                name=name + "_branch1")

        self.shortcut = shortcut

    def forward(self, inputs):
        y = self.conv0(inputs)
        conv1 = self.conv1(y)

        if self.shortcut:
            short = inputs
        else:
            short = self.short(inputs)
        y = paddle.add(x=short, y=conv1)
        y = F.relu(y)
        return y


class ResNet(nn.Layer):
    def __init__(self, in_channels=3, layers=50, **kwargs):
        super(ResNet, self).__init__()

        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]
        num_channels = [64, 256, 512,
                        1024] if layers >= 50 else [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512]

        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=32,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_1")
        self.conv1_2 = ConvBNLayer(
            in_channels=32,
            out_channels=32,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_2")
        self.conv1_3 = ConvBNLayer(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_3")
        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

        self.block_list = []
        if layers >= 50:
            for block in range(len(depth)):
                shortcut = False
                for i in range(depth[block]):
                    if layers in [101, 152, 200] and block == 2:
                        if i == 0:
                            conv_name = "res" + str(block + 2) + "a"
                        else:
                            conv_name = "res" + str(block + 2) + "b" + str(i)
                    else:
                        conv_name = "res" + str(block + 2) + chr(97 + i)

                    if i == 0 and block != 0:
                        stride = (2, 1)
                    else:
                        stride = (1, 1)
                    bottleneck_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BottleneckBlock(
                            in_channels=num_channels[block]
                            if i == 0 else num_filters[block] * 4,
                            out_channels=num_filters[block],
                            stride=stride,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            name=conv_name))
                    shortcut = True
                    self.block_list.append(bottleneck_block)
                self.out_channels = num_filters[block] * 4
        else:
            for block in range(len(depth)):
                shortcut = False
                for i in range(depth[block]):
                    conv_name = "res" + str(block + 2) + chr(97 + i)
                    if i == 0 and block != 0:
                        stride = (2, 1)
                    else:
                        stride = (1, 1)

                    basic_block = self.add_sublayer(
                        'bb_%d_%d' % (block, i),
                        BasicBlock(
                            in_channels=num_channels[block]
                            if i == 0 else num_filters[block],
                            out_channels=num_filters[block],
                            stride=stride,
                            shortcut=shortcut,
                            if_first=block == i == 0,
                            name=conv_name))
                    shortcut = True
                    self.block_list.append(basic_block)
                self.out_channels = num_filters[block]
        self.out_pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)

    def forward(self, inputs):
        y = self.conv1_1(inputs)
        y = self.conv1_2(y)
        y = self.conv1_3(y)
        y = self.pool2d_max(y)
        for block in self.block_list:
            y = block(y)
        y = self.out_pool(y)
        return y
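A minimal sketch of the recognition-oriented striding (illustrative 32x320 input; stage strides of (2, 1) collapse height while width, the character axis, is halved only by the stem pool and the final pool):

import paddle
from ppocr.modeling.backbones.rec_resnet_vd import ResNet

backbone = ResNet(in_channels=3, layers=34)
x = paddle.randn([1, 3, 32, 320])
y = backbone(x)
print(y.shape, backbone.out_channels)  # expect [1, 512, 1, 80], 512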
ppocr/modeling/backbones/rec_svtrnet.py
0 → 100755
View file @
f1506916
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle
import
ParamAttr
from
paddle.nn.initializer
import
KaimingNormal
import
numpy
as
np
import
paddle
import
paddle.nn
as
nn
from
paddle.nn.initializer
import
TruncatedNormal
,
Constant
,
Normal
trunc_normal_
=
TruncatedNormal
(
std
=
.
02
)
normal_
=
Normal
zeros_
=
Constant
(
value
=
0.
)
ones_
=
Constant
(
value
=
1.
)
def
drop_path
(
x
,
drop_prob
=
0.
,
training
=
False
):
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
"""
if
drop_prob
==
0.
or
not
training
:
return
x
keep_prob
=
paddle
.
to_tensor
(
1
-
drop_prob
)
shape
=
(
paddle
.
shape
(
x
)[
0
],
)
+
(
1
,
)
*
(
x
.
ndim
-
1
)
random_tensor
=
keep_prob
+
paddle
.
rand
(
shape
,
dtype
=
x
.
dtype
)
random_tensor
=
paddle
.
floor
(
random_tensor
)
# binarize
output
=
x
.
divide
(
keep_prob
)
*
random_tensor
return
output
class
ConvBNLayer
(
nn
.
Layer
):
def
__init__
(
self
,
in_channels
,
out_channels
,
kernel_size
=
3
,
stride
=
1
,
padding
=
0
,
bias_attr
=
False
,
groups
=
1
,
act
=
nn
.
GELU
):
super
().
__init__
()
self
.
conv
=
nn
.
Conv2D
(
in_channels
=
in_channels
,
out_channels
=
out_channels
,
kernel_size
=
kernel_size
,
stride
=
stride
,
padding
=
padding
,
groups
=
groups
,
weight_attr
=
paddle
.
ParamAttr
(
initializer
=
nn
.
initializer
.
KaimingUniform
()),
bias_attr
=
bias_attr
)
self
.
norm
=
nn
.
BatchNorm2D
(
out_channels
)
self
.
act
=
act
()
def
forward
(
self
,
inputs
):
out
=
self
.
conv
(
inputs
)
out
=
self
.
norm
(
out
)
out
=
self
.
act
(
out
)
return
out
class
DropPath
(
nn
.
Layer
):
"""Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
"""
def
__init__
(
self
,
drop_prob
=
None
):
super
(
DropPath
,
self
).
__init__
()
self
.
drop_prob
=
drop_prob
def
forward
(
self
,
x
):
return
drop_path
(
x
,
self
.
drop_prob
,
self
.
training
)
class
Identity
(
nn
.
Layer
):
def
__init__
(
self
):
super
(
Identity
,
self
).
__init__
()
def
forward
(
self
,
input
):
return
input
class
Mlp
(
nn
.
Layer
):
def
__init__
(
self
,
in_features
,
hidden_features
=
None
,
out_features
=
None
,
act_layer
=
nn
.
GELU
,
drop
=
0.
):
super
().
__init__
()
out_features
=
out_features
or
in_features
hidden_features
=
hidden_features
or
in_features
self
.
fc1
=
nn
.
Linear
(
in_features
,
hidden_features
)
self
.
act
=
act_layer
()
self
.
fc2
=
nn
.
Linear
(
hidden_features
,
out_features
)
self
.
drop
=
nn
.
Dropout
(
drop
)
def
forward
(
self
,
x
):
x
=
self
.
fc1
(
x
)
x
=
self
.
act
(
x
)
x
=
self
.
drop
(
x
)
x
=
self
.
fc2
(
x
)
x
=
self
.
drop
(
x
)
return
x
class
ConvMixer
(
nn
.
Layer
):
def
__init__
(
self
,
dim
,
num_heads
=
8
,
HW
=
[
8
,
25
],
local_k
=
[
3
,
3
],
):
super
().
__init__
()
self
.
HW
=
HW
self
.
dim
=
dim
self
.
local_mixer
=
nn
.
Conv2D
(
dim
,
dim
,
local_k
,
1
,
[
local_k
[
0
]
//
2
,
local_k
[
1
]
//
2
],
groups
=
num_heads
,
weight_attr
=
ParamAttr
(
initializer
=
KaimingNormal
()))
def
forward
(
self
,
x
):
h
=
self
.
HW
[
0
]
w
=
self
.
HW
[
1
]
x
=
x
.
transpose
([
0
,
2
,
1
]).
reshape
([
0
,
self
.
dim
,
h
,
w
])
x
=
self
.
local_mixer
(
x
)
x
=
x
.
flatten
(
2
).
transpose
([
0
,
2
,
1
])
return
x
class
Attention
(
nn
.
Layer
):
def
__init__
(
self
,
dim
,
num_heads
=
8
,
mixer
=
'Global'
,
HW
=
[
8
,
25
],
local_k
=
[
7
,
11
],
qkv_bias
=
False
,
qk_scale
=
None
,
attn_drop
=
0.
,
proj_drop
=
0.
):
super
().
__init__
()
self
.
num_heads
=
num_heads
head_dim
=
dim
//
num_heads
self
.
scale
=
qk_scale
or
head_dim
**-
0.5
self
.
qkv
=
nn
.
Linear
(
dim
,
dim
*
3
,
bias_attr
=
qkv_bias
)
self
.
attn_drop
=
nn
.
Dropout
(
attn_drop
)
self
.
proj
=
nn
.
Linear
(
dim
,
dim
)
self
.
proj_drop
=
nn
.
Dropout
(
proj_drop
)
self
.
HW
=
HW
if
HW
is
not
None
:
H
=
HW
[
0
]
W
=
HW
[
1
]
self
.
N
=
H
*
W
self
.
C
=
dim
if
mixer
==
'Local'
and
HW
is
not
None
:
hk
=
local_k
[
0
]
wk
=
local_k
[
1
]
mask
=
paddle
.
ones
([
H
*
W
,
H
+
hk
-
1
,
W
+
wk
-
1
],
dtype
=
'float32'
)
for
h
in
range
(
0
,
H
):
for
w
in
range
(
0
,
W
):
mask
[
h
*
W
+
w
,
h
:
h
+
hk
,
w
:
w
+
wk
]
=
0.
mask_paddle
=
mask
[:,
hk
//
2
:
H
+
hk
//
2
,
wk
//
2
:
W
+
wk
//
2
].
flatten
(
1
)
mask_inf
=
paddle
.
full
([
H
*
W
,
H
*
W
],
'-inf'
,
dtype
=
'float32'
)
mask
=
paddle
.
where
(
mask_paddle
<
1
,
mask_paddle
,
mask_inf
)
self
.
mask
=
mask
.
unsqueeze
([
0
,
1
])
self
.
mixer
=
mixer
def
forward
(
self
,
x
):
if
self
.
HW
is
not
None
:
N
=
self
.
N
C
=
self
.
C
else
:
_
,
N
,
C
=
x
.
shape
qkv
=
self
.
qkv
(
x
).
reshape
((
0
,
N
,
3
,
self
.
num_heads
,
C
//
self
.
num_heads
)).
transpose
((
2
,
0
,
3
,
1
,
4
))
q
,
k
,
v
=
qkv
[
0
]
*
self
.
scale
,
qkv
[
1
],
qkv
[
2
]
attn
=
(
q
.
matmul
(
k
.
transpose
((
0
,
1
,
3
,
2
))))
if
self
.
mixer
==
'Local'
:
attn
+=
self
.
mask
attn
=
nn
.
functional
.
softmax
(
attn
,
axis
=-
1
)
attn
=
self
.
attn_drop
(
attn
)
x
=
(
attn
.
matmul
(
v
)).
transpose
((
0
,
2
,
1
,
3
)).
reshape
((
0
,
N
,
C
))
x
=
self
.
proj
(
x
)
x
=
self
.
proj_drop
(
x
)
return
x
class
Block
(
nn
.
Layer
):
def
__init__
(
self
,
dim
,
num_heads
,
mixer
=
'Global'
,
local_mixer
=
[
7
,
11
],
HW
=
[
8
,
25
],
mlp_ratio
=
4.
,
qkv_bias
=
False
,
qk_scale
=
None
,
drop
=
0.
,
attn_drop
=
0.
,
drop_path
=
0.
,
act_layer
=
nn
.
GELU
,
norm_layer
=
'nn.LayerNorm'
,
epsilon
=
1e-6
,
prenorm
=
True
):
super
().
__init__
()
if
isinstance
(
norm_layer
,
str
):
self
.
norm1
=
eval
(
norm_layer
)(
dim
,
epsilon
=
epsilon
)
else
:
self
.
norm1
=
norm_layer
(
dim
)
if
mixer
==
'Global'
or
mixer
==
'Local'
:
self
.
mixer
=
Attention
(
dim
,
num_heads
=
num_heads
,
mixer
=
mixer
,
HW
=
HW
,
local_k
=
local_mixer
,
qkv_bias
=
qkv_bias
,
qk_scale
=
qk_scale
,
attn_drop
=
attn_drop
,
proj_drop
=
drop
)
elif
mixer
==
'Conv'
:
self
.
mixer
=
ConvMixer
(
dim
,
num_heads
=
num_heads
,
HW
=
HW
,
local_k
=
local_mixer
)
else
:
raise
TypeError
(
"The mixer must be one of [Global, Local, Conv]"
)
self
.
drop_path
=
DropPath
(
drop_path
)
if
drop_path
>
0.
else
Identity
()
if
isinstance
(
norm_layer
,
str
):
self
.
norm2
=
eval
(
norm_layer
)(
dim
,
epsilon
=
epsilon
)
else
:
self
.
norm2
=
norm_layer
(
dim
)
mlp_hidden_dim
=
int
(
dim
*
mlp_ratio
)
self
.
mlp_ratio
=
mlp_ratio
self
.
mlp
=
Mlp
(
in_features
=
dim
,
hidden_features
=
mlp_hidden_dim
,
act_layer
=
act_layer
,
drop
=
drop
)
self
.
prenorm
=
prenorm
def
forward
(
self
,
x
):
if
self
.
prenorm
:
x
=
self
.
norm1
(
x
+
self
.
drop_path
(
self
.
mixer
(
x
)))
x
=
self
.
norm2
(
x
+
self
.
drop_path
(
self
.
mlp
(
x
)))
else
:
x
=
x
+
self
.
drop_path
(
self
.
mixer
(
self
.
norm1
(
x
)))
x
=
x
+
self
.
drop_path
(
self
.
mlp
(
self
.
norm2
(
x
)))
return
x
class
PatchEmbed
(
nn
.
Layer
):
""" Image to Patch Embedding
"""
def
__init__
(
self
,
img_size
=
[
32
,
100
],
in_channels
=
3
,
embed_dim
=
768
,
sub_num
=
2
):
super
().
__init__
()
num_patches
=
(
img_size
[
1
]
//
(
2
**
sub_num
))
*
\
(
img_size
[
0
]
//
(
2
**
sub_num
))
self
.
img_size
=
img_size
self
.
num_patches
=
num_patches
self
.
embed_dim
=
embed_dim
self
.
norm
=
None
if
sub_num
==
2
:
self
.
proj
=
nn
.
Sequential
(
ConvBNLayer
(
in_channels
=
in_channels
,
out_channels
=
embed_dim
//
2
,
kernel_size
=
3
,
stride
=
2
,
padding
=
1
,
act
=
nn
.
GELU
,
bias_attr
=
None
),
ConvBNLayer
(
in_channels
=
embed_dim
//
2
,
out_channels
=
embed_dim
,
kernel_size
=
3
,
stride
=
2
,
padding
=
1
,
act
=
nn
.
GELU
,
bias_attr
=
None
))
if
sub_num
==
3
:
self
.
proj
=
nn
.
Sequential
(
ConvBNLayer
(
in_channels
=
in_channels
,
out_channels
=
embed_dim
//
4
,
kernel_size
=
3
,
stride
=
2
,
padding
=
1
,
act
=
nn
.
GELU
,
bias_attr
=
None
),
ConvBNLayer
(
in_channels
=
embed_dim
//
4
,
out_channels
=
embed_dim
//
2
,
kernel_size
=
3
,
stride
=
2
,
padding
=
1
,
act
=
nn
.
GELU
,
bias_attr
=
None
),
ConvBNLayer
(
in_channels
=
embed_dim
//
2
,
out_channels
=
embed_dim
,
kernel_size
=
3
,
stride
=
2
,
padding
=
1
,
act
=
nn
.
GELU
,
bias_attr
=
None
))
def
forward
(
self
,
x
):
B
,
C
,
H
,
W
=
x
.
shape
assert
H
==
self
.
img_size
[
0
]
and
W
==
self
.
img_size
[
1
],
\
f
"Input image size (
{
H
}
*
{
W
}
) doesn't match model (
{
self
.
img_size
[
0
]
}
*
{
self
.
img_size
[
1
]
}
)."
x
=
self
.
proj
(
x
).
flatten
(
2
).
transpose
((
0
,
2
,
1
))
return
x
class
SubSample
(
nn
.
Layer
):
def
__init__
(
self
,
in_channels
,
out_channels
,
types
=
'Pool'
,
stride
=
[
2
,
1
],
sub_norm
=
'nn.LayerNorm'
,
act
=
None
):
super
().
__init__
()
self
.
types
=
types
if
types
==
'Pool'
:
self
.
avgpool
=
nn
.
AvgPool2D
(
kernel_size
=
[
3
,
5
],
stride
=
stride
,
padding
=
[
1
,
2
])
self
.
maxpool
=
nn
.
MaxPool2D
(
kernel_size
=
[
3
,
5
],
stride
=
stride
,
padding
=
[
1
,
2
])
self
.
proj
=
nn
.
Linear
(
in_channels
,
out_channels
)
else
:
self
.
conv
=
nn
.
Conv2D
(
in_channels
,
out_channels
,
kernel_size
=
3
,
stride
=
stride
,
padding
=
1
,
weight_attr
=
ParamAttr
(
initializer
=
KaimingNormal
()))
self
.
norm
=
eval
(
sub_norm
)(
out_channels
)
if
act
is
not
None
:
self
.
act
=
act
()
else
:
self
.
act
=
None
def
forward
(
self
,
x
):
if
self
.
types
==
'Pool'
:
x1
=
self
.
avgpool
(
x
)
x2
=
self
.
maxpool
(
x
)
x
=
(
x1
+
x2
)
*
0.5
out
=
self
.
proj
(
x
.
flatten
(
2
).
transpose
((
0
,
2
,
1
)))
else
:
x
=
self
.
conv
(
x
)
out
=
x
.
flatten
(
2
).
transpose
((
0
,
2
,
1
))
out
=
self
.
norm
(
out
)
if
self
.
act
is
not
None
:
out
=
self
.
act
(
out
)
return
out
class
SVTRNet
(
nn
.
Layer
):
def
__init__
(
self
,
img_size
=
[
32
,
100
],
in_channels
=
3
,
embed_dim
=
[
64
,
128
,
256
],
depth
=
[
3
,
6
,
3
],
num_heads
=
[
2
,
4
,
8
],
mixer
=
[
'Local'
]
*
6
+
[
'Global'
]
*
6
,
# Local atten, Global atten, Conv
local_mixer
=
[[
7
,
11
],
[
7
,
11
],
[
7
,
11
]],
patch_merging
=
'Conv'
,
# Conv, Pool, None
mlp_ratio
=
4
,
qkv_bias
=
True
,
qk_scale
=
None
,
drop_rate
=
0.
,
last_drop
=
0.1
,
attn_drop_rate
=
0.
,
drop_path_rate
=
0.1
,
norm_layer
=
'nn.LayerNorm'
,
sub_norm
=
'nn.LayerNorm'
,
epsilon
=
1e-6
,
out_channels
=
192
,
out_char_num
=
25
,
block_unit
=
'Block'
,
act
=
'nn.GELU'
,
last_stage
=
True
,
sub_num
=
2
,
prenorm
=
True
,
use_lenhead
=
False
,
**
kwargs
):
super
().
__init__
()
self
.
img_size
=
img_size
self
.
embed_dim
=
embed_dim
self
.
out_channels
=
out_channels
self
.
prenorm
=
prenorm
patch_merging
=
None
if
patch_merging
!=
'Conv'
and
patch_merging
!=
'Pool'
else
patch_merging
self
.
patch_embed
=
PatchEmbed
(
img_size
=
img_size
,
in_channels
=
in_channels
,
embed_dim
=
embed_dim
[
0
],
sub_num
=
sub_num
)
num_patches
=
self
.
patch_embed
.
num_patches
self
.
HW
=
[
img_size
[
0
]
//
(
2
**
sub_num
),
img_size
[
1
]
//
(
2
**
sub_num
)]
self
.
pos_embed
=
self
.
create_parameter
(
shape
=
[
1
,
num_patches
,
embed_dim
[
0
]],
default_initializer
=
zeros_
)
self
.
add_parameter
(
"pos_embed"
,
self
.
pos_embed
)
self
.
pos_drop
=
nn
.
Dropout
(
p
=
drop_rate
)
Block_unit
=
eval
(
block_unit
)
dpr
=
np
.
linspace
(
0
,
drop_path_rate
,
sum
(
depth
))
self
.
blocks1
=
nn
.
LayerList
([
Block_unit
(
dim
=
embed_dim
[
0
],
num_heads
=
num_heads
[
0
],
mixer
=
mixer
[
0
:
depth
[
0
]][
i
],
HW
=
self
.
HW
,
local_mixer
=
local_mixer
[
0
],
mlp_ratio
=
mlp_ratio
,
qkv_bias
=
qkv_bias
,
                qk_scale=qk_scale,
                drop=drop_rate,
                act_layer=eval(act),
                attn_drop=attn_drop_rate,
                drop_path=dpr[0:depth[0]][i],
                norm_layer=norm_layer,
                epsilon=epsilon,
                prenorm=prenorm) for i in range(depth[0])
        ])
        # Stage 1 -> 2: optionally halve the feature height with SubSample.
        if patch_merging is not None:
            self.sub_sample1 = SubSample(
                embed_dim[0],
                embed_dim[1],
                sub_norm=sub_norm,
                stride=[2, 1],
                types=patch_merging)
            HW = [self.HW[0] // 2, self.HW[1]]
        else:
            HW = self.HW
        self.patch_merging = patch_merging
        self.blocks2 = nn.LayerList([
            Block_unit(
                dim=embed_dim[1],
                num_heads=num_heads[1],
                mixer=mixer[depth[0]:depth[0] + depth[1]][i],
                HW=HW,
                local_mixer=local_mixer[1],
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                act_layer=eval(act),
                attn_drop=attn_drop_rate,
                drop_path=dpr[depth[0]:depth[0] + depth[1]][i],
                norm_layer=norm_layer,
                epsilon=epsilon,
                prenorm=prenorm) for i in range(depth[1])
        ])
        # Stage 2 -> 3: a second optional height halving (total factor 4).
        if patch_merging is not None:
            self.sub_sample2 = SubSample(
                embed_dim[1],
                embed_dim[2],
                sub_norm=sub_norm,
                stride=[2, 1],
                types=patch_merging)
            HW = [self.HW[0] // 4, self.HW[1]]
        else:
            HW = self.HW
        self.blocks3 = nn.LayerList([
            Block_unit(
                dim=embed_dim[2],
                num_heads=num_heads[2],
                mixer=mixer[depth[0] + depth[1]:][i],
                HW=HW,
                local_mixer=local_mixer[2],
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                act_layer=eval(act),
                attn_drop=attn_drop_rate,
                drop_path=dpr[depth[0] + depth[1]:][i],
                norm_layer=norm_layer,
                epsilon=epsilon,
                prenorm=prenorm) for i in range(depth[2])
        ])
        self.last_stage = last_stage
        if last_stage:
            # Pool to a 1 x out_char_num grid, then project to out_channels.
            self.avg_pool = nn.AdaptiveAvgPool2D([1, out_char_num])
            self.last_conv = nn.Conv2D(
                in_channels=embed_dim[2],
                out_channels=self.out_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                bias_attr=False)
            self.hardswish = nn.Hardswish()
            self.dropout = nn.Dropout(p=last_drop, mode="downscale_in_infer")
        if not prenorm:
            self.norm = eval(norm_layer)(embed_dim[-1], epsilon=epsilon)
        self.use_lenhead = use_lenhead
        if use_lenhead:
            self.len_conv = nn.Linear(embed_dim[2], self.out_channels)
            self.hardswish_len = nn.Hardswish()
            self.dropout_len = nn.Dropout(
                p=last_drop, mode="downscale_in_infer")

        trunc_normal_(self.pos_embed)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight)
            if m.bias is not None:
                zeros_(m.bias)
        elif isinstance(m, nn.LayerNorm):
            zeros_(m.bias)
            ones_(m.weight)

    def forward_features(self, x):
        x = self.patch_embed(x)
        x = x + self.pos_embed
        x = self.pos_drop(x)
        for blk in self.blocks1:
            x = blk(x)
        if self.patch_merging is not None:
            x = self.sub_sample1(
                x.transpose([0, 2, 1]).reshape(
                    [0, self.embed_dim[0], self.HW[0], self.HW[1]]))
        for blk in self.blocks2:
            x = blk(x)
        if self.patch_merging is not None:
            x = self.sub_sample2(
                x.transpose([0, 2, 1]).reshape(
                    [0, self.embed_dim[1], self.HW[0] // 2, self.HW[1]]))
        for blk in self.blocks3:
            x = blk(x)
        if not self.prenorm:
            x = self.norm(x)
        return x

    def forward(self, x):
        x = self.forward_features(x)
        if self.use_lenhead:
            len_x = self.len_conv(x.mean(1))
            len_x = self.dropout_len(self.hardswish_len(len_x))
        if self.last_stage:
            if self.patch_merging is not None:
                h = self.HW[0] // 4
            else:
                h = self.HW[0]
            x = self.avg_pool(
                x.transpose([0, 2, 1]).reshape(
                    [0, self.embed_dim[2], h, self.HW[1]]))
            x = self.last_conv(x)
            x = self.hardswish(x)
            x = self.dropout(x)
        if self.use_lenhead:
            return x, len_x
        return x
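Taken together, `forward` pools the three-stage features to a 1 × out_char_num grid and projects them to `out_channels`, so the backbone emits one feature column per output character slot. Below is a minimal smoke test, assuming the constructor defaults defined earlier in rec_svtrnet.py (img_size=[32, 100], out_char_num=25); treat the exact shapes as a sketch, not a contract.

# Hedged usage sketch (not part of the commit): relies on SVTRNet's
# defaults as defined earlier in rec_svtrnet.py; adjust if they differ.
import paddle
from ppocr.modeling.backbones.rec_svtrnet import SVTRNet

backbone = SVTRNet()                    # img_size defaults to [32, 100]
image = paddle.randn([1, 3, 32, 100])   # N, C, H, W must match img_size
feats = backbone(image)
# With last_stage=True and patch_merging enabled, the output is pooled to
# height 1 and out_char_num columns: roughly [1, out_channels, 1, 25].
print(feats.shape)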
ppocr/modeling/backbones/vqa_layoutlm.py
0 → 100755
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os

from paddle import nn

from paddlenlp.transformers import LayoutXLMModel, LayoutXLMForTokenClassification, LayoutXLMForRelationExtraction
from paddlenlp.transformers import LayoutLMModel, LayoutLMForTokenClassification
from paddlenlp.transformers import LayoutLMv2Model, LayoutLMv2ForTokenClassification, LayoutLMv2ForRelationExtraction

__all__ = ["LayoutXLMForSer", "LayoutLMForSer"]

pretrained_model_dict = {
    LayoutXLMModel: 'layoutxlm-base-uncased',
    LayoutLMModel: 'layoutlm-base-uncased',
    LayoutLMv2Model: 'layoutlmv2-base-uncased'
}


class NLPBaseModel(nn.Layer):
    def __init__(self,
                 base_model_class,
                 model_class,
                 type='ser',
                 pretrained=True,
                 checkpoints=None,
                 **kwargs):
        super(NLPBaseModel, self).__init__()
        if checkpoints is not None:
            self.model = model_class.from_pretrained(checkpoints)
        else:
            pretrained_model_name = pretrained_model_dict[base_model_class]
            if pretrained:
                base_model = base_model_class.from_pretrained(
                    pretrained_model_name)
            else:
                base_model = base_model_class(
                    **base_model_class.pretrained_init_configuration[
                        pretrained_model_name])
            if type == 'ser':
                self.model = model_class(
                    base_model,
                    num_classes=kwargs['num_classes'],
                    dropout=None)
            else:
                self.model = model_class(base_model, dropout=None)
        self.out_channels = 1


class LayoutLMForSer(NLPBaseModel):
    def __init__(self, num_classes, pretrained=True, checkpoints=None,
                 **kwargs):
        super(LayoutLMForSer, self).__init__(
            LayoutLMModel,
            LayoutLMForTokenClassification,
            'ser',
            pretrained,
            checkpoints,
            num_classes=num_classes)

    def forward(self, x):
        x = self.model(
            input_ids=x[0],
            bbox=x[2],
            attention_mask=x[4],
            token_type_ids=x[5],
            position_ids=None,
            output_hidden_states=False)
        return x


class LayoutLMv2ForSer(NLPBaseModel):
    def __init__(self, num_classes, pretrained=True, checkpoints=None,
                 **kwargs):
        super(LayoutLMv2ForSer, self).__init__(
            LayoutLMv2Model,
            LayoutLMv2ForTokenClassification,
            'ser',
            pretrained,
            checkpoints,
            num_classes=num_classes)

    def forward(self, x):
        x = self.model(
            input_ids=x[0],
            bbox=x[2],
            image=x[3],
            attention_mask=x[4],
            token_type_ids=x[5],
            position_ids=None,
            head_mask=None,
            labels=None)
        return x[0]


class LayoutXLMForSer(NLPBaseModel):
    def __init__(self, num_classes, pretrained=True, checkpoints=None,
                 **kwargs):
        super(LayoutXLMForSer, self).__init__(
            LayoutXLMModel,
            LayoutXLMForTokenClassification,
            'ser',
            pretrained,
            checkpoints,
            num_classes=num_classes)

    def forward(self, x):
        x = self.model(
            input_ids=x[0],
            bbox=x[2],
            image=x[3],
            attention_mask=x[4],
            token_type_ids=x[5],
            position_ids=None,
            head_mask=None,
            labels=None)
        return x[0]


class LayoutLMv2ForRe(NLPBaseModel):
    def __init__(self, pretrained=True, checkpoints=None, **kwargs):
        super(LayoutLMv2ForRe, self).__init__(
            LayoutLMv2Model, LayoutLMv2ForRelationExtraction, 're',
            pretrained, checkpoints)

    def forward(self, x):
        x = self.model(
            input_ids=x[0],
            bbox=x[1],
            labels=None,
            image=x[2],
            attention_mask=x[3],
            token_type_ids=x[4],
            position_ids=None,
            head_mask=None,
            entities=x[5],
            relations=x[6])
        return x


class LayoutXLMForRe(NLPBaseModel):
    def __init__(self, pretrained=True, checkpoints=None, **kwargs):
        super(LayoutXLMForRe, self).__init__(
            LayoutXLMModel, LayoutXLMForRelationExtraction, 're', pretrained,
            checkpoints)

    def forward(self, x):
        x = self.model(
            input_ids=x[0],
            bbox=x[1],
            labels=None,
            image=x[2],
            attention_mask=x[3],
            token_type_ids=x[4],
            position_ids=None,
            head_mask=None,
            entities=x[5],
            relations=x[6])
        return x
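All of these wrappers take a single positional list `x`, so the meaning of each slot is fixed by index rather than by name. The mapping below is read directly from the `forward` methods above; the tensor shapes are illustrative assumptions rather than the SER dataloader's actual contract.

# Hedged usage sketch (not part of the commit): slot indices come from
# LayoutXLMForSer.forward above; shapes here are illustrative assumptions.
import paddle
from ppocr.modeling.backbones.vqa_layoutlm import LayoutXLMForSer

model = LayoutXLMForSer(num_classes=7, pretrained=True)
L = 512                                           # sequence length
batch = [
    paddle.zeros([1, L], dtype='int64'),          # x[0]: input_ids
    None,                                         # x[1]: unused by SER
    paddle.zeros([1, L, 4], dtype='int64'),       # x[2]: bbox
    paddle.zeros([1, 3, 224, 224]),               # x[3]: image
    paddle.ones([1, L], dtype='int64'),           # x[4]: attention_mask
    paddle.zeros([1, L], dtype='int64'),          # x[5]: token_type_ids
]
logits = model(batch)                             # per-token class scores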
ppocr/modeling/heads/__init__.py
0 → 100755
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ['build_head']


def build_head(config):
    # det head
    from .det_db_head import DBHead
    from .det_east_head import EASTHead
    from .det_sast_head import SASTHead
    from .det_pse_head import PSEHead
    from .det_fce_head import FCEHead
    from .e2e_pg_head import PGHead

    # rec head
    from .rec_ctc_head import CTCHead
    from .rec_att_head import AttentionHead
    from .rec_srn_head import SRNHead
    from .rec_nrtr_head import Transformer
    from .rec_sar_head import SARHead
    from .rec_aster_head import AsterHead
    from .rec_pren_head import PRENHead
    from .rec_multi_head import MultiHead

    # cls head
    from .cls_head import ClsHead

    # kie head
    from .kie_sdmgr_head import SDMGRHead

    # table head
    from .table_att_head import TableAttentionHead

    support_dict = [
        'DBHead', 'PSEHead', 'FCEHead', 'EASTHead', 'SASTHead', 'CTCHead',
        'ClsHead', 'AttentionHead', 'SRNHead', 'PGHead', 'Transformer',
        'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead', 'PRENHead',
        'MultiHead'
    ]

    module_name = config.pop('name')
    assert module_name in support_dict, 'head only support {}'.format(
        support_dict)
    module_class = eval(module_name)(**config)
    return module_class
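Because `build_head` pops `'name'` and splats the remaining keys into the chosen class, a head config is simply the head constructor's keyword arguments plus a `'name'` entry, and the dict is consumed in place. A minimal sketch follows; the CTCHead keyword names shown are assumptions, so consult rec_ctc_head.py for the real signature.

# Hedged usage sketch (not part of the commit): 'in_channels', 'out_channels'
# and 'fc_decay' are assumed CTCHead kwargs; verify against rec_ctc_head.py.
from ppocr.modeling.heads import build_head

config = {
    'name': 'CTCHead',      # must appear in support_dict
    'in_channels': 192,
    'out_channels': 6625,   # size of the recognition dictionary (+ blank)
    'fc_decay': 4e-05,
}
head = build_head(config)   # note: config no longer contains 'name' here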
ppocr/modeling/heads/__pycache__/__init__.cpython-37.pyc
0 → 100644
File added
ppocr/modeling/heads/__pycache__/cls_head.cpython-37.pyc
0 → 100644
File added
ppocr/modeling/heads/__pycache__/det_db_head.cpython-37.pyc
0 → 100644
File added
ppocr/modeling/heads/__pycache__/det_east_head.cpython-37.pyc
0 → 100644
File added
ppocr/modeling/heads/__pycache__/det_fce_head.cpython-37.pyc
0 → 100644
File added
ppocr/modeling/heads/__pycache__/det_pse_head.cpython-37.pyc
0 → 100644
File added
ppocr/modeling/heads/__pycache__/det_sast_head.cpython-37.pyc
0 → 100644
File added
ppocr/modeling/heads/__pycache__/e2e_pg_head.cpython-37.pyc
0 → 100644
File added
ppocr/modeling/heads/__pycache__/kie_sdmgr_head.cpython-37.pyc
0 → 100644
File added
ppocr/modeling/heads/__pycache__/multiheadAttention.cpython-37.pyc
0 → 100644
File added