Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
4ffb5b62
Unverified
Commit
4ffb5b62
authored
Oct 13, 2020
by
zhoujun
Committed by
GitHub
Oct 13, 2020
Browse files
Merge pull request #924 from WenmuZhou/dygraph
Dygraph
parents
bc93c549
aad3093a
Changes
371
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
1489 additions
and
0 deletions
+1489
-0
ppocr/modeling/architectures/__init__.py
ppocr/modeling/architectures/__init__.py
+16
-0
ppocr/modeling/architectures/model.py
ppocr/modeling/architectures/model.py
+129
-0
ppocr/modeling/backbones/__init__.py
ppocr/modeling/backbones/__init__.py
+36
-0
ppocr/modeling/backbones/det_mobilenet_v3.py
ppocr/modeling/backbones/det_mobilenet_v3.py
+278
-0
ppocr/modeling/backbones/det_resnet_vd.py
ppocr/modeling/backbones/det_resnet_vd.py
+329
-0
ppocr/modeling/backbones/rec_mobilenet_v3.py
ppocr/modeling/backbones/rec_mobilenet_v3.py
+148
-0
ppocr/modeling/backbones/rec_resnet_vd.py
ppocr/modeling/backbones/rec_resnet_vd.py
+312
-0
ppocr/modeling/heads/__init__.py
ppocr/modeling/heads/__init__.py
+30
-0
ppocr/modeling/heads/det_db_head.py
ppocr/modeling/heads/det_db_head.py
+128
-0
ppocr/modeling/heads/rec_ctc_head.py
ppocr/modeling/heads/rec_ctc_head.py
+51
-0
ppocr/modeling/losses/__init__.py
ppocr/modeling/losses/__init__.py
+32
-0
No files found.
Too many changes to show.
To preserve performance only
371 of 371+
files are displayed.
Plain diff
Email patch
ppocr/modeling/architectures/__init__.py
0 → 100755
View file @
4ffb5b62
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
.model
import
Model
__all__
=
[
'Model'
]
\ No newline at end of file
ppocr/modeling/architectures/model.py
0 → 100644
View file @
4ffb5b62
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import os
import sys

# Directory that contains this file. It is deliberately NOT stored under the
# module-level name ``__dir__``: since PEP 562 a module attribute with that
# name is called by ``dir(module)``, so binding a string to it breaks ``dir``.
_CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))
# Three levels above this file; assumes the ppocr/modeling/architectures/
# layout shown in this file's path, so that `ppocr` and `tools` can be
# imported when the file is run directly as a script.
_REPO_ROOT = os.path.abspath(os.path.join(_CURRENT_DIR, '..', '..', '..'))
for _path in (_CURRENT_DIR, _REPO_ROOT):
    # Avoid piling up duplicate entries on repeated imports/reloads.
    if _path not in sys.path:
        sys.path.append(_path)
import
paddle
from
paddle
import
nn
from
ppocr.modeling.transform
import
build_transform
from
ppocr.modeling.backbones
import
build_backbone
from
ppocr.modeling.necks
import
build_neck
from
ppocr.modeling.heads
import
build_head
__all__
=
[
'Model'
]
class Model(nn.Layer):
    """
    OCR network assembled from an optional transform, a backbone,
    an optional neck and a head, applied in that order.
    """

    def __init__(self, config):
        """
        Build the sub-modules described by ``config``.

        args:
            config (dict): the super parameters for module. The keys read
                here are 'algorithm', 'type', the optional 'in_channels'
                (default 3), the optional 'Transform' and 'Neck' sections
                (missing or None disables them) and the required
                'Backbone' and 'Head' sections.
        """
        super(Model, self).__init__()
        # Work on a private copy: 'in_channels' is injected into the
        # sub-configs below and they are handed to builder functions that
        # may modify them further, so the caller's dict must stay untouched
        # (otherwise the same config cannot be reused safely).
        import copy
        config = copy.deepcopy(config)

        algorithm = config['algorithm']
        self.type = config['type']
        self.model_name = '{}_{}'.format(self.type, algorithm)

        in_channels = config.get('in_channels', 3)

        # build transform,
        # for rec, transform can be TPS or None
        # for det and cls, transform should be None,
        # if you make model differently, you can use transform in det and cls
        if 'Transform' not in config or config['Transform'] is None:
            self.use_transform = False
        else:
            self.use_transform = True
            config['Transform']['in_channels'] = in_channels
            self.transform = build_transform(config['Transform'])
            in_channels = self.transform.out_channels

        # build backbone, backbone is needed for det, rec and cls
        config["Backbone"]['in_channels'] = in_channels
        self.backbone = build_backbone(config["Backbone"], self.type)
        in_channels = self.backbone.out_channels

        # build neck
        # for rec, neck can be cnn, rnn or reshape(None)
        # for det, neck can be FPN, BIFPN and so on.
        # for cls, neck should be None
        if 'Neck' not in config or config['Neck'] is None:
            self.use_neck = False
        else:
            self.use_neck = True
            config['Neck']['in_channels'] = in_channels
            self.neck = build_neck(config['Neck'])
            in_channels = self.neck.out_channels

        # build head, head is needed for det, rec and cls
        config["Head"]['in_channels'] = in_channels
        self.head = build_head(config["Head"])

    # @paddle.jit.to_static
    def forward(self, x):
        """Run x through transform (if any), backbone, neck (if any), head."""
        if self.use_transform:
            x = self.transform(x)
        x = self.backbone(x)
        if self.use_neck:
            x = self.neck(x)
        x = self.head(x)
        return x
def check_static(
        config_path='configs/det/det_r50_vd_db.yml',
        weights_path='/Users/zhoujun20/Desktop/code/PaddleOCR/db/db',
        static_out_path='/Users/zhoujun20/Desktop/code/PaddleOCR/db/db.npy'):
    """
    Debug helper: compare the dygraph model output with a saved reference.

    Builds ``Model`` from the 'Architecture' section of the config, loads
    weights with ``load_static_weights=True``, runs an all-zero
    (1, 3, 640, 640) float32 input and prints the output shapes and the
    mean difference to the array stored at ``static_out_path``.

    args:
        config_path (str): config file passed to ``program.load_config``.
        weights_path (str): weights prefix passed to
            ``load_dygraph_pretrain``.
        static_out_path (str): ``.npy`` file with the reference output.

    The defaults are the paths that used to be hard-coded here; they only
    exist on the original author's machine, so pass your own.
    """
    import numpy as np
    from ppocr.utils.save_load import load_dygraph_pretrain
    from ppocr.utils.logging import get_logger
    from tools import program

    config = program.load_config(config_path)
    logger = get_logger()
    data = np.zeros((1, 3, 640, 640), dtype=np.float32)

    paddle.disable_static()

    config['Architecture']['in_channels'] = 3
    # NOTE(review): 6624 is forced onto the head regardless of the config;
    # confirm this is intended for the detection config used by default.
    config['Architecture']["Head"]['out_channels'] = 6624
    model = Model(config['Architecture'])
    model.eval()
    load_dygraph_pretrain(
        model, logger, weights_path, load_static_weights=True)

    x = paddle.to_variable(data)
    y = model(x)
    for y1 in y:
        print(y1.shape)

    # NOTE(review): `y` is iterated above and used as a single tensor below;
    # this only works if the head returns one tensor - TODO confirm.
    static_out = np.load(static_out_path)
    diff = y.numpy() - static_out
    print(y.shape, static_out.shape, diff.mean())


if __name__ == '__main__':
    check_static()
ppocr/modeling/backbones/__init__.py
0 → 100755
View file @
4ffb5b62
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__
=
[
'build_backbone'
]
def build_backbone(config, model_type):
    """
    Instantiate the backbone described by ``config``.

    args:
        config (dict): must contain 'name' (the backbone class name); all
            remaining items are passed to the class as keyword arguments.
            The dict is not modified.
        model_type (str): 'det' or 'rec'; selects which implementation of
            the backbone is imported.

    returns:
        the constructed backbone layer.

    raises:
        NotImplementedError: if ``model_type`` is neither 'det' nor 'rec'.
        KeyError: if ``config`` has no 'name' entry.
        AssertionError: if the requested backbone is not available for
            ``model_type``.
    """
    if model_type == 'det':
        from .det_mobilenet_v3 import MobileNetV3
        from .det_resnet_vd import ResNet
    elif model_type == 'rec':
        from .rec_mobilenet_v3 import MobileNetV3
        from .rec_resnet_vd import ResNet
    else:
        raise NotImplementedError(
            'model type {} is not supported'.format(model_type))

    # Explicit name -> class table instead of eval() on a config string.
    # Only classes that are actually imported above are listed, so an
    # unavailable name fails with a clear message instead of a NameError.
    support_dict = {'MobileNetV3': MobileNetV3, 'ResNet': ResNet}

    # Copy before popping so the caller's config can be reused.
    kwargs = dict(config)
    module_name = kwargs.pop('name')
    if module_name not in support_dict:
        # Raised explicitly (not via `assert`) so the check survives -O
        # while keeping the exception type callers may already expect.
        raise AssertionError(
            'when model type is {}, backbone only support {}'.format(
                model_type, sorted(support_dict)))
    return support_dict[module_name](**kwargs)
ppocr/modeling/backbones/det_mobilenet_v3.py
0 → 100755
View file @
4ffb5b62
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
paddle
from
paddle
import
nn
import
paddle.nn.functional
as
F
from
paddle
import
ParamAttr
__all__
=
[
'MobileNetV3'
]
def make_divisible(v, divisor=8, min_value=None):
    """
    Round ``v`` to a nearby multiple of ``divisor``.

    The value is rounded to the nearest multiple of ``divisor`` (never below
    ``min_value``, which defaults to ``divisor``). If that result is smaller
    than 90% of ``v``, one more ``divisor`` is added.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest_multiple = int(v + divisor / 2) // divisor * divisor
    rounded = max(lower_bound, nearest_multiple)
    # Do not let rounding shrink the value by more than 10%.
    if rounded < 0.9 * v:
        rounded += divisor
    return rounded
class MobileNetV3(nn.Layer):
    """MobileNetV3 backbone for detection; returns one feature map per stage."""

    def __init__(self, in_channels=3, model_name='large', scale=0.5,
                 **kwargs):
        """
        the MobilenetV3 backbone network for detection module.
        Args:
            in_channels(int): channels of the input image
            model_name(str): 'large' or 'small'
            scale(float): channel multiplier, one of 0.35, 0.5, 0.75,
                1.0, 1.25
            kwargs: accepted and ignored
        """
        super(MobileNetV3, self).__init__()

        # Per variant: (block table, channels of the last 1x1 conv).
        # Block table columns: k, exp, c, se, nl, s
        variants = {
            "large": ([
                [3, 16, 16, False, 'relu', 1],
                [3, 64, 24, False, 'relu', 2],
                [3, 72, 24, False, 'relu', 1],
                [5, 72, 40, True, 'relu', 2],
                [5, 120, 40, True, 'relu', 1],
                [5, 120, 40, True, 'relu', 1],
                [3, 240, 80, False, 'hard_swish', 2],
                [3, 200, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 480, 112, True, 'hard_swish', 1],
                [3, 672, 112, True, 'hard_swish', 1],
                [5, 672, 160, True, 'hard_swish', 2],
                [5, 960, 160, True, 'hard_swish', 1],
                [5, 960, 160, True, 'hard_swish', 1],
            ], 960),
            "small": ([
                [3, 16, 16, True, 'relu', 2],
                [3, 72, 24, False, 'relu', 2],
                [3, 88, 24, False, 'relu', 1],
                [5, 96, 40, True, 'hard_swish', 2],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 120, 48, True, 'hard_swish', 1],
                [5, 144, 48, True, 'hard_swish', 1],
                [5, 288, 96, True, 'hard_swish', 2],
                [5, 576, 96, True, 'hard_swish', 1],
                [5, 576, 96, True, 'hard_swish', 1],
            ], 576),
        }
        if model_name == "large":
            cfg, cls_ch_squeeze = variants["large"]
        elif model_name == "small":
            cfg, cls_ch_squeeze = variants["small"]
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert scale in supported_scale, \
            "supported scale are {} but input scale is {}".format(
                supported_scale, scale)

        # conv1
        stem_channels = make_divisible(16 * scale)
        self.conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=stem_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            groups=1,
            if_act=True,
            act='hard_swish',
            name='conv1')

        self.stages = []
        self.out_channels = []
        pending_blocks = []
        current_channels = stem_channels
        for index, (k, exp, c, se, nl, s) in enumerate(cfg):
            # A stride-2 block after the first three blocks opens a new
            # stage: close the blocks collected so far.
            if s == 2 and index > 2:
                self.out_channels.append(current_channels)
                self.stages.append(nn.Sequential(*pending_blocks))
                pending_blocks = []
            block_out = make_divisible(scale * c)
            pending_blocks.append(
                ResidualUnit(
                    in_channels=current_channels,
                    mid_channels=make_divisible(scale * exp),
                    out_channels=block_out,
                    kernel_size=k,
                    stride=s,
                    use_se=se,
                    act=nl,
                    name="conv" + str(index + 2)))
            current_channels = block_out

        # The final 1x1 conv belongs to the last stage.
        last_channels = make_divisible(scale * cls_ch_squeeze)
        pending_blocks.append(
            ConvBNLayer(
                in_channels=current_channels,
                out_channels=last_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                groups=1,
                if_act=True,
                act='hard_swish',
                name='conv_last'))
        self.stages.append(nn.Sequential(*pending_blocks))
        self.out_channels.append(last_channels)

        # self.stages is a plain list, so register each stage explicitly.
        for stage_index, stage in enumerate(self.stages):
            self.add_sublayer(
                sublayer=stage, name="stage{}".format(stage_index))

    def forward(self, x):
        """Return the list of outputs of every stage, in order."""
        features = []
        x = self.conv(x)
        for stage in self.stages:
            x = stage(x)
            features.append(x)
        return features
class ConvBNLayer(nn.Layer):
    """Bias-free Conv2d followed by BatchNorm and an optional activation."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None,
                 name=None):
        """
        Args:
            in_channels, out_channels, kernel_size, stride, padding, groups:
                forwarded to ``nn.Conv2d``.
            if_act(bool): whether an activation is applied after BatchNorm.
            act(str): 'relu' or 'hard_swish'; only read when ``if_act``.
            name(str): prefix of the parameter names. It is concatenated
                with string suffixes, so despite the ``None`` default a
                string must be supplied.
        """
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=ParamAttr(name=name + '_weights'),
            bias_attr=False)

        # The activation is applied in forward(), not inside BatchNorm.
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=None,
            param_attr=ParamAttr(name=name + "_bn_scale"),
            bias_attr=ParamAttr(name=name + "_bn_offset"),
            moving_mean_name=name + "_bn_mean",
            moving_variance_name=name + "_bn_variance")

    def forward(self, x):
        """
        Apply conv -> bn -> activation.

        Raises:
            ValueError: if ``if_act`` is set and ``act`` is neither 'relu'
                nor 'hard_swish'. (Previously this printed a message and
                called exit(), terminating the whole process.)
        """
        x = self.conv(x)
        x = self.bn(x)
        if self.if_act:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "hard_swish":
                x = F.hard_swish(x)
            else:
                raise ValueError(
                    "The activation function is selected incorrectly: "
                    "{!r} (supported: 'relu', 'hard_swish')".format(self.act))
        return x
class ResidualUnit(nn.Layer):
    """
    Block made of a 1x1 expand conv, a depthwise conv, an optional SE
    module and a 1x1 linear conv, with a residual connection when the
    input and output shapes allow it.
    """

    def __init__(self,
                 in_channels,
                 mid_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 use_se,
                 act=None,
                 name=''):
        super(ResidualUnit, self).__init__()
        # The skip connection is only used when the block keeps both the
        # resolution (stride 1) and the channel count.
        self.if_shortcut = stride == 1 and in_channels == out_channels
        self.if_se = use_se

        # 1x1 conv: in_channels -> mid_channels
        self.expand_conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=mid_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=True,
            act=act,
            name=name + "_expand")

        # depthwise conv: one group per channel
        depthwise_padding = int((kernel_size - 1) // 2)
        self.bottleneck_conv = ConvBNLayer(
            in_channels=mid_channels,
            out_channels=mid_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=depthwise_padding,
            groups=mid_channels,
            if_act=True,
            act=act,
            name=name + "_depthwise")

        if self.if_se:
            self.mid_se = SEModule(mid_channels, name=name + "_se")

        # 1x1 conv without activation: mid_channels -> out_channels
        self.linear_conv = ConvBNLayer(
            in_channels=mid_channels,
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            if_act=False,
            act=None,
            name=name + "_linear")

    def forward(self, inputs):
        """Run the block; add ``inputs`` to the result if shortcut is on."""
        out = self.bottleneck_conv(self.expand_conv(inputs))
        if self.if_se:
            out = self.mid_se(out)
        out = self.linear_conv(out)
        if not self.if_shortcut:
            return out
        return paddle.elementwise_add(inputs, out)
class SEModule(nn.Layer):
    """
    Channel gating: global average pool, 1x1 conv reducing the channels by
    ``reduction``, relu, 1x1 conv restoring them, hard_sigmoid; the result
    multiplies the input.
    """

    def __init__(self, in_channels, reduction=4, name=""):
        super(SEModule, self).__init__()
        reduced_channels = in_channels // reduction

        self.avg_pool = nn.Pool2D(
            pool_type="avg", global_pooling=True, use_cudnn=False)
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=reduced_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name=name + "_1_weights"),
            bias_attr=ParamAttr(name=name + "_1_offset"))
        self.conv2 = nn.Conv2d(
            in_channels=reduced_channels,
            out_channels=in_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            weight_attr=ParamAttr(name + "_2_weights"),
            bias_attr=ParamAttr(name=name + "_2_offset"))

    def forward(self, inputs):
        """Return ``inputs`` scaled by the computed gate."""
        pooled = self.avg_pool(inputs)
        hidden = F.relu(self.conv1(pooled))
        gate = F.hard_sigmoid(self.conv2(hidden))
        return inputs * gate
ppocr/modeling/backbones/det_resnet_vd.py
0 → 100644
View file @
4ffb5b62
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
paddle
import
nn
from
paddle.nn
import
functional
as
F
from
paddle
import
ParamAttr
__all__
=
[
"ResNet"
]
class ResNet(nn.Layer):
    """ResNet backbone for detection; returns one feature map per stage."""

    def __init__(self, in_channels=3, layers=50, **kwargs):
        """
        the Resnet backbone network for detection module.
        Args:
            in_channels(int): channels of the input image
            layers(int): network depth, one of 18, 34, 50, 101, 152, 200
            kwargs: accepted and ignored
        """
        super(ResNet, self).__init__()

        supported_layers = {
            18: {
                'depth': [2, 2, 2, 2],
                'block_class': BasicBlock
            },
            34: {
                'depth': [3, 4, 6, 3],
                'block_class': BasicBlock
            },
            50: {
                'depth': [3, 4, 6, 3],
                'block_class': BottleneckBlock
            },
            101: {
                'depth': [3, 4, 23, 3],
                'block_class': BottleneckBlock
            },
            152: {
                'depth': [3, 8, 36, 3],
                'block_class': BottleneckBlock
            },
            200: {
                'depth': [3, 12, 48, 3],
                'block_class': BottleneckBlock
            }
        }
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers.keys(), layers)

        depth = supported_layers[layers]['depth']
        block_class = supported_layers[layers]['block_class']
        num_filters = [64, 128, 256, 512]

        # Stem made of three 3x3 convs. The first conv now honours the
        # `in_channels` argument; it used to be hard-coded to 3, which
        # ignored the value passed by the caller. The alternative single
        # 7x7 stem was unreachable and has been dropped.
        self.conv1 = nn.Sequential(
            ConvBNLayer(
                in_channels=in_channels,
                out_channels=32,
                kernel_size=3,
                stride=2,
                act='relu',
                name='conv1_1'),
            ConvBNLayer(
                in_channels=32,
                out_channels=32,
                kernel_size=3,
                stride=1,
                act='relu',
                name='conv1_2'),
            ConvBNLayer(
                in_channels=32,
                out_channels=64,
                kernel_size=3,
                stride=1,
                act='relu',
                name='conv1_3'))
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.stages = []
        self.out_channels = []
        in_ch = 64
        for block_index in range(len(depth)):
            block_list = []
            for i in range(depth[block_index]):
                # Parameter name prefix of the block: "res<stage><suffix>".
                # The third stage of the 101/152/200 variants is named
                # a, b1, b2, ...; every other stage is named a, b, c, ...
                if layers in [101, 152, 200] and block_index == 2:
                    suffix = "a" if i == 0 else "b" + str(i)
                else:
                    suffix = chr(97 + i)
                conv_name = "res" + str(block_index + 2) + suffix
                block_list.append(
                    block_class(
                        in_channels=in_ch,
                        out_channels=num_filters[block_index],
                        # Downsample in the first block of every stage
                        # except the first stage.
                        stride=2 if i == 0 and block_index != 0 else 1,
                        if_first=block_index == i == 0,
                        name=conv_name))
                in_ch = block_list[-1].out_channels
            self.out_channels.append(in_ch)
            self.stages.append(nn.Sequential(*block_list))

        # self.stages is a plain list, so register each stage explicitly.
        for i, stage in enumerate(self.stages):
            self.add_sublayer(sublayer=stage, name="stage{}".format(i))

    def forward(self, x):
        """Return the list of outputs of every stage, in order."""
        x = self.conv1(x)
        x = self.pool(x)
        out_list = []
        for stage in self.stages:
            x = stage(x)
            out_list.append(x)
        return out_list
class ConvBNLayer(nn.Layer):
    """Bias-free Conv2d followed by BatchNorm (with optional activation)."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act=None,
                 name=None):
        """
        Args:
            in_channels, out_channels, kernel_size, stride, groups:
                forwarded to ``nn.Conv2d``; padding is (kernel_size - 1) // 2.
            act(str): activation passed to ``nn.BatchNorm``.
            name(str): prefix of the parameter names. It is concatenated
                and sliced below, so despite the ``None`` default a string
                must be supplied.
        """
        super(ConvBNLayer, self).__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        # BatchNorm parameter prefix: "bn_conv1" for "conv1", otherwise
        # "bn" + the name without its first three characters.
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + "_mean",
            moving_variance_name=bn_name + "_variance")

    # Defined as forward() rather than overriding __call__, so that calling
    # the layer goes through nn.Layer.__call__ like the other layers here.
    def forward(self, x):
        """Apply conv then bn."""
        x = self.conv(x)
        x = self.bn(x)
        return x
class ConvBNLayerNew(nn.Layer):
    """2x2 average pool (stride 2) followed by a stride-1 conv and BatchNorm."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act=None,
                 name=None):
        """
        Args:
            in_channels, out_channels, kernel_size, groups: forwarded to
                ``nn.Conv2d``; padding is (kernel_size - 1) // 2.
            stride: accepted for signature compatibility but not used; the
                pooling is fixed to kernel 2 / stride 2 and the conv to
                stride 1.
            act(str): activation passed to ``nn.BatchNorm``.
            name(str): prefix of the parameter names. It is concatenated
                and sliced below, so despite the ``None`` default a string
                must be supplied.
        """
        super(ConvBNLayerNew, self).__init__()
        self.pool = nn.AvgPool2d(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)

        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        # BatchNorm parameter prefix: "bn_conv1" for "conv1", otherwise
        # "bn" + the name without its first three characters.
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + "_mean",
            moving_variance_name=bn_name + "_variance")

    # Defined as forward() rather than overriding __call__, so that calling
    # the layer goes through nn.Layer.__call__ like the other layers here.
    def forward(self, x):
        """Apply pool, conv, then bn."""
        x = self.pool(x)
        x = self.conv(x)
        x = self.bn(x)
        return x
class ShortCut(nn.Layer):
    """
    Shortcut branch of a residual block: a 1x1 projection when one is
    needed, otherwise the identity.
    """

    def __init__(self, in_channels, out_channels, stride, name,
                 if_first=False):
        super(ShortCut, self).__init__()
        shape_changes = in_channels != out_channels or stride != 1

        # The very first block always projects with a plain conv; any other
        # block that changes the shape uses the pooled variant; the rest
        # pass the input through unchanged.
        if if_first:
            projection = ConvBNLayer
        elif shape_changes:
            projection = ConvBNLayerNew
        else:
            projection = None

        self.use_conv = projection is not None
        if self.use_conv:
            self.conv = projection(
                in_channels, out_channels, 1, stride, name=name)

    def forward(self, x):
        """Return the projected input, or ``x`` itself when no conv is used."""
        if not self.use_conv:
            return x
        return self.conv(x)
class BottleneckBlock(nn.Layer):
    """
    Residual block of 1x1 -> 3x3 -> 1x1 convs; the last conv and the
    shortcut produce ``out_channels * 4`` channels.
    """

    def __init__(self, in_channels, out_channels, stride, name, if_first):
        super(BottleneckBlock, self).__init__()
        expanded_channels = out_channels * 4

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
        # The 3x3 conv carries the block's stride.
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=expanded_channels,
            kernel_size=1,
            act=None,
            name=name + "_branch2c")

        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=expanded_channels,
            stride=stride,
            if_first=if_first,
            name=name + "_branch1")
        # Read by the backbone to wire the next block.
        self.out_channels = expanded_channels

    def forward(self, x):
        """Return relu(convs(x) + shortcut(x))."""
        residual = self.conv2(self.conv1(self.conv0(x)))
        merged = residual + self.short(x)
        return F.relu(merged)
class BasicBlock(nn.Layer):
    """Residual block of two 3x3 convs plus a shortcut."""

    def __init__(self, in_channels, out_channels, stride, name, if_first):
        super(BasicBlock, self).__init__()
        # The first 3x3 conv carries the block's stride.
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            act='relu',
            stride=stride,
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")
        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=out_channels,
            stride=stride,
            if_first=if_first,
            name=name + "_branch1")
        # Read by the backbone to wire the next block.
        self.out_channels = out_channels

    def forward(self, x):
        """Return relu(convs(x) + shortcut(x))."""
        residual = self.conv1(self.conv0(x))
        merged = residual + self.short(x)
        return F.relu(merged)
if __name__ == '__main__':
    # Smoke test: run an all-zero image through ResNet-18 and print the
    # shape of the input and of every stage output.
    import paddle

    paddle.disable_static()
    dummy = paddle.to_variable(paddle.zeros([1, 3, 640, 640]))
    print(dummy.shape)
    backbone = ResNet(layers=18)
    stage_outputs = backbone(dummy)
    for feature in stage_outputs:
        print(feature.shape)
    # paddle.save(net.state_dict(),'1.pth')
# paddle.save(net.state_dict(),'1.pth')
ppocr/modeling/backbones/rec_mobilenet_v3.py
0 → 100644
View file @
4ffb5b62
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddle
import
nn
from
ppocr.modeling.backbones.det_mobilenet_v3
import
ResidualUnit
,
ConvBNLayer
,
make_divisible
__all__
=
[
'MobileNetV3'
]
class MobileNetV3(nn.Layer):
    """MobileNetV3 backbone for recognition; returns a single feature map."""

    def __init__(self,
                 in_channels=3,
                 model_name='small',
                 scale=0.5,
                 large_stride=None,
                 small_stride=None,
                 **kwargs):
        """
        Args:
            in_channels(int): channels of the input image
            model_name(str): 'large' or 'small'
            scale(float): channel multiplier, one of 0.35, 0.5, 0.75,
                1.0, 1.25
            large_stride(list): four strides used by the 'large' table,
                defaults to [1, 2, 2, 2]
            small_stride(list): four strides used by the 'small' table,
                defaults to [2, 2, 2, 2]
            kwargs: accepted and ignored
        """
        super(MobileNetV3, self).__init__()
        small_stride = [2, 2, 2, 2] if small_stride is None else small_stride
        large_stride = [1, 2, 2, 2] if large_stride is None else large_stride

        assert isinstance(large_stride, list), \
            "large_stride type must be list but got {}".format(
                type(large_stride))
        assert isinstance(small_stride, list), \
            "small_stride type must be list but got {}".format(
                type(small_stride))
        assert len(large_stride) == 4, \
            "large_stride length must be 4 but got {}".format(
                len(large_stride))
        assert len(small_stride) == 4, \
            "small_stride length must be 4 but got {}".format(
                len(small_stride))

        # Block table columns: k, exp, c, se, nl, s
        # Configurable strides are given as (stride, 1) pairs, except the
        # first 'large' entry, which uses the bare value.
        if model_name == "large":
            ls0, ls1, ls2, ls3 = large_stride
            cfg = [
                [3, 16, 16, False, 'relu', ls0],
                [3, 64, 24, False, 'relu', (ls1, 1)],
                [3, 72, 24, False, 'relu', 1],
                [5, 72, 40, True, 'relu', (ls2, 1)],
                [5, 120, 40, True, 'relu', 1],
                [5, 120, 40, True, 'relu', 1],
                [3, 240, 80, False, 'hard_swish', 1],
                [3, 200, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 480, 112, True, 'hard_swish', 1],
                [3, 672, 112, True, 'hard_swish', 1],
                [5, 672, 160, True, 'hard_swish', (ls3, 1)],
                [5, 960, 160, True, 'hard_swish', 1],
                [5, 960, 160, True, 'hard_swish', 1],
            ]
            cls_ch_squeeze = 960
        elif model_name == "small":
            ss0, ss1, ss2, ss3 = small_stride
            cfg = [
                [3, 16, 16, True, 'relu', (ss0, 1)],
                [3, 72, 24, False, 'relu', (ss1, 1)],
                [3, 88, 24, False, 'relu', 1],
                [5, 96, 40, True, 'hard_swish', (ss2, 1)],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 120, 48, True, 'hard_swish', 1],
                [5, 144, 48, True, 'hard_swish', 1],
                [5, 288, 96, True, 'hard_swish', (ss3, 1)],
                [5, 576, 96, True, 'hard_swish', 1],
                [5, 576, 96, True, 'hard_swish', 1],
            ]
            cls_ch_squeeze = 576
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert scale in supported_scale, \
            "supported scales are {} but input scale is {}".format(
                supported_scale, scale)

        # conv1
        stem_channels = make_divisible(16 * scale)
        self.conv1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=stem_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            groups=1,
            if_act=True,
            act='hard_swish',
            name='conv1')

        units = []
        current_channels = stem_channels
        for index, (k, exp, c, se, nl, s) in enumerate(cfg):
            unit_out = make_divisible(scale * c)
            units.append(
                ResidualUnit(
                    in_channels=current_channels,
                    mid_channels=make_divisible(scale * exp),
                    out_channels=unit_out,
                    kernel_size=k,
                    stride=s,
                    use_se=se,
                    act=nl,
                    name='conv' + str(index + 2)))
            current_channels = unit_out
        self.blocks = nn.Sequential(*units)

        last_channels = make_divisible(scale * cls_ch_squeeze)
        self.conv2 = ConvBNLayer(
            in_channels=current_channels,
            out_channels=last_channels,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=1,
            if_act=True,
            act='hard_swish',
            name='conv_last')

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.out_channels = make_divisible(scale * cls_ch_squeeze)

    def forward(self, x):
        """Apply conv1, the residual units, conv2 and the final max pool."""
        features = self.conv1(x)
        features = self.blocks(features)
        features = self.conv2(features)
        return self.pool(features)
if __name__ == '__main__':
    # Smoke test: run an all-zero 32x320 image through the 'small' variant
    # and print the output shape.
    import paddle

    paddle.disable_static()
    dummy = paddle.to_variable(paddle.zeros((1, 3, 32, 320)))
    backbone = MobileNetV3(model_name='small', small_stride=[1, 2, 2, 2])
    output = backbone(dummy)
    print(output.shape)
ppocr/modeling/backbones/rec_resnet_vd.py
0 → 100644
View file @
4ffb5b62
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
paddle
import
nn
,
ParamAttr
from
paddle.nn
import
functional
as
F
__all__
=
[
"ResNet"
]
class ResNet(nn.Layer):
    """ResNet backbone for recognition; returns a single feature map."""

    def __init__(self, in_channels=3, layers=34, **kwargs):
        """
        Args:
            in_channels(int): channels of the input image
            layers(int): network depth, one of 18, 34, 50, 101, 152, 200
            kwargs: accepted and ignored, so that extra config entries do
                not break construction (same as the other backbones)
        """
        super(ResNet, self).__init__()

        supported_layers = {
            18: {
                'depth': [2, 2, 2, 2],
                'block_class': BasicBlock
            },
            34: {
                'depth': [3, 4, 6, 3],
                'block_class': BasicBlock
            },
            50: {
                'depth': [3, 4, 6, 3],
                'block_class': BottleneckBlock
            },
            101: {
                'depth': [3, 4, 23, 3],
                'block_class': BottleneckBlock
            },
            152: {
                'depth': [3, 8, 36, 3],
                'block_class': BottleneckBlock
            },
            200: {
                'depth': [3, 12, 48, 3],
                'block_class': BottleneckBlock
            }
        }
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers.keys(), layers)

        num_filters = [64, 128, 256, 512]
        depth = supported_layers[layers]['depth']
        block_class = supported_layers[layers]['block_class']

        # Stem made of three stride-1 3x3 convs. The alternative single
        # 7x7 stem was unreachable and has been dropped.
        self.conv1 = nn.Sequential(
            ConvBNLayer(
                in_channels=in_channels,
                out_channels=32,
                kernel_size=3,
                stride=1,
                act='relu',
                name='conv1_1'),
            ConvBNLayer(
                in_channels=32,
                out_channels=32,
                kernel_size=3,
                stride=1,
                act='relu',
                name='conv1_2'),
            ConvBNLayer(
                in_channels=32,
                out_channels=64,
                kernel_size=3,
                stride=1,
                act='relu',
                name='conv1_3'))
        self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        block_list = []
        in_ch = 64
        for block_index in range(len(depth)):
            for i in range(depth[block_index]):
                # Parameter name prefix of the block: "res<stage><suffix>".
                # The third stage of the 101/152/200 variants is named
                # a, b1, b2, ...; every other stage is named a, b, c, ...
                if layers in [101, 152, 200] and block_index == 2:
                    suffix = "a" if i == 0 else "b" + str(i)
                else:
                    suffix = chr(97 + i)
                conv_name = "res" + str(block_index + 2) + suffix

                # The first block of every stage except the first uses
                # stride (2, 1); all other blocks use (1, 1).
                if i == 0 and block_index != 0:
                    stride = (2, 1)
                else:
                    stride = (1, 1)
                block_list.append(
                    block_class(
                        in_channels=in_ch,
                        out_channels=num_filters[block_index],
                        stride=stride,
                        if_first=block_index == i == 0,
                        name=conv_name))
                in_ch = block_list[-1].out_channels
        self.block_list = nn.Sequential(*block_list)
        self.add_sublayer(sublayer=self.block_list, name="block_list")
        self.pool_out = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.out_channels = in_ch

    def forward(self, x):
        """Apply the stem, max pool, all residual blocks and the final pool."""
        x = self.conv1(x)
        x = self.pool(x)
        x = self.block_list(x)
        x = self.pool_out(x)
        return x
class ConvBNLayer(nn.Layer):
    """Bias-free Conv2d followed by BatchNorm (with optional activation)."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act=None,
                 name=None):
        """
        Args:
            in_channels, out_channels, kernel_size, stride, groups:
                forwarded to ``nn.Conv2d``; padding is (kernel_size - 1) // 2.
            act(str): activation passed to ``nn.BatchNorm``.
            name(str): prefix of the parameter names. It is concatenated
                and sliced below, so despite the ``None`` default a string
                must be supplied.
        """
        super(ConvBNLayer, self).__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        # BatchNorm parameter prefix: "bn_conv1" for "conv1", otherwise
        # "bn" + the name without its first three characters.
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + "_mean",
            moving_variance_name=bn_name + "_variance")

    # Defined as forward() rather than overriding __call__, so that calling
    # the layer goes through nn.Layer.__call__.
    def forward(self, x):
        """Apply conv then bn."""
        x = self.conv(x)
        x = self.bn(x)
        return x
class ConvBNLayerNew(nn.Layer):
    """Average pooling, then a stride-1 bias-free convolution and batch norm.

    The requested ``stride`` is applied by the average pool (used as both its
    kernel size and its stride, with ``ceil_mode=True``); the convolution
    itself always runs with stride 1.

    Args:
        in_channels: number of input channels of the convolution.
        out_channels: number of output channels of the convolution; also the
            channel count handed to the batch norm.
        kernel_size: convolution kernel size. The padding is derived from it
            as ``(kernel_size - 1) // 2``.
        stride: kernel size and stride of the average pool.
        groups: convolution groups.
        act: activation name forwarded to ``nn.BatchNorm`` (``None`` for no
            activation).
        name: base name from which all parameter names are derived. Despite
            the ``None`` default it has to be a string, because it is
            concatenated and sliced below.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 act=None,
                 name=None):
        super(ConvBNLayerNew, self).__init__()
        self.pool = nn.AvgPool2d(
            kernel_size=stride, stride=stride, padding=0, ceil_mode=True)
        self.conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        # Batch-norm parameter names mirror the conv name: "conv1" becomes
        # "bn_conv1"; any other name has its first three characters replaced
        # by "bn".
        if name == "conv1":
            bn_name = "bn_" + name
        else:
            bn_name = "bn" + name[3:]
        self.bn = nn.BatchNorm(
            num_channels=out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + "_scale"),
            bias_attr=ParamAttr(name=bn_name + "_offset"),
            moving_mean_name=bn_name + "_mean",
            moving_variance_name=bn_name + "_variance")

    def forward(self, x):
        """Apply the average pool, the convolution and the batch norm to ``x``.

        Defined as ``forward`` (instead of overriding ``__call__``) so that
        the layer is invoked through ``nn.Layer.__call__`` like every other
        layer in this file; ``layer(x)`` keeps working unchanged.
        """
        x = self.pool(x)
        x = self.conv(x)
        x = self.bn(x)
        return x
class ShortCut(nn.Layer):
    """Shortcut branch of a residual block.

    A 1x1 projection is used when the block is the first one (``if_first``)
    or when the channel count or ``stride[0]`` changes; otherwise the input
    is passed through untouched.

    Args:
        in_channels: channels of the incoming tensor.
        out_channels: channels expected by the residual addition.
        stride: indexable stride; only ``stride[0]`` is inspected here, the
            whole value is forwarded to the projection layer.
        name: base name forwarded to the projection layer.
        if_first: whether this shortcut belongs to the very first block.
    """

    def __init__(self, in_channels, out_channels, stride, name,
                 if_first=False):
        super(ShortCut, self).__init__()
        shape_changes = in_channels != out_channels or stride[0] != 1

        # The first block always projects with the plain conv+bn layer; later
        # blocks only project when the shape changes, using the pooled variant.
        if if_first:
            projection_cls = ConvBNLayer
        elif shape_changes:
            projection_cls = ConvBNLayerNew
        else:
            projection_cls = None

        self.use_conv = projection_cls is not None
        if self.use_conv:
            self.conv = projection_cls(
                in_channels, out_channels, 1, stride, name=name)

    def forward(self, x):
        """Return the projected input, or the input itself for identity."""
        return self.conv(x) if self.use_conv else x
class BottleneckBlock(nn.Layer):
    """Residual bottleneck block: 1x1 conv, 3x3 conv, 1x1 conv plus shortcut.

    The last convolution and the shortcut both produce ``out_channels * 4``
    channels, which is also exposed as ``self.out_channels``.

    Args:
        in_channels: channels of the incoming tensor.
        out_channels: channels of the two inner convolutions.
        stride: stride applied by the 3x3 convolution and the shortcut.
        name: base name; the branches append ``_branch2a/b/c`` and
            ``_branch1`` to it.
        if_first: forwarded to the shortcut.
    """

    def __init__(self, in_channels, out_channels, stride, name, if_first):
        super(BottleneckBlock, self).__init__()
        expanded_channels = out_channels * 4

        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=expanded_channels,
            kernel_size=1,
            act=None,
            name=name + "_branch2c")
        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=expanded_channels,
            stride=stride,
            if_first=if_first,
            name=name + "_branch1")
        self.out_channels = expanded_channels

    def forward(self, x):
        """Return ``relu(conv2(conv1(conv0(x))) + short(x))``."""
        main_branch = self.conv2(self.conv1(self.conv0(x)))
        return F.relu(main_branch + self.short(x))
class BasicBlock(nn.Layer):
    """Residual basic block: two 3x3 convolutions plus a shortcut.

    Args:
        in_channels: channels of the incoming tensor.
        out_channels: channels produced by both convolutions and the
            shortcut; also exposed as ``self.out_channels``.
        stride: stride applied by the first convolution and the shortcut.
        name: base name; the branches append ``_branch2a/b`` and
            ``_branch1`` to it.
        if_first: forwarded to the shortcut.
    """

    def __init__(self, in_channels, out_channels, stride, name, if_first):
        super(BasicBlock, self).__init__()
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")
        self.short = ShortCut(
            in_channels=in_channels,
            out_channels=out_channels,
            stride=stride,
            if_first=if_first,
            name=name + "_branch1")
        self.out_channels = out_channels

    def forward(self, x):
        """Return ``relu(conv1(conv0(x)) + short(x))``."""
        main_branch = self.conv1(self.conv0(x))
        summed = main_branch + self.short(x)
        return F.relu(summed)
ppocr/modeling/heads/__init__.py
0 → 100755
View file @
4ffb5b62
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__
=
[
'build_head'
]
def build_head(config):
    """Instantiate the head layer selected by ``config['name']``.

    Args:
        config: mapping with a ``'name'`` entry (``'DBHead'`` or ``'CTC'``);
            all remaining entries are passed to the selected class as keyword
            arguments. The caller's mapping is left untouched.

    Returns:
        An instance of the selected head class.

    Raises:
        AssertionError: if the requested name is not a supported head.
    """
    import copy

    # det head
    from .det_db_head import DBHead
    # rec head
    from .rec_ctc_head import CTC

    support_dict = ['DBHead', 'CTC']
    # Explicit name -> class table instead of eval() on a config string.
    head_classes = {'DBHead': DBHead, 'CTC': CTC}

    # Work on a copy so popping 'name' does not alter the caller's config
    # (same approach as build_loss).
    config = copy.deepcopy(config)
    module_name = config.pop('name')
    assert module_name in support_dict, Exception(
        'head only support {}'.format(support_dict))
    module_class = head_classes[module_name](**config)
    return module_class
ppocr/modeling/heads/det_db_head.py
0 → 100644
View file @
4ffb5b62
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
math
import
paddle
from
paddle
import
nn
import
paddle.nn.functional
as
F
from
paddle
import
ParamAttr
def get_bias_attr(k, name):
    """Build the ``ParamAttr`` of a bias drawn uniformly from ``[-b, b]``.

    ``b`` is ``1 / sqrt(k)`` and the parameter is named ``name + "_b_attr"``.
    """
    bound = 1.0 / math.sqrt(k * 1.0)
    return ParamAttr(
        initializer=paddle.nn.initializer.Uniform(-bound, bound),
        name=name + "_b_attr")
class Head(nn.Layer):
    """Prediction branch: 3x3 conv, BN, two stride-2 transposed convs, sigmoid.

    The branch works on ``in_channels // 4`` channels internally and ends
    with a single output channel.

    Args:
        in_channels: channels of the incoming feature map.
        name_list: parameter name prefixes. Entries 0-4 name, in order, the
            first conv, first batch norm, first transposed conv, second batch
            norm and second transposed conv; the last entry prefixes the bias
            names of the two transposed convs.
    """

    def __init__(self, in_channels, name_list):
        super(Head, self).__init__()
        quarter = in_channels // 4

        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=quarter,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(name=name_list[0] + '.w_0'),
            bias_attr=False)
        self.conv_bn1 = self._relu_batch_norm(quarter, name_list[1])
        self.conv2 = self._upsample_x2(
            quarter, quarter, name_list[2], name_list[-1] + "conv2")
        self.conv_bn2 = self._relu_batch_norm(quarter, name_list[3])
        self.conv3 = self._upsample_x2(
            quarter, 1, name_list[4], name_list[-1] + "conv3")

    @staticmethod
    def _relu_batch_norm(channels, prefix):
        """Batch norm with ReLU; scale starts at 1.0 and offset at 1e-4."""
        constant = paddle.nn.initializer.Constant
        return nn.BatchNorm(
            num_channels=channels,
            param_attr=ParamAttr(
                name=prefix + '.w_0', initializer=constant(value=1.0)),
            bias_attr=ParamAttr(
                name=prefix + '.b_0', initializer=constant(value=1e-4)),
            moving_mean_name=prefix + '.w_1',
            moving_variance_name=prefix + '.w_2',
            act='relu')

    @staticmethod
    def _upsample_x2(channels, out_channels, weight_prefix, bias_prefix):
        """Kernel-2, stride-2 transposed conv with MSRA-initialised weights."""
        return nn.ConvTranspose2d(
            in_channels=channels,
            out_channels=out_channels,
            kernel_size=2,
            stride=2,
            weight_attr=ParamAttr(
                name=weight_prefix + '.w_0',
                initializer=paddle.nn.initializer.MSRA(uniform=False)),
            bias_attr=get_bias_attr(channels, bias_prefix))

    def forward(self, x):
        """Run all layers in order and squash the result with a sigmoid."""
        layers = (self.conv1, self.conv_bn1, self.conv2, self.conv_bn2,
                  self.conv3)
        for layer in layers:
            x = layer(x)
        return F.sigmoid(x)
class DBHead(nn.Layer):
    """
    Differentiable Binarization (DB) head for text detection:
        see https://arxiv.org/abs/1911.08947

    Two identical ``Head`` branches are built: ``binarize`` and ``thresh``.

    args:
        in_channels: channels of the incoming feature map.
        k: factor multiplying the difference inside ``step_function``.
        **kwargs: accepted and ignored.
    """

    def __init__(self, in_channels, k=50, **kwargs):
        super(DBHead, self).__init__()
        self.k = k
        binarize_name_list = [
            'conv2d_56', 'batch_norm_47', 'conv2d_transpose_0',
            'batch_norm_48', 'conv2d_transpose_1', 'binarize'
        ]
        thresh_name_list = [
            'conv2d_57', 'batch_norm_49', 'conv2d_transpose_2',
            'batch_norm_50', 'conv2d_transpose_3', 'thresh'
        ]
        self.binarize = Head(in_channels, binarize_name_list)
        self.thresh = Head(in_channels, thresh_name_list)

    def step_function(self, x, y):
        """Return ``1 / (1 + exp(-k * (x - y)))``."""
        exponent = -self.k * (x - y)
        return paddle.reciprocal(1 + paddle.exp(exponent))

    def forward(self, x):
        """Return the shrink map, plus threshold/binary maps while training.

        Outside training only the ``binarize`` branch output is returned. In
        training mode the shrink, threshold and binary maps are concatenated
        along axis 1.
        """
        shrink_maps = self.binarize(x)
        if self.training:
            threshold_maps = self.thresh(x)
            binary_maps = self.step_function(shrink_maps, threshold_maps)
            return paddle.concat(
                [shrink_maps, threshold_maps, binary_maps], axis=1)
        return shrink_maps
ppocr/modeling/heads/rec_ctc_head.py
0 → 100755
View file @
4ffb5b62
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
math
import
paddle
from
paddle
import
ParamAttr
,
nn
def get_para_bias_attr(l2_decay, k, name):
    """Build the weight and bias ``ParamAttr`` pair for a layer.

    Both attributes share one L2 regularizer (coefficient ``l2_decay``) and
    one uniform initializer over ``[-1/sqrt(k), 1/sqrt(k)]``. They are named
    ``name + "_w_attr"`` and ``name + "_b_attr"``.

    Returns:
        ``[weight_attr, bias_attr]``
    """
    shared_regularizer = paddle.fluid.regularizer.L2Decay(l2_decay)
    bound = 1.0 / math.sqrt(k * 1.0)
    shared_initializer = nn.initializer.Uniform(-bound, bound)
    return [
        ParamAttr(
            regularizer=shared_regularizer,
            initializer=shared_initializer,
            name=name + suffix) for suffix in ("_w_attr", "_b_attr")
    ]
class CTC(nn.Layer):
    """Recognition head made of a single fully connected layer.

    Args:
        in_channels: input feature size of the linear layer.
        out_channels: output feature size; also kept as ``self.out_channels``.
        fc_decay: L2 decay coefficient for the layer's weight and bias.
        **kwargs: accepted and ignored.
    """

    def __init__(self, in_channels, out_channels, fc_decay=1e-5, **kwargs):
        super(CTC, self).__init__()
        fc_weight_attr, fc_bias_attr = get_para_bias_attr(
            l2_decay=fc_decay, k=in_channels, name='ctc_fc')
        self.fc = nn.Linear(
            in_channels,
            out_channels,
            weight_attr=fc_weight_attr,
            bias_attr=fc_bias_attr,
            name='ctc_fc')
        self.out_channels = out_channels

    def forward(self, x, labels=None):
        """Return the linear layer's output for ``x``; ``labels`` is unused."""
        return self.fc(x)
ppocr/modeling/losses/__init__.py
0 → 100755
View file @
4ffb5b62
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
copy
def build_loss(config):
    """Instantiate the loss selected by ``config['name']``.

    Args:
        config: mapping with a ``'name'`` entry (``'DBLoss'`` or
            ``'CTCLoss'``); all remaining entries are passed to the selected
            class as keyword arguments. A deep copy is used, so the caller's
            mapping is left untouched.

    Returns:
        An instance of the selected loss class.

    Raises:
        AssertionError: if the requested name is not a supported loss.
    """
    # det loss
    from .det_db_loss import DBLoss
    # rec loss
    from .rec_ctc_loss import CTCLoss

    support_dict = ['DBLoss', 'CTCLoss']
    # Explicit name -> class table instead of eval() on a config string.
    loss_classes = {'DBLoss': DBLoss, 'CTCLoss': CTCLoss}

    config = copy.deepcopy(config)
    module_name = config.pop('name')
    assert module_name in support_dict, Exception(
        'loss only support {}'.format(support_dict))
    module_class = loss_classes[module_name](**config)
    return module_class
Prev
1
…
15
16
17
18
19
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment