Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
paddle_dbnet
Commits
8485edfc
Commit
8485edfc
authored
Nov 08, 2021
by
tink2123
Browse files
Merge branch 'dygraph' of
https://github.com/PaddlePaddle/PaddleOCR
into paddle2onnx
parents
48a6ebad
0b37c118
Changes
81
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
220 additions
and
81 deletions
+220
-81
doc/joinus.PNG
doc/joinus.PNG
+0
-0
ppocr/data/imaug/east_process.py
ppocr/data/imaug/east_process.py
+10
-13
ppocr/data/imaug/iaa_augment.py
ppocr/data/imaug/iaa_augment.py
+4
-0
ppocr/data/imaug/make_border_map.py
ppocr/data/imaug/make_border_map.py
+17
-1
ppocr/data/imaug/make_pse_gt.py
ppocr/data/imaug/make_pse_gt.py
+35
-14
ppocr/data/imaug/make_shrink_map.py
ppocr/data/imaug/make_shrink_map.py
+17
-1
ppocr/data/imaug/random_crop_data.py
ppocr/data/imaug/random_crop_data.py
+17
-1
ppocr/data/imaug/sast_process.py
ppocr/data/imaug/sast_process.py
+4
-1
ppocr/data/imaug/text_image_aug/augment.py
ppocr/data/imaug/text_image_aug/augment.py
+4
-0
ppocr/data/imaug/text_image_aug/warp_mls.py
ppocr/data/imaug/text_image_aug/warp_mls.py
+5
-1
ppocr/losses/ace_loss.py
ppocr/losses/ace_loss.py
+4
-1
ppocr/losses/center_loss.py
ppocr/losses/center_loss.py
+3
-0
ppocr/losses/det_basic_loss.py
ppocr/losses/det_basic_loss.py
+5
-2
ppocr/losses/det_db_loss.py
ppocr/losses/det_db_loss.py
+4
-0
ppocr/losses/det_pse_loss.py
ppocr/losses/det_pse_loss.py
+4
-0
ppocr/losses/rec_nrtr_loss.py
ppocr/losses/rec_nrtr_loss.py
+1
-1
ppocr/modeling/backbones/rec_mv1_enhance.py
ppocr/modeling/backbones/rec_mv1_enhance.py
+2
-0
ppocr/modeling/backbones/rec_resnet_31.py
ppocr/modeling/backbones/rec_resnet_31.py
+69
-35
ppocr/modeling/backbones/rec_resnet_aster.py
ppocr/modeling/backbones/rec_resnet_aster.py
+4
-1
ppocr/modeling/heads/det_pse_head.py
ppocr/modeling/heads/det_pse_head.py
+11
-9
No files found.
doc/joinus.PNG
View replaced file @
48a6ebad
View file @
8485edfc
210 KB
|
W:
|
H:
184 KB
|
W:
|
H:
2-up
Swipe
Onion skin
ppocr/data/imaug/east_process.py
View file @
8485edfc
...
@@ -11,7 +11,10 @@
...
@@ -11,7 +11,10 @@
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#See the License for the specific language governing permissions and
#limitations under the License.
#limitations under the License.
"""
This code is refered from:
https://github.com/songdejia/EAST/blob/master/data_utils.py
"""
import
math
import
math
import
cv2
import
cv2
import
numpy
as
np
import
numpy
as
np
...
@@ -24,10 +27,10 @@ __all__ = ['EASTProcessTrain']
...
@@ -24,10 +27,10 @@ __all__ = ['EASTProcessTrain']
class
EASTProcessTrain
(
object
):
class
EASTProcessTrain
(
object
):
def
__init__
(
self
,
def
__init__
(
self
,
image_shape
=
[
512
,
512
],
image_shape
=
[
512
,
512
],
background_ratio
=
0.125
,
background_ratio
=
0.125
,
min_crop_side_ratio
=
0.1
,
min_crop_side_ratio
=
0.1
,
min_text_size
=
10
,
min_text_size
=
10
,
**
kwargs
):
**
kwargs
):
self
.
input_size
=
image_shape
[
1
]
self
.
input_size
=
image_shape
[
1
]
self
.
random_scale
=
np
.
array
([
0.5
,
1
,
2.0
,
3.0
])
self
.
random_scale
=
np
.
array
([
0.5
,
1
,
2.0
,
3.0
])
...
@@ -282,12 +285,7 @@ class EASTProcessTrain(object):
...
@@ -282,12 +285,7 @@ class EASTProcessTrain(object):
1.0
/
max
(
min
(
poly_h
,
poly_w
),
1.0
)
1.0
/
max
(
min
(
poly_h
,
poly_w
),
1.0
)
return
score_map
,
geo_map
,
training_mask
return
score_map
,
geo_map
,
training_mask
def
crop_area
(
self
,
def
crop_area
(
self
,
im
,
polys
,
tags
,
crop_background
=
False
,
max_tries
=
50
):
im
,
polys
,
tags
,
crop_background
=
False
,
max_tries
=
50
):
"""
"""
make random crop from the input image
make random crop from the input image
:param im:
:param im:
...
@@ -435,5 +433,4 @@ class EASTProcessTrain(object):
...
@@ -435,5 +433,4 @@ class EASTProcessTrain(object):
data
[
'score_map'
]
=
score_map
data
[
'score_map'
]
=
score_map
data
[
'geo_map'
]
=
geo_map
data
[
'geo_map'
]
=
geo_map
data
[
'training_mask'
]
=
training_mask
data
[
'training_mask'
]
=
training_mask
# print(im.shape, score_map.shape, geo_map.shape, training_mask.shape)
return
data
return
data
\ No newline at end of file
ppocr/data/imaug/iaa_augment.py
View file @
8485edfc
...
@@ -11,6 +11,10 @@
...
@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""
This code is refer from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/iaa_augment.py
"""
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
from
__future__
import
print_function
from
__future__
import
print_function
...
...
ppocr/data/imaug/make_border_map.py
View file @
8485edfc
# -*- coding:utf-8 -*-
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/make_border_map.py
"""
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
...
...
ppocr/data/imaug/make_pse_gt.py
View file @
8485edfc
# -*- coding:utf-8 -*-
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
...
@@ -12,12 +24,8 @@ from shapely.geometry import Polygon
...
@@ -12,12 +24,8 @@ from shapely.geometry import Polygon
__all__
=
[
'MakePseGt'
]
__all__
=
[
'MakePseGt'
]
class
MakePseGt
(
object
):
r
'''
Making binary mask from detection data with ICDAR format.
Typically following the process of class `MakeICDARData`.
'''
class
MakePseGt
(
object
):
def
__init__
(
self
,
kernel_num
=
7
,
size
=
640
,
min_shrink_ratio
=
0.4
,
**
kwargs
):
def
__init__
(
self
,
kernel_num
=
7
,
size
=
640
,
min_shrink_ratio
=
0.4
,
**
kwargs
):
self
.
kernel_num
=
kernel_num
self
.
kernel_num
=
kernel_num
self
.
min_shrink_ratio
=
min_shrink_ratio
self
.
min_shrink_ratio
=
min_shrink_ratio
...
@@ -38,16 +46,20 @@ class MakePseGt(object):
...
@@ -38,16 +46,20 @@ class MakePseGt(object):
text_polys
*=
scale
text_polys
*=
scale
gt_kernels
=
[]
gt_kernels
=
[]
for
i
in
range
(
1
,
self
.
kernel_num
+
1
):
for
i
in
range
(
1
,
self
.
kernel_num
+
1
):
# s1->sn, from big to small
# s1->sn, from big to small
rate
=
1.0
-
(
1.0
-
self
.
min_shrink_ratio
)
/
(
self
.
kernel_num
-
1
)
*
i
rate
=
1.0
-
(
1.0
-
self
.
min_shrink_ratio
)
/
(
self
.
kernel_num
-
1
text_kernel
,
ignore_tags
=
self
.
generate_kernel
(
image
.
shape
[
0
:
2
],
rate
,
text_polys
,
ignore_tags
)
)
*
i
text_kernel
,
ignore_tags
=
self
.
generate_kernel
(
image
.
shape
[
0
:
2
],
rate
,
text_polys
,
ignore_tags
)
gt_kernels
.
append
(
text_kernel
)
gt_kernels
.
append
(
text_kernel
)
training_mask
=
np
.
ones
(
image
.
shape
[
0
:
2
],
dtype
=
'uint8'
)
training_mask
=
np
.
ones
(
image
.
shape
[
0
:
2
],
dtype
=
'uint8'
)
for
i
in
range
(
text_polys
.
shape
[
0
]):
for
i
in
range
(
text_polys
.
shape
[
0
]):
if
ignore_tags
[
i
]:
if
ignore_tags
[
i
]:
cv2
.
fillPoly
(
training_mask
,
text_polys
[
i
].
astype
(
np
.
int32
)[
np
.
newaxis
,
:,
:],
0
)
cv2
.
fillPoly
(
training_mask
,
text_polys
[
i
].
astype
(
np
.
int32
)[
np
.
newaxis
,
:,
:],
0
)
gt_kernels
=
np
.
array
(
gt_kernels
)
gt_kernels
=
np
.
array
(
gt_kernels
)
gt_kernels
[
gt_kernels
>
0
]
=
1
gt_kernels
[
gt_kernels
>
0
]
=
1
...
@@ -59,16 +71,25 @@ class MakePseGt(object):
...
@@ -59,16 +71,25 @@ class MakePseGt(object):
data
[
'mask'
]
=
training_mask
.
astype
(
'float32'
)
data
[
'mask'
]
=
training_mask
.
astype
(
'float32'
)
return
data
return
data
def
generate_kernel
(
self
,
img_size
,
shrink_ratio
,
text_polys
,
ignore_tags
=
None
):
def
generate_kernel
(
self
,
img_size
,
shrink_ratio
,
text_polys
,
ignore_tags
=
None
):
"""
Refer to part of the code:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/textdet_targets/base_textdet_targets.py
"""
h
,
w
=
img_size
h
,
w
=
img_size
text_kernel
=
np
.
zeros
((
h
,
w
),
dtype
=
np
.
float32
)
text_kernel
=
np
.
zeros
((
h
,
w
),
dtype
=
np
.
float32
)
for
i
,
poly
in
enumerate
(
text_polys
):
for
i
,
poly
in
enumerate
(
text_polys
):
polygon
=
Polygon
(
poly
)
polygon
=
Polygon
(
poly
)
distance
=
polygon
.
area
*
(
1
-
shrink_ratio
*
shrink_ratio
)
/
(
polygon
.
length
+
1e-6
)
distance
=
polygon
.
area
*
(
1
-
shrink_ratio
*
shrink_ratio
)
/
(
polygon
.
length
+
1e-6
)
subject
=
[
tuple
(
l
)
for
l
in
poly
]
subject
=
[
tuple
(
l
)
for
l
in
poly
]
pco
=
pyclipper
.
PyclipperOffset
()
pco
=
pyclipper
.
PyclipperOffset
()
pco
.
AddPath
(
subject
,
pyclipper
.
JT_ROUND
,
pco
.
AddPath
(
subject
,
pyclipper
.
JT_ROUND
,
pyclipper
.
ET_CLOSEDPOLYGON
)
pyclipper
.
ET_CLOSEDPOLYGON
)
shrinked
=
np
.
array
(
pco
.
Execute
(
-
distance
))
shrinked
=
np
.
array
(
pco
.
Execute
(
-
distance
))
if
len
(
shrinked
)
==
0
or
shrinked
.
size
==
0
:
if
len
(
shrinked
)
==
0
or
shrinked
.
size
==
0
:
...
...
ppocr/data/imaug/make_shrink_map.py
View file @
8485edfc
# -*- coding:utf-8 -*-
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/make_shrink_map.py
"""
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
...
...
ppocr/data/imaug/random_crop_data.py
View file @
8485edfc
# -*- coding:utf-8 -*-
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/random_crop_data.py
"""
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
...
...
ppocr/data/imaug/sast_process.py
View file @
8485edfc
...
@@ -11,7 +11,10 @@
...
@@ -11,7 +11,10 @@
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#See the License for the specific language governing permissions and
#limitations under the License.
#limitations under the License.
"""
This part code is refered from:
https://github.com/songdejia/EAST/blob/master/data_utils.py
"""
import
math
import
math
import
cv2
import
cv2
import
numpy
as
np
import
numpy
as
np
...
...
ppocr/data/imaug/text_image_aug/augment.py
View file @
8485edfc
...
@@ -11,6 +11,10 @@
...
@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""
This code is refer from:
https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/augment.py
"""
import
numpy
as
np
import
numpy
as
np
from
.warp_mls
import
WarpMLS
from
.warp_mls
import
WarpMLS
...
...
ppocr/data/imaug/text_image_aug/warp_mls.py
View file @
8485edfc
...
@@ -11,6 +11,10 @@
...
@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""
This code is refer from:
https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/warp_mls.py
"""
import
numpy
as
np
import
numpy
as
np
...
@@ -161,4 +165,4 @@ class WarpMLS:
...
@@ -161,4 +165,4 @@ class WarpMLS:
dst
=
np
.
clip
(
dst
,
0
,
255
)
dst
=
np
.
clip
(
dst
,
0
,
255
)
dst
=
np
.
array
(
dst
,
dtype
=
np
.
uint8
)
dst
=
np
.
array
(
dst
,
dtype
=
np
.
uint8
)
return
dst
return
dst
\ No newline at end of file
ppocr/losses/ace_loss.py
View file @
8485edfc
...
@@ -11,6 +11,9 @@
...
@@ -11,6 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# This code is refer from: https://github.com/viig99/LS-ACELoss
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
from
__future__
import
print_function
from
__future__
import
print_function
...
@@ -32,7 +35,7 @@ class ACELoss(nn.Layer):
...
@@ -32,7 +35,7 @@ class ACELoss(nn.Layer):
def
__call__
(
self
,
predicts
,
batch
):
def
__call__
(
self
,
predicts
,
batch
):
if
isinstance
(
predicts
,
(
list
,
tuple
)):
if
isinstance
(
predicts
,
(
list
,
tuple
)):
predicts
=
predicts
[
-
1
]
predicts
=
predicts
[
-
1
]
B
,
N
=
predicts
.
shape
[:
2
]
B
,
N
=
predicts
.
shape
[:
2
]
div
=
paddle
.
to_tensor
([
N
]).
astype
(
'float32'
)
div
=
paddle
.
to_tensor
([
N
]).
astype
(
'float32'
)
...
...
ppocr/losses/center_loss.py
View file @
8485edfc
...
@@ -12,6 +12,8 @@
...
@@ -12,6 +12,8 @@
#See the License for the specific language governing permissions and
#See the License for the specific language governing permissions and
#limitations under the License.
#limitations under the License.
# This code is refer from: https://github.com/KaiyangZhou/pytorch-center-loss
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
from
__future__
import
print_function
from
__future__
import
print_function
...
@@ -27,6 +29,7 @@ class CenterLoss(nn.Layer):
...
@@ -27,6 +29,7 @@ class CenterLoss(nn.Layer):
"""
"""
Reference: Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
Reference: Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
"""
"""
def
__init__
(
self
,
def
__init__
(
self
,
num_classes
=
6625
,
num_classes
=
6625
,
feat_dim
=
96
,
feat_dim
=
96
,
...
...
ppocr/losses/det_basic_loss.py
View file @
8485edfc
...
@@ -11,7 +11,10 @@
...
@@ -11,7 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""
This code is refer from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/basic_loss.py
"""
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
from
__future__
import
print_function
from
__future__
import
print_function
...
@@ -147,4 +150,4 @@ class BCELoss(nn.Layer):
...
@@ -147,4 +150,4 @@ class BCELoss(nn.Layer):
def
forward
(
self
,
input
,
label
,
mask
=
None
,
weight
=
None
,
name
=
None
):
def
forward
(
self
,
input
,
label
,
mask
=
None
,
weight
=
None
,
name
=
None
):
loss
=
F
.
binary_cross_entropy
(
input
,
label
,
reduction
=
self
.
reduction
)
loss
=
F
.
binary_cross_entropy
(
input
,
label
,
reduction
=
self
.
reduction
)
return
loss
return
loss
\ No newline at end of file
ppocr/losses/det_db_loss.py
View file @
8485edfc
...
@@ -11,6 +11,10 @@
...
@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""
This code is refer from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/DB_loss.py
"""
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
...
...
ppocr/losses/det_pse_loss.py
View file @
8485edfc
...
@@ -11,6 +11,10 @@
...
@@ -11,6 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""
This code is refer from:
https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py
"""
import
paddle
import
paddle
from
paddle
import
nn
from
paddle
import
nn
...
...
ppocr/losses/rec_nrtr_loss.py
View file @
8485edfc
...
@@ -22,7 +22,7 @@ class NRTRLoss(nn.Layer):
...
@@ -22,7 +22,7 @@ class NRTRLoss(nn.Layer):
log_prb
=
F
.
log_softmax
(
pred
,
axis
=
1
)
log_prb
=
F
.
log_softmax
(
pred
,
axis
=
1
)
non_pad_mask
=
paddle
.
not_equal
(
non_pad_mask
=
paddle
.
not_equal
(
tgt
,
paddle
.
zeros
(
tgt
,
paddle
.
zeros
(
tgt
.
shape
,
dtype
=
'int64'
))
tgt
.
shape
,
dtype
=
tgt
.
dtype
))
loss
=
-
(
one_hot
*
log_prb
).
sum
(
axis
=
1
)
loss
=
-
(
one_hot
*
log_prb
).
sum
(
axis
=
1
)
loss
=
loss
.
masked_select
(
non_pad_mask
).
mean
()
loss
=
loss
.
masked_select
(
non_pad_mask
).
mean
()
else
:
else
:
...
...
ppocr/modeling/backbones/rec_mv1_enhance.py
View file @
8485edfc
...
@@ -12,6 +12,8 @@
...
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# This code is refer from: https://github.com/PaddlePaddle/PaddleClas/blob/develop/ppcls/arch/backbone/legendary_models/pp_lcnet.py
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
from
__future__
import
print_function
from
__future__
import
print_function
...
...
ppocr/modeling/backbones/rec_resnet_31.py
View file @
8485edfc
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/layers/conv_layer.py
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/backbones/resnet31_ocr.py
"""
from
__future__
import
absolute_import
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
division
from
__future__
import
print_function
from
__future__
import
print_function
...
@@ -18,12 +37,12 @@ def conv3x3(in_channel, out_channel, stride=1):
...
@@ -18,12 +37,12 @@ def conv3x3(in_channel, out_channel, stride=1):
kernel_size
=
3
,
kernel_size
=
3
,
stride
=
stride
,
stride
=
stride
,
padding
=
1
,
padding
=
1
,
bias_attr
=
False
bias_attr
=
False
)
)
class
BasicBlock
(
nn
.
Layer
):
class
BasicBlock
(
nn
.
Layer
):
expansion
=
1
expansion
=
1
def
__init__
(
self
,
in_channels
,
channels
,
stride
=
1
,
downsample
=
False
):
def
__init__
(
self
,
in_channels
,
channels
,
stride
=
1
,
downsample
=
False
):
super
().
__init__
()
super
().
__init__
()
self
.
conv1
=
conv3x3
(
in_channels
,
channels
,
stride
)
self
.
conv1
=
conv3x3
(
in_channels
,
channels
,
stride
)
...
@@ -34,9 +53,13 @@ class BasicBlock(nn.Layer):
...
@@ -34,9 +53,13 @@ class BasicBlock(nn.Layer):
self
.
downsample
=
downsample
self
.
downsample
=
downsample
if
downsample
:
if
downsample
:
self
.
downsample
=
nn
.
Sequential
(
self
.
downsample
=
nn
.
Sequential
(
nn
.
Conv2D
(
in_channels
,
channels
*
self
.
expansion
,
1
,
stride
,
bias_attr
=
False
),
nn
.
Conv2D
(
nn
.
BatchNorm2D
(
channels
*
self
.
expansion
),
in_channels
,
)
channels
*
self
.
expansion
,
1
,
stride
,
bias_attr
=
False
),
nn
.
BatchNorm2D
(
channels
*
self
.
expansion
),
)
else
:
else
:
self
.
downsample
=
nn
.
Sequential
()
self
.
downsample
=
nn
.
Sequential
()
self
.
stride
=
stride
self
.
stride
=
stride
...
@@ -57,7 +80,7 @@ class BasicBlock(nn.Layer):
...
@@ -57,7 +80,7 @@ class BasicBlock(nn.Layer):
out
+=
residual
out
+=
residual
out
=
self
.
relu
(
out
)
out
=
self
.
relu
(
out
)
return
out
return
out
class
ResNet31
(
nn
.
Layer
):
class
ResNet31
(
nn
.
Layer
):
...
@@ -69,12 +92,13 @@ class ResNet31(nn.Layer):
...
@@ -69,12 +92,13 @@ class ResNet31(nn.Layer):
out_indices (None | Sequence[int]): Indices of output stages.
out_indices (None | Sequence[int]): Indices of output stages.
last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage.
last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage.
'''
'''
def
__init__
(
self
,
in_channels
=
3
,
def
__init__
(
self
,
layers
=
[
1
,
2
,
5
,
3
],
in_channels
=
3
,
channels
=
[
64
,
128
,
256
,
256
,
512
,
512
,
512
],
layers
=
[
1
,
2
,
5
,
3
],
out_indices
=
None
,
channels
=
[
64
,
128
,
256
,
256
,
512
,
512
,
512
],
last_stage_pool
=
False
):
out_indices
=
None
,
last_stage_pool
=
False
):
super
(
ResNet31
,
self
).
__init__
()
super
(
ResNet31
,
self
).
__init__
()
assert
isinstance
(
in_channels
,
int
)
assert
isinstance
(
in_channels
,
int
)
assert
isinstance
(
last_stage_pool
,
bool
)
assert
isinstance
(
last_stage_pool
,
bool
)
...
@@ -83,46 +107,56 @@ class ResNet31(nn.Layer):
...
@@ -83,46 +107,56 @@ class ResNet31(nn.Layer):
self
.
last_stage_pool
=
last_stage_pool
self
.
last_stage_pool
=
last_stage_pool
# conv 1 (Conv Conv)
# conv 1 (Conv Conv)
self
.
conv1_1
=
nn
.
Conv2D
(
in_channels
,
channels
[
0
],
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
conv1_1
=
nn
.
Conv2D
(
in_channels
,
channels
[
0
],
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
bn1_1
=
nn
.
BatchNorm2D
(
channels
[
0
])
self
.
bn1_1
=
nn
.
BatchNorm2D
(
channels
[
0
])
self
.
relu1_1
=
nn
.
ReLU
()
self
.
relu1_1
=
nn
.
ReLU
()
self
.
conv1_2
=
nn
.
Conv2D
(
channels
[
0
],
channels
[
1
],
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
conv1_2
=
nn
.
Conv2D
(
channels
[
0
],
channels
[
1
],
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
bn1_2
=
nn
.
BatchNorm2D
(
channels
[
1
])
self
.
bn1_2
=
nn
.
BatchNorm2D
(
channels
[
1
])
self
.
relu1_2
=
nn
.
ReLU
()
self
.
relu1_2
=
nn
.
ReLU
()
# conv 2 (Max-pooling, Residual block, Conv)
# conv 2 (Max-pooling, Residual block, Conv)
self
.
pool2
=
nn
.
MaxPool2D
(
kernel_size
=
2
,
stride
=
2
,
padding
=
0
,
ceil_mode
=
True
)
self
.
pool2
=
nn
.
MaxPool2D
(
kernel_size
=
2
,
stride
=
2
,
padding
=
0
,
ceil_mode
=
True
)
self
.
block2
=
self
.
_make_layer
(
channels
[
1
],
channels
[
2
],
layers
[
0
])
self
.
block2
=
self
.
_make_layer
(
channels
[
1
],
channels
[
2
],
layers
[
0
])
self
.
conv2
=
nn
.
Conv2D
(
channels
[
2
],
channels
[
2
],
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
conv2
=
nn
.
Conv2D
(
channels
[
2
],
channels
[
2
],
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
bn2
=
nn
.
BatchNorm2D
(
channels
[
2
])
self
.
bn2
=
nn
.
BatchNorm2D
(
channels
[
2
])
self
.
relu2
=
nn
.
ReLU
()
self
.
relu2
=
nn
.
ReLU
()
# conv 3 (Max-pooling, Residual block, Conv)
# conv 3 (Max-pooling, Residual block, Conv)
self
.
pool3
=
nn
.
MaxPool2D
(
kernel_size
=
2
,
stride
=
2
,
padding
=
0
,
ceil_mode
=
True
)
self
.
pool3
=
nn
.
MaxPool2D
(
kernel_size
=
2
,
stride
=
2
,
padding
=
0
,
ceil_mode
=
True
)
self
.
block3
=
self
.
_make_layer
(
channels
[
2
],
channels
[
3
],
layers
[
1
])
self
.
block3
=
self
.
_make_layer
(
channels
[
2
],
channels
[
3
],
layers
[
1
])
self
.
conv3
=
nn
.
Conv2D
(
channels
[
3
],
channels
[
3
],
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
conv3
=
nn
.
Conv2D
(
channels
[
3
],
channels
[
3
],
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
bn3
=
nn
.
BatchNorm2D
(
channels
[
3
])
self
.
bn3
=
nn
.
BatchNorm2D
(
channels
[
3
])
self
.
relu3
=
nn
.
ReLU
()
self
.
relu3
=
nn
.
ReLU
()
# conv 4 (Max-pooling, Residual block, Conv)
# conv 4 (Max-pooling, Residual block, Conv)
self
.
pool4
=
nn
.
MaxPool2D
(
kernel_size
=
(
2
,
1
),
stride
=
(
2
,
1
),
padding
=
0
,
ceil_mode
=
True
)
self
.
pool4
=
nn
.
MaxPool2D
(
kernel_size
=
(
2
,
1
),
stride
=
(
2
,
1
),
padding
=
0
,
ceil_mode
=
True
)
self
.
block4
=
self
.
_make_layer
(
channels
[
3
],
channels
[
4
],
layers
[
2
])
self
.
block4
=
self
.
_make_layer
(
channels
[
3
],
channels
[
4
],
layers
[
2
])
self
.
conv4
=
nn
.
Conv2D
(
channels
[
4
],
channels
[
4
],
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
conv4
=
nn
.
Conv2D
(
channels
[
4
],
channels
[
4
],
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
bn4
=
nn
.
BatchNorm2D
(
channels
[
4
])
self
.
bn4
=
nn
.
BatchNorm2D
(
channels
[
4
])
self
.
relu4
=
nn
.
ReLU
()
self
.
relu4
=
nn
.
ReLU
()
# conv 5 ((Max-pooling), Residual block, Conv)
# conv 5 ((Max-pooling), Residual block, Conv)
self
.
pool5
=
None
self
.
pool5
=
None
if
self
.
last_stage_pool
:
if
self
.
last_stage_pool
:
self
.
pool5
=
nn
.
MaxPool2D
(
kernel_size
=
2
,
stride
=
2
,
padding
=
0
,
ceil_mode
=
True
)
self
.
pool5
=
nn
.
MaxPool2D
(
kernel_size
=
2
,
stride
=
2
,
padding
=
0
,
ceil_mode
=
True
)
self
.
block5
=
self
.
_make_layer
(
channels
[
4
],
channels
[
5
],
layers
[
3
])
self
.
block5
=
self
.
_make_layer
(
channels
[
4
],
channels
[
5
],
layers
[
3
])
self
.
conv5
=
nn
.
Conv2D
(
channels
[
5
],
channels
[
5
],
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
conv5
=
nn
.
Conv2D
(
channels
[
5
],
channels
[
5
],
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
bn5
=
nn
.
BatchNorm2D
(
channels
[
5
])
self
.
bn5
=
nn
.
BatchNorm2D
(
channels
[
5
])
self
.
relu5
=
nn
.
ReLU
()
self
.
relu5
=
nn
.
ReLU
()
self
.
out_channels
=
channels
[
-
1
]
self
.
out_channels
=
channels
[
-
1
]
def
_make_layer
(
self
,
input_channels
,
output_channels
,
blocks
):
def
_make_layer
(
self
,
input_channels
,
output_channels
,
blocks
):
layers
=
[]
layers
=
[]
for
_
in
range
(
blocks
):
for
_
in
range
(
blocks
):
...
@@ -130,19 +164,19 @@ class ResNet31(nn.Layer):
...
@@ -130,19 +164,19 @@ class ResNet31(nn.Layer):
if
input_channels
!=
output_channels
:
if
input_channels
!=
output_channels
:
downsample
=
nn
.
Sequential
(
downsample
=
nn
.
Sequential
(
nn
.
Conv2D
(
nn
.
Conv2D
(
input_channels
,
input_channels
,
output_channels
,
output_channels
,
kernel_size
=
1
,
kernel_size
=
1
,
stride
=
1
,
stride
=
1
,
bias_attr
=
False
),
bias_attr
=
False
),
nn
.
BatchNorm2D
(
output_channels
),
nn
.
BatchNorm2D
(
output_channels
),
)
)
layers
.
append
(
layers
.
append
(
BasicBlock
(
input_channels
,
output_channels
,
downsample
=
downsample
))
BasicBlock
(
input_channels
,
output_channels
,
downsample
=
downsample
))
input_channels
=
output_channels
input_channels
=
output_channels
return
nn
.
Sequential
(
*
layers
)
return
nn
.
Sequential
(
*
layers
)
def
forward
(
self
,
x
):
def
forward
(
self
,
x
):
x
=
self
.
conv1_1
(
x
)
x
=
self
.
conv1_1
(
x
)
x
=
self
.
bn1_1
(
x
)
x
=
self
.
bn1_1
(
x
)
...
@@ -166,11 +200,11 @@ class ResNet31(nn.Layer):
...
@@ -166,11 +200,11 @@ class ResNet31(nn.Layer):
x
=
block_layer
(
x
)
x
=
block_layer
(
x
)
x
=
conv_layer
(
x
)
x
=
conv_layer
(
x
)
x
=
bn_layer
(
x
)
x
=
bn_layer
(
x
)
x
=
relu_layer
(
x
)
x
=
relu_layer
(
x
)
outs
.
append
(
x
)
outs
.
append
(
x
)
if
self
.
out_indices
is
not
None
:
if
self
.
out_indices
is
not
None
:
return
tuple
([
outs
[
i
]
for
i
in
self
.
out_indices
])
return
tuple
([
outs
[
i
]
for
i
in
self
.
out_indices
])
return
x
return
x
ppocr/modeling/backbones/rec_resnet_aster.py
View file @
8485edfc
...
@@ -11,7 +11,10 @@
...
@@ -11,7 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""
This code is refer from:
https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/resnet_aster.py
"""
import
paddle
import
paddle
import
paddle.nn
as
nn
import
paddle.nn
as
nn
...
...
ppocr/modeling/heads/det_pse_head.py
View file @
8485edfc
# copyright (c) 202
0
PaddlePaddle Authors. All Rights Reserve.
# copyright (c) 202
1
PaddlePaddle Authors. All Rights Reserve.
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# you may not use this file except in compliance with the License.
...
@@ -11,22 +11,24 @@
...
@@ -11,22 +11,24 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""
This code is refer from:
https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py
"""
from
paddle
import
nn
from
paddle
import
nn
class
PSEHead
(
nn
.
Layer
):
class
PSEHead
(
nn
.
Layer
):
def
__init__
(
self
,
def
__init__
(
self
,
in_channels
,
hidden_dim
=
256
,
out_channels
=
7
,
**
kwargs
):
in_channels
,
hidden_dim
=
256
,
out_channels
=
7
,
**
kwargs
):
super
(
PSEHead
,
self
).
__init__
()
super
(
PSEHead
,
self
).
__init__
()
self
.
conv1
=
nn
.
Conv2D
(
in_channels
,
hidden_dim
,
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
conv1
=
nn
.
Conv2D
(
in_channels
,
hidden_dim
,
kernel_size
=
3
,
stride
=
1
,
padding
=
1
)
self
.
bn1
=
nn
.
BatchNorm2D
(
hidden_dim
)
self
.
bn1
=
nn
.
BatchNorm2D
(
hidden_dim
)
self
.
relu1
=
nn
.
ReLU
()
self
.
relu1
=
nn
.
ReLU
()
self
.
conv2
=
nn
.
Conv2D
(
hidden_dim
,
out_channels
,
kernel_size
=
1
,
stride
=
1
,
padding
=
0
)
self
.
conv2
=
nn
.
Conv2D
(
hidden_dim
,
out_channels
,
kernel_size
=
1
,
stride
=
1
,
padding
=
0
)
def
forward
(
self
,
x
,
**
kwargs
):
def
forward
(
self
,
x
,
**
kwargs
):
out
=
self
.
conv1
(
x
)
out
=
self
.
conv1
(
x
)
...
...
Prev
1
2
3
4
5
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment