wangsen / MinerU · Commits

Commit f4ffdfe8 (unverified)
Authored Apr 02, 2025 by Xiaomeng Zhao; committed by GitHub on Apr 02, 2025

Merge pull request #2062 from myhloli/dev
feat: support 3.10~3.12 & remove paddle
Parents: ec566d22, cb3a4314. Changes: 78.
Showing 20 changed files with 3589 additions and 0 deletions (+3589 -0):
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py  +48 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py  +418 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py  +0 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py  +25 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py  +105 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py  +62 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py  +269 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py  +290 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py  +516 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py  +136 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py  +234 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py  +638 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py  +76 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py  +43 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py  +23 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py  +109 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py  +54 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py  +58 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py  +29 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py  +456 -0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py (new file, mode 100755)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

# from .iaa_augment import IaaAugment
# from .make_border_map import MakeBorderMap
# from .make_shrink_map import MakeShrinkMap
# from .random_crop_data import EastRandomCropData, PSERandomCrop
# from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg
# from .randaugment import RandAugment
from .operators import *
# from .label_ops import *
# from .east_process import *
# from .sast_process import *
# from .gen_table_mask import *


def transform(data, ops=None):
    """ transform """
    if ops is None:
        ops = []
    for op in ops:
        data = op(data)
        if data is None:
            return None
    return data


def create_operators(op_param_list, global_config=None):
    """
    create operators based on the config
    Args:
        params(list): a dict list, used to create some operators
    """
    assert isinstance(op_param_list, list), ('operator config should be a list')
    ops = []
    for operator in op_param_list:
        assert isinstance(operator, dict) and len(operator) == 1, "yaml format error"
        op_name = list(operator)[0]
        param = {} if operator[op_name] is None else operator[op_name]
        if global_config is not None:
            param.update(global_config)
        op = eval(op_name)(**param)
        ops.append(op)
    return ops
\ No newline at end of file
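
A quick usage sketch, not part of this diff (the config values below are illustrative assumptions): create_operators eval()s each config key into one of the operator classes re-exported from .operators, and transform runs them in order, returning None as soon as an operator rejects the sample.

from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.pytorchocr.data.imaug import (
    create_operators, transform)

# hypothetical preprocessing config in the PaddleOCR yaml style
op_params = [
    {'DecodeImage': {'img_mode': 'BGR', 'channel_first': False}},
    {'DetResizeForTest': {'limit_side_len': 960, 'limit_type': 'max'}},
    {'NormalizeImage': {'scale': '1./255.', 'order': 'hwc'}},
    {'ToCHWImage': None},
    {'KeepKeys': {'keep_keys': ['image', 'shape']}},
]
ops = create_operators(op_params)
data = {'image': open('page.png', 'rb').read()}   # DecodeImage expects raw bytes
image, shape = transform(data, ops)               # KeepKeys turns the dict into a list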
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py (new file, mode 100755)
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
__future__
import
unicode_literals
import
sys
import
six
import
cv2
import
numpy
as
np
class
DecodeImage
(
object
):
""" decode image """
def
__init__
(
self
,
img_mode
=
'RGB'
,
channel_first
=
False
,
**
kwargs
):
self
.
img_mode
=
img_mode
self
.
channel_first
=
channel_first
def
__call__
(
self
,
data
):
img
=
data
[
'image'
]
if
six
.
PY2
:
assert
type
(
img
)
is
str
and
len
(
img
)
>
0
,
"invalid input 'img' in DecodeImage"
else
:
assert
type
(
img
)
is
bytes
and
len
(
img
)
>
0
,
"invalid input 'img' in DecodeImage"
img
=
np
.
frombuffer
(
img
,
dtype
=
'uint8'
)
img
=
cv2
.
imdecode
(
img
,
1
)
if
img
is
None
:
return
None
if
self
.
img_mode
==
'GRAY'
:
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_GRAY2BGR
)
elif
self
.
img_mode
==
'RGB'
:
assert
img
.
shape
[
2
]
==
3
,
'invalid shape of image[%s]'
%
(
img
.
shape
)
img
=
img
[:,
:,
::
-
1
]
if
self
.
channel_first
:
img
=
img
.
transpose
((
2
,
0
,
1
))
data
[
'image'
]
=
img
return
data
class
NRTRDecodeImage
(
object
):
""" decode image """
def
__init__
(
self
,
img_mode
=
'RGB'
,
channel_first
=
False
,
**
kwargs
):
self
.
img_mode
=
img_mode
self
.
channel_first
=
channel_first
def
__call__
(
self
,
data
):
img
=
data
[
'image'
]
if
six
.
PY2
:
assert
type
(
img
)
is
str
and
len
(
img
)
>
0
,
"invalid input 'img' in DecodeImage"
else
:
assert
type
(
img
)
is
bytes
and
len
(
img
)
>
0
,
"invalid input 'img' in DecodeImage"
img
=
np
.
frombuffer
(
img
,
dtype
=
'uint8'
)
img
=
cv2
.
imdecode
(
img
,
1
)
if
img
is
None
:
return
None
if
self
.
img_mode
==
'GRAY'
:
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_GRAY2BGR
)
elif
self
.
img_mode
==
'RGB'
:
assert
img
.
shape
[
2
]
==
3
,
'invalid shape of image[%s]'
%
(
img
.
shape
)
img
=
img
[:,
:,
::
-
1
]
img
=
cv2
.
cvtColor
(
img
,
cv2
.
COLOR_BGR2GRAY
)
if
self
.
channel_first
:
img
=
img
.
transpose
((
2
,
0
,
1
))
data
[
'image'
]
=
img
return
data
class
NormalizeImage
(
object
):
""" normalize image such as substract mean, divide std
"""
def
__init__
(
self
,
scale
=
None
,
mean
=
None
,
std
=
None
,
order
=
'chw'
,
**
kwargs
):
if
isinstance
(
scale
,
str
):
scale
=
eval
(
scale
)
self
.
scale
=
np
.
float32
(
scale
if
scale
is
not
None
else
1.0
/
255.0
)
mean
=
mean
if
mean
is
not
None
else
[
0.485
,
0.456
,
0.406
]
std
=
std
if
std
is
not
None
else
[
0.229
,
0.224
,
0.225
]
shape
=
(
3
,
1
,
1
)
if
order
==
'chw'
else
(
1
,
1
,
3
)
self
.
mean
=
np
.
array
(
mean
).
reshape
(
shape
).
astype
(
'float32'
)
self
.
std
=
np
.
array
(
std
).
reshape
(
shape
).
astype
(
'float32'
)
def
__call__
(
self
,
data
):
img
=
data
[
'image'
]
from
PIL
import
Image
if
isinstance
(
img
,
Image
.
Image
):
img
=
np
.
array
(
img
)
assert
isinstance
(
img
,
np
.
ndarray
),
"invalid input 'img' in NormalizeImage"
data
[
'image'
]
=
(
img
.
astype
(
'float32'
)
*
self
.
scale
-
self
.
mean
)
/
self
.
std
return
data
class
ToCHWImage
(
object
):
""" convert hwc image to chw image
"""
def
__init__
(
self
,
**
kwargs
):
pass
def
__call__
(
self
,
data
):
img
=
data
[
'image'
]
from
PIL
import
Image
if
isinstance
(
img
,
Image
.
Image
):
img
=
np
.
array
(
img
)
data
[
'image'
]
=
img
.
transpose
((
2
,
0
,
1
))
return
data
class
Fasttext
(
object
):
def
__init__
(
self
,
path
=
"None"
,
**
kwargs
):
import
fasttext
self
.
fast_model
=
fasttext
.
load_model
(
path
)
def
__call__
(
self
,
data
):
label
=
data
[
'label'
]
fast_label
=
self
.
fast_model
[
label
]
data
[
'fast_label'
]
=
fast_label
return
data
class
KeepKeys
(
object
):
def
__init__
(
self
,
keep_keys
,
**
kwargs
):
self
.
keep_keys
=
keep_keys
def
__call__
(
self
,
data
):
data_list
=
[]
for
key
in
self
.
keep_keys
:
data_list
.
append
(
data
[
key
])
return
data_list
class
Resize
(
object
):
def
__init__
(
self
,
size
=
(
640
,
640
),
**
kwargs
):
self
.
size
=
size
def
resize_image
(
self
,
img
):
resize_h
,
resize_w
=
self
.
size
ori_h
,
ori_w
=
img
.
shape
[:
2
]
# (h, w, c)
ratio_h
=
float
(
resize_h
)
/
ori_h
ratio_w
=
float
(
resize_w
)
/
ori_w
img
=
cv2
.
resize
(
img
,
(
int
(
resize_w
),
int
(
resize_h
)))
return
img
,
[
ratio_h
,
ratio_w
]
def
__call__
(
self
,
data
):
img
=
data
[
'image'
]
text_polys
=
data
[
'polys'
]
img_resize
,
[
ratio_h
,
ratio_w
]
=
self
.
resize_image
(
img
)
new_boxes
=
[]
for
box
in
text_polys
:
new_box
=
[]
for
cord
in
box
:
new_box
.
append
([
cord
[
0
]
*
ratio_w
,
cord
[
1
]
*
ratio_h
])
new_boxes
.
append
(
new_box
)
data
[
'image'
]
=
img_resize
data
[
'polys'
]
=
np
.
array
(
new_boxes
,
dtype
=
np
.
float32
)
return
data
class
DetResizeForTest
(
object
):
def
__init__
(
self
,
**
kwargs
):
super
(
DetResizeForTest
,
self
).
__init__
()
self
.
resize_type
=
0
if
'image_shape'
in
kwargs
:
self
.
image_shape
=
kwargs
[
'image_shape'
]
self
.
resize_type
=
1
elif
'limit_side_len'
in
kwargs
:
self
.
limit_side_len
=
kwargs
[
'limit_side_len'
]
self
.
limit_type
=
kwargs
.
get
(
'limit_type'
,
'min'
)
elif
'resize_long'
in
kwargs
:
self
.
resize_type
=
2
self
.
resize_long
=
kwargs
.
get
(
'resize_long'
,
960
)
else
:
self
.
limit_side_len
=
736
self
.
limit_type
=
'min'
def
__call__
(
self
,
data
):
img
=
data
[
'image'
]
src_h
,
src_w
,
_
=
img
.
shape
if
self
.
resize_type
==
0
:
# img, shape = self.resize_image_type0(img)
img
,
[
ratio_h
,
ratio_w
]
=
self
.
resize_image_type0
(
img
)
elif
self
.
resize_type
==
2
:
img
,
[
ratio_h
,
ratio_w
]
=
self
.
resize_image_type2
(
img
)
else
:
# img, shape = self.resize_image_type1(img)
img
,
[
ratio_h
,
ratio_w
]
=
self
.
resize_image_type1
(
img
)
data
[
'image'
]
=
img
data
[
'shape'
]
=
np
.
array
([
src_h
,
src_w
,
ratio_h
,
ratio_w
])
return
data
def
resize_image_type1
(
self
,
img
):
resize_h
,
resize_w
=
self
.
image_shape
ori_h
,
ori_w
=
img
.
shape
[:
2
]
# (h, w, c)
ratio_h
=
float
(
resize_h
)
/
ori_h
ratio_w
=
float
(
resize_w
)
/
ori_w
img
=
cv2
.
resize
(
img
,
(
int
(
resize_w
),
int
(
resize_h
)))
# return img, np.array([ori_h, ori_w])
return
img
,
[
ratio_h
,
ratio_w
]
def
resize_image_type0
(
self
,
img
):
"""
resize image to a size multiple of 32 which is required by the network
args:
img(array): array with shape [h, w, c]
return(tuple):
img, (ratio_h, ratio_w)
"""
limit_side_len
=
self
.
limit_side_len
h
,
w
,
c
=
img
.
shape
# limit the max side
if
self
.
limit_type
==
'max'
:
if
max
(
h
,
w
)
>
limit_side_len
:
if
h
>
w
:
ratio
=
float
(
limit_side_len
)
/
h
else
:
ratio
=
float
(
limit_side_len
)
/
w
else
:
ratio
=
1.
elif
self
.
limit_type
==
'min'
:
if
min
(
h
,
w
)
<
limit_side_len
:
if
h
<
w
:
ratio
=
float
(
limit_side_len
)
/
h
else
:
ratio
=
float
(
limit_side_len
)
/
w
else
:
ratio
=
1.
elif
self
.
limit_type
==
'resize_long'
:
ratio
=
float
(
limit_side_len
)
/
max
(
h
,
w
)
else
:
raise
Exception
(
'not support limit type, image '
)
resize_h
=
int
(
h
*
ratio
)
resize_w
=
int
(
w
*
ratio
)
resize_h
=
max
(
int
(
round
(
resize_h
/
32
)
*
32
),
32
)
resize_w
=
max
(
int
(
round
(
resize_w
/
32
)
*
32
),
32
)
try
:
if
int
(
resize_w
)
<=
0
or
int
(
resize_h
)
<=
0
:
return
None
,
(
None
,
None
)
img
=
cv2
.
resize
(
img
,
(
int
(
resize_w
),
int
(
resize_h
)))
except
:
print
(
img
.
shape
,
resize_w
,
resize_h
)
sys
.
exit
(
0
)
ratio_h
=
resize_h
/
float
(
h
)
ratio_w
=
resize_w
/
float
(
w
)
return
img
,
[
ratio_h
,
ratio_w
]
def
resize_image_type2
(
self
,
img
):
h
,
w
,
_
=
img
.
shape
resize_w
=
w
resize_h
=
h
if
resize_h
>
resize_w
:
ratio
=
float
(
self
.
resize_long
)
/
resize_h
else
:
ratio
=
float
(
self
.
resize_long
)
/
resize_w
resize_h
=
int
(
resize_h
*
ratio
)
resize_w
=
int
(
resize_w
*
ratio
)
max_stride
=
128
resize_h
=
(
resize_h
+
max_stride
-
1
)
//
max_stride
*
max_stride
resize_w
=
(
resize_w
+
max_stride
-
1
)
//
max_stride
*
max_stride
img
=
cv2
.
resize
(
img
,
(
int
(
resize_w
),
int
(
resize_h
)))
ratio_h
=
resize_h
/
float
(
h
)
ratio_w
=
resize_w
/
float
(
w
)
return
img
,
[
ratio_h
,
ratio_w
]
class
E2EResizeForTest
(
object
):
def
__init__
(
self
,
**
kwargs
):
super
(
E2EResizeForTest
,
self
).
__init__
()
self
.
max_side_len
=
kwargs
[
'max_side_len'
]
self
.
valid_set
=
kwargs
[
'valid_set'
]
def
__call__
(
self
,
data
):
img
=
data
[
'image'
]
src_h
,
src_w
,
_
=
img
.
shape
if
self
.
valid_set
==
'totaltext'
:
im_resized
,
[
ratio_h
,
ratio_w
]
=
self
.
resize_image_for_totaltext
(
img
,
max_side_len
=
self
.
max_side_len
)
else
:
im_resized
,
(
ratio_h
,
ratio_w
)
=
self
.
resize_image
(
img
,
max_side_len
=
self
.
max_side_len
)
data
[
'image'
]
=
im_resized
data
[
'shape'
]
=
np
.
array
([
src_h
,
src_w
,
ratio_h
,
ratio_w
])
return
data
def
resize_image_for_totaltext
(
self
,
im
,
max_side_len
=
512
):
h
,
w
,
_
=
im
.
shape
resize_w
=
w
resize_h
=
h
ratio
=
1.25
if
h
*
ratio
>
max_side_len
:
ratio
=
float
(
max_side_len
)
/
resize_h
resize_h
=
int
(
resize_h
*
ratio
)
resize_w
=
int
(
resize_w
*
ratio
)
max_stride
=
128
resize_h
=
(
resize_h
+
max_stride
-
1
)
//
max_stride
*
max_stride
resize_w
=
(
resize_w
+
max_stride
-
1
)
//
max_stride
*
max_stride
im
=
cv2
.
resize
(
im
,
(
int
(
resize_w
),
int
(
resize_h
)))
ratio_h
=
resize_h
/
float
(
h
)
ratio_w
=
resize_w
/
float
(
w
)
return
im
,
(
ratio_h
,
ratio_w
)
def
resize_image
(
self
,
im
,
max_side_len
=
512
):
"""
resize image to a size multiple of max_stride which is required by the network
:param im: the resized image
:param max_side_len: limit of max image size to avoid out of memory in gpu
:return: the resized image and the resize ratio
"""
h
,
w
,
_
=
im
.
shape
resize_w
=
w
resize_h
=
h
# Fix the longer side
if
resize_h
>
resize_w
:
ratio
=
float
(
max_side_len
)
/
resize_h
else
:
ratio
=
float
(
max_side_len
)
/
resize_w
resize_h
=
int
(
resize_h
*
ratio
)
resize_w
=
int
(
resize_w
*
ratio
)
max_stride
=
128
resize_h
=
(
resize_h
+
max_stride
-
1
)
//
max_stride
*
max_stride
resize_w
=
(
resize_w
+
max_stride
-
1
)
//
max_stride
*
max_stride
im
=
cv2
.
resize
(
im
,
(
int
(
resize_w
),
int
(
resize_h
)))
ratio_h
=
resize_h
/
float
(
h
)
ratio_w
=
resize_w
/
float
(
w
)
return
im
,
(
ratio_h
,
ratio_w
)
class
KieResize
(
object
):
def
__init__
(
self
,
**
kwargs
):
super
(
KieResize
,
self
).
__init__
()
self
.
max_side
,
self
.
min_side
=
kwargs
[
'img_scale'
][
0
],
kwargs
[
'img_scale'
][
1
]
def
__call__
(
self
,
data
):
img
=
data
[
'image'
]
points
=
data
[
'points'
]
src_h
,
src_w
,
_
=
img
.
shape
im_resized
,
scale_factor
,
[
ratio_h
,
ratio_w
],
[
new_h
,
new_w
]
=
self
.
resize_image
(
img
)
resize_points
=
self
.
resize_boxes
(
img
,
points
,
scale_factor
)
data
[
'ori_image'
]
=
img
data
[
'ori_boxes'
]
=
points
data
[
'points'
]
=
resize_points
data
[
'image'
]
=
im_resized
data
[
'shape'
]
=
np
.
array
([
new_h
,
new_w
])
return
data
def
resize_image
(
self
,
img
):
norm_img
=
np
.
zeros
([
1024
,
1024
,
3
],
dtype
=
'float32'
)
scale
=
[
512
,
1024
]
h
,
w
=
img
.
shape
[:
2
]
max_long_edge
=
max
(
scale
)
max_short_edge
=
min
(
scale
)
scale_factor
=
min
(
max_long_edge
/
max
(
h
,
w
),
max_short_edge
/
min
(
h
,
w
))
resize_w
,
resize_h
=
int
(
w
*
float
(
scale_factor
)
+
0.5
),
int
(
h
*
float
(
scale_factor
)
+
0.5
)
max_stride
=
32
resize_h
=
(
resize_h
+
max_stride
-
1
)
//
max_stride
*
max_stride
resize_w
=
(
resize_w
+
max_stride
-
1
)
//
max_stride
*
max_stride
im
=
cv2
.
resize
(
img
,
(
resize_w
,
resize_h
))
new_h
,
new_w
=
im
.
shape
[:
2
]
w_scale
=
new_w
/
w
h_scale
=
new_h
/
h
scale_factor
=
np
.
array
(
[
w_scale
,
h_scale
,
w_scale
,
h_scale
],
dtype
=
np
.
float32
)
norm_img
[:
new_h
,
:
new_w
,
:]
=
im
return
norm_img
,
scale_factor
,
[
h_scale
,
w_scale
],
[
new_h
,
new_w
]
def
resize_boxes
(
self
,
im
,
points
,
scale_factor
):
points
=
points
*
scale_factor
img_shape
=
im
.
shape
[:
2
]
points
[:,
0
::
2
]
=
np
.
clip
(
points
[:,
0
::
2
],
0
,
img_shape
[
1
])
points
[:,
1
::
2
]
=
np
.
clip
(
points
[:,
1
::
2
],
0
,
img_shape
[
0
])
return
points
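
A small numeric sketch (illustrative, not from the diff) of the default DetResizeForTest behaviour (resize_type 0, limit_type 'min', limit_side_len 736): the short side is scaled up to the limit and both sides are rounded to multiples of 32.

import numpy as np

resizer = DetResizeForTest()                      # falls into the final else branch above
out = resizer({'image': np.zeros((700, 500, 3), dtype=np.uint8)})
# min(700, 500) < 736, so ratio = 736 / 500 = 1.472
# 700 * 1.472 = 1030 -> rounded to 1024; 500 * 1.472 = 736 -> stays 736
print(out['image'].shape[:2])   # (1024, 736)
print(out['shape'])             # [src_h, src_w, ratio_h, ratio_w] = [700, 500, ~1.463, 1.472]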
magic_pdf/model/sub_modules/table/structeqtable/__init__.py → magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py (file moved)
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py (new file, mode 100644)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy

__all__ = ["build_model"]


def build_model(config, **kwargs):
    from .base_model import BaseModel

    config = copy.deepcopy(config)
    module_class = BaseModel(config, **kwargs)
    return module_class
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py (new file, mode 100644)
from torch import nn

from ..backbones import build_backbone
from ..heads import build_head
from ..necks import build_neck


class BaseModel(nn.Module):
    def __init__(self, config, **kwargs):
        """
        the module for OCR.
        args:
            config (dict): the super parameters for module.
        """
        super(BaseModel, self).__init__()

        in_channels = config.get("in_channels", 3)
        model_type = config["model_type"]
        # build backbone, backbone is need for del, rec and cls
        if "Backbone" not in config or config["Backbone"] is None:
            self.use_backbone = False
        else:
            self.use_backbone = True
            config["Backbone"]["in_channels"] = in_channels
            self.backbone = build_backbone(config["Backbone"], model_type)
            in_channels = self.backbone.out_channels

        # build neck
        # for rec, neck can be cnn,rnn or reshape(None)
        # for det, neck can be FPN, BIFPN and so on.
        # for cls, neck should be none
        if "Neck" not in config or config["Neck"] is None:
            self.use_neck = False
        else:
            self.use_neck = True
            config["Neck"]["in_channels"] = in_channels
            self.neck = build_neck(config["Neck"])
            in_channels = self.neck.out_channels

        # # build head, head is need for det, rec and cls
        if "Head" not in config or config["Head"] is None:
            self.use_head = False
        else:
            self.use_head = True
            config["Head"]["in_channels"] = in_channels
            self.head = build_head(config["Head"], **kwargs)

        self.return_all_feats = config.get("return_all_feats", False)

        self._initialize_weights()

    def _initialize_weights(self):
        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.ConvTranspose2d):
                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

    def forward(self, x):
        y = dict()
        if self.use_backbone:
            x = self.backbone(x)
        if isinstance(x, dict):
            y.update(x)
        else:
            y["backbone_out"] = x
        final_name = "backbone_out"
        if self.use_neck:
            x = self.neck(x)
            if isinstance(x, dict):
                y.update(x)
            else:
                y["neck_out"] = x
            final_name = "neck_out"
        if self.use_head:
            x = self.head(x)
            # for multi head, save ctc neck out for udml
            if isinstance(x, dict) and "ctc_nect" in x.keys():
                y["neck_out"] = x["ctc_neck"]
                y["head_out"] = x
            elif isinstance(x, dict):
                y.update(x)
            else:
                y["head_out"] = x
        if self.return_all_feats:
            if self.training:
                return y
            elif isinstance(x, dict):
                return x
            else:
                return {final_name: x}
        else:
            return x
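
For orientation, a hedged sketch of how BaseModel is driven by a config dict: "Backbone", "Neck" and "Head" are optional sub-dicts whose "name" key is resolved by the corresponding builder, with out_channels threaded from one stage to the next. The neck and head names below are assumptions (their registries live in the necks/ and heads/ packages added in this commit), not values taken from this diff.

config = {
    "model_type": "det",
    "in_channels": 3,
    "Backbone": {"name": "MobileNetV3", "model_name": "large", "scale": 0.5, "disable_se": True},
    "Neck": {"name": "DBFPN", "out_channels": 256},   # assumed neck name
    "Head": {"name": "DBHead", "k": 50},              # assumed head name
}
model = BaseModel(config)   # backbone -> neck -> head; head output returned when return_all_feats is False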
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py (new file, mode 100644)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ["build_backbone"]


def build_backbone(config, model_type):
    if model_type == "det":
        from .det_mobilenet_v3 import MobileNetV3
        from .rec_hgnet import PPHGNet_small
        from .rec_lcnetv3 import PPLCNetV3

        support_dict = [
            "MobileNetV3",
            "ResNet",
            "ResNet_vd",
            "ResNet_SAST",
            "PPLCNetV3",
            "PPHGNet_small",
        ]
    elif model_type == "rec" or model_type == "cls":
        from .rec_hgnet import PPHGNet_small
        from .rec_lcnetv3 import PPLCNetV3
        from .rec_mobilenet_v3 import MobileNetV3
        from .rec_svtrnet import SVTRNet
        from .rec_mv1_enhance import MobileNetV1Enhance

        support_dict = [
            "MobileNetV1Enhance",
            "MobileNetV3",
            "ResNet",
            "ResNetFPN",
            "MTB",
            "ResNet31",
            "SVTRNet",
            "ViTSTR",
            "DenseNet",
            "PPLCNetV3",
            "PPHGNet_small",
        ]
    else:
        raise NotImplementedError

    module_name = config.pop("name")
    assert module_name in support_dict, Exception(
        "when model typs is {}, backbone only support {}".format(model_type, support_dict)
    )
    module_class = eval(module_name)(**config)
    return module_class
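
A short sketch of using the builder directly (illustrative call, not from the diff): the "name" key is popped to select the class and the remaining keys are passed to its constructor.

det_backbone = build_backbone({"name": "PPLCNetV3", "scale": 0.75, "det": True}, model_type="det")
rec_backbone = build_backbone({"name": "MobileNetV1Enhance", "scale": 0.5}, model_type="rec")
print(det_backbone.out_channels, rec_backbone.out_channels)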
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py (new file, mode 100644)
from torch import nn

from ..common import Activation


def make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


class ConvBNLayer(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, padding,
                 groups=1, if_act=True, act=None, name=None):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.conv = nn.Conv2d(
            in_channels=in_channels, out_channels=out_channels,
            kernel_size=kernel_size, stride=stride, padding=padding,
            groups=groups, bias=False)

        self.bn = nn.BatchNorm2d(out_channels)
        if self.if_act:
            self.act = Activation(act_type=act, inplace=True)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        if self.if_act:
            x = self.act(x)
        return x


class SEModule(nn.Module):
    def __init__(self, in_channels, reduction=4, name=""):
        super(SEModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(
            in_channels=in_channels, out_channels=in_channels // reduction,
            kernel_size=1, stride=1, padding=0, bias=True)
        self.relu1 = Activation(act_type="relu", inplace=True)
        self.conv2 = nn.Conv2d(
            in_channels=in_channels // reduction, out_channels=in_channels,
            kernel_size=1, stride=1, padding=0, bias=True)
        self.hard_sigmoid = Activation(act_type="hard_sigmoid", inplace=True)

    def forward(self, inputs):
        outputs = self.avg_pool(inputs)
        outputs = self.conv1(outputs)
        outputs = self.relu1(outputs)
        outputs = self.conv2(outputs)
        outputs = self.hard_sigmoid(outputs)
        outputs = inputs * outputs
        return outputs


class ResidualUnit(nn.Module):
    def __init__(self, in_channels, mid_channels, out_channels, kernel_size,
                 stride, use_se, act=None, name=""):
        super(ResidualUnit, self).__init__()
        self.if_shortcut = stride == 1 and in_channels == out_channels
        self.if_se = use_se

        self.expand_conv = ConvBNLayer(
            in_channels=in_channels, out_channels=mid_channels,
            kernel_size=1, stride=1, padding=0,
            if_act=True, act=act, name=name + "_expand")
        self.bottleneck_conv = ConvBNLayer(
            in_channels=mid_channels, out_channels=mid_channels,
            kernel_size=kernel_size, stride=stride,
            padding=int((kernel_size - 1) // 2), groups=mid_channels,
            if_act=True, act=act, name=name + "_depthwise")
        if self.if_se:
            self.mid_se = SEModule(mid_channels, name=name + "_se")
        self.linear_conv = ConvBNLayer(
            in_channels=mid_channels, out_channels=out_channels,
            kernel_size=1, stride=1, padding=0,
            if_act=False, act=None, name=name + "_linear")

    def forward(self, inputs):
        x = self.expand_conv(inputs)
        x = self.bottleneck_conv(x)
        if self.if_se:
            x = self.mid_se(x)
        x = self.linear_conv(x)
        if self.if_shortcut:
            x = inputs + x
        return x


class MobileNetV3(nn.Module):
    def __init__(self, in_channels=3, model_name="large", scale=0.5, disable_se=False, **kwargs):
        """
        the MobilenetV3 backbone network for detection module.
        Args:
            params(dict): the super parameters for build network
        """
        super(MobileNetV3, self).__init__()

        self.disable_se = disable_se

        if model_name == "large":
            cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, "relu", 1],
                [3, 64, 24, False, "relu", 2],
                [3, 72, 24, False, "relu", 1],
                [5, 72, 40, True, "relu", 2],
                [5, 120, 40, True, "relu", 1],
                [5, 120, 40, True, "relu", 1],
                [3, 240, 80, False, "hard_swish", 2],
                [3, 200, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 480, 112, True, "hard_swish", 1],
                [3, 672, 112, True, "hard_swish", 1],
                [5, 672, 160, True, "hard_swish", 2],
                [5, 960, 160, True, "hard_swish", 1],
                [5, 960, 160, True, "hard_swish", 1],
            ]
            cls_ch_squeeze = 960
        elif model_name == "small":
            cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, "relu", 2],
                [3, 72, 24, False, "relu", 2],
                [3, 88, 24, False, "relu", 1],
                [5, 96, 40, True, "hard_swish", 2],
                [5, 240, 40, True, "hard_swish", 1],
                [5, 240, 40, True, "hard_swish", 1],
                [5, 120, 48, True, "hard_swish", 1],
                [5, 144, 48, True, "hard_swish", 1],
                [5, 288, 96, True, "hard_swish", 2],
                [5, 576, 96, True, "hard_swish", 1],
                [5, 576, 96, True, "hard_swish", 1],
            ]
            cls_ch_squeeze = 576
        else:
            raise NotImplementedError("mode[" + model_name + "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert (
            scale in supported_scale
        ), "supported scale are {} but input scale is {}".format(supported_scale, scale)
        inplanes = 16
        # conv1
        self.conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=make_divisible(inplanes * scale),
            kernel_size=3, stride=2, padding=1, groups=1,
            if_act=True, act="hard_swish", name="conv1")

        self.stages = nn.ModuleList()
        self.out_channels = []
        block_list = []
        i = 0
        inplanes = make_divisible(inplanes * scale)
        for k, exp, c, se, nl, s in cfg:
            se = se and not self.disable_se
            if s == 2 and i > 2:
                self.out_channels.append(inplanes)
                self.stages.append(nn.Sequential(*block_list))
                block_list = []
            block_list.append(
                ResidualUnit(
                    in_channels=inplanes,
                    mid_channels=make_divisible(scale * exp),
                    out_channels=make_divisible(scale * c),
                    kernel_size=k, stride=s, use_se=se, act=nl,
                    name="conv" + str(i + 2)))
            inplanes = make_divisible(scale * c)
            i += 1
        block_list.append(
            ConvBNLayer(
                in_channels=inplanes,
                out_channels=make_divisible(scale * cls_ch_squeeze),
                kernel_size=1, stride=1, padding=0, groups=1,
                if_act=True, act="hard_swish", name="conv_last"))
        self.stages.append(nn.Sequential(*block_list))
        self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
        # for i, stage in enumerate(self.stages):
        #     self.add_sublayer(sublayer=stage, name="stage{}".format(i))

    def forward(self, x):
        x = self.conv(x)
        out_list = []
        for stage in self.stages:
            x = stage(x)
            out_list.append(x)
        return out_list
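
A minimal shape check (illustrative): in detection mode this backbone returns one feature map per stage, with out_channels recording their widths for the FPN-style neck.

import torch

backbone = MobileNetV3(in_channels=3, model_name="large", scale=0.5, disable_se=True)
feats = backbone(torch.randn(1, 3, 640, 640))
print(backbone.out_channels)            # [16, 24, 56, 480] at scale 0.5
print([f.shape[-1] for f in feats])     # strides 4/8/16/32 -> [160, 80, 40, 20]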
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py (new file, mode 100644)
import torch
import torch.nn.functional as F
from torch import nn


class ConvBNAct(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1, use_act=True):
        super().__init__()
        self.use_act = use_act
        self.conv = nn.Conv2d(
            in_channels, out_channels, kernel_size, stride,
            padding=(kernel_size - 1) // 2, groups=groups, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)
        if self.use_act:
            self.act = nn.ReLU()

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        if self.use_act:
            x = self.act(x)
        return x


class ESEModule(nn.Module):
    def __init__(self, channels):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv = nn.Conv2d(
            in_channels=channels, out_channels=channels,
            kernel_size=1, stride=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        identity = x
        x = self.avg_pool(x)
        x = self.conv(x)
        x = self.sigmoid(x)
        return x * identity


class HG_Block(nn.Module):
    def __init__(self, in_channels, mid_channels, out_channels, layer_num, identity=False):
        super().__init__()
        self.identity = identity

        self.layers = nn.ModuleList()
        self.layers.append(
            ConvBNAct(in_channels=in_channels, out_channels=mid_channels,
                      kernel_size=3, stride=1))
        for _ in range(layer_num - 1):
            self.layers.append(
                ConvBNAct(in_channels=mid_channels, out_channels=mid_channels,
                          kernel_size=3, stride=1))

        # feature aggregation
        total_channels = in_channels + layer_num * mid_channels
        self.aggregation_conv = ConvBNAct(
            in_channels=total_channels, out_channels=out_channels,
            kernel_size=1, stride=1)
        self.att = ESEModule(out_channels)

    def forward(self, x):
        identity = x
        output = []
        output.append(x)
        for layer in self.layers:
            x = layer(x)
            output.append(x)
        x = torch.cat(output, dim=1)
        x = self.aggregation_conv(x)
        x = self.att(x)
        if self.identity:
            x += identity
        return x


class HG_Stage(nn.Module):
    def __init__(self, in_channels, mid_channels, out_channels, block_num,
                 layer_num, downsample=True, stride=[2, 1]):
        super().__init__()
        self.downsample = downsample
        if downsample:
            self.downsample = ConvBNAct(
                in_channels=in_channels, out_channels=in_channels,
                kernel_size=3, stride=stride, groups=in_channels, use_act=False)

        blocks_list = []
        blocks_list.append(
            HG_Block(in_channels, mid_channels, out_channels, layer_num, identity=False))
        for _ in range(block_num - 1):
            blocks_list.append(
                HG_Block(out_channels, mid_channels, out_channels, layer_num, identity=True))
        self.blocks = nn.Sequential(*blocks_list)

    def forward(self, x):
        if self.downsample:
            x = self.downsample(x)
        x = self.blocks(x)
        return x


class PPHGNet(nn.Module):
    """
    PPHGNet
    Args:
        stem_channels: list. Stem channel list of PPHGNet.
        stage_config: dict. The configuration of each stage of PPHGNet. such as the number of channels, stride, etc.
        layer_num: int. Number of layers of HG_Block.
        use_last_conv: boolean. Whether to use a 1x1 convolutional layer before the classification layer.
        class_expand: int=2048. Number of channels for the last 1x1 convolutional layer.
        dropout_prob: float. Parameters of dropout, 0.0 means dropout is not used.
        class_num: int=1000. The number of classes.
    Returns:
        model: nn.Layer. Specific PPHGNet model depends on args.
    """

    def __init__(self, stem_channels, stage_config, layer_num,
                 in_channels=3, det=False, out_indices=None):
        super().__init__()
        self.det = det
        self.out_indices = out_indices if out_indices is not None else [0, 1, 2, 3]

        # stem
        stem_channels.insert(0, in_channels)
        self.stem = nn.Sequential(
            *[
                ConvBNAct(
                    in_channels=stem_channels[i],
                    out_channels=stem_channels[i + 1],
                    kernel_size=3,
                    stride=2 if i == 0 else 1)
                for i in range(len(stem_channels) - 1)
            ]
        )

        if self.det:
            self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # stages
        self.stages = nn.ModuleList()
        self.out_channels = []
        for block_id, k in enumerate(stage_config):
            (in_channels, mid_channels, out_channels, block_num,
             downsample, stride) = stage_config[k]
            self.stages.append(
                HG_Stage(in_channels, mid_channels, out_channels, block_num,
                         layer_num, downsample, stride))
            if block_id in self.out_indices:
                self.out_channels.append(out_channels)

        if not self.det:
            self.out_channels = stage_config["stage4"][2]

        self._init_weights()

    def _init_weights(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.zeros_(m.bias)

    def forward(self, x):
        x = self.stem(x)
        if self.det:
            x = self.pool(x)

        out = []
        for i, stage in enumerate(self.stages):
            x = stage(x)
            if self.det and i in self.out_indices:
                out.append(x)
        if self.det:
            return out

        if self.training:
            x = F.adaptive_avg_pool2d(x, [1, 40])
        else:
            x = F.avg_pool2d(x, [3, 2])
        return x


def PPHGNet_small(pretrained=False, use_ssld=False, det=False, **kwargs):
    """
    PPHGNet_small
    Args:
        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
                    If str, means the path of the pretrained model.
        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
    Returns:
        model: nn.Layer. Specific `PPHGNet_small` model depends on args.
    """
    stage_config_det = {
        # in_channels, mid_channels, out_channels, blocks, downsample
        "stage1": [128, 128, 256, 1, False, 2],
        "stage2": [256, 160, 512, 1, True, 2],
        "stage3": [512, 192, 768, 2, True, 2],
        "stage4": [768, 224, 1024, 1, True, 2],
    }

    stage_config_rec = {
        # in_channels, mid_channels, out_channels, blocks, downsample
        "stage1": [128, 128, 256, 1, True, [2, 1]],
        "stage2": [256, 160, 512, 1, True, [1, 2]],
        "stage3": [512, 192, 768, 2, True, [2, 1]],
        "stage4": [768, 224, 1024, 1, True, [2, 1]],
    }

    model = PPHGNet(
        stem_channels=[64, 64, 128],
        stage_config=stage_config_det if det else stage_config_rec,
        layer_num=6,
        det=det,
        **kwargs
    )
    return model
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py (new file, mode 100644)
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import, division, print_function

import torch
import torch.nn.functional as F
from torch import nn

from ..common import Activation

NET_CONFIG_det = {
    # k, in_c, out_c, s, use_se
    "blocks2": [[3, 16, 32, 1, False]],
    "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
    "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
    "blocks5": [
        [3, 128, 256, 2, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
    ],
    "blocks6": [
        [5, 256, 512, 2, True],
        [5, 512, 512, 1, True],
        [5, 512, 512, 1, False],
        [5, 512, 512, 1, False],
    ],
}

NET_CONFIG_rec = {
    # k, in_c, out_c, s, use_se
    "blocks2": [[3, 16, 32, 1, False]],
    "blocks3": [[3, 32, 64, 1, False], [3, 64, 64, 1, False]],
    "blocks4": [[3, 64, 128, (2, 1), False], [3, 128, 128, 1, False]],
    "blocks5": [
        [3, 128, 256, (1, 2), False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
    ],
    "blocks6": [
        [5, 256, 512, (2, 1), True],
        [5, 512, 512, 1, True],
        [5, 512, 512, (2, 1), False],
        [5, 512, 512, 1, False],
    ],
}


def make_divisible(v, divisor=16, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v


class LearnableAffineBlock(nn.Module):
    def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.1):
        super().__init__()
        self.scale = nn.Parameter(torch.Tensor([scale_value]))
        self.bias = nn.Parameter(torch.Tensor([bias_value]))

    def forward(self, x):
        return self.scale * x + self.bias


class ConvBNLayer(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride, groups=1, lr_mult=1.0):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels, out_channels=out_channels,
            kernel_size=kernel_size, stride=stride,
            padding=(kernel_size - 1) // 2, groups=groups, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)

    def forward(self, x):
        x = self.conv(x)
        x = self.bn(x)
        return x


class Act(nn.Module):
    def __init__(self, act="hswish", lr_mult=1.0, lab_lr=0.1):
        super().__init__()
        if act == "hswish":
            self.act = nn.Hardswish(inplace=True)
        else:
            assert act == "relu"
            self.act = Activation(act)
        self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)

    def forward(self, x):
        return self.lab(self.act(x))


class LearnableRepLayer(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 groups=1, num_conv_branches=1, lr_mult=1.0, lab_lr=0.1):
        super().__init__()
        self.is_repped = False
        self.groups = groups
        self.stride = stride
        self.kernel_size = kernel_size
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_conv_branches = num_conv_branches
        self.padding = (kernel_size - 1) // 2

        self.identity = (
            nn.BatchNorm2d(num_features=in_channels)
            if out_channels == in_channels and stride == 1
            else None
        )

        self.conv_kxk = nn.ModuleList(
            [
                ConvBNLayer(in_channels, out_channels, kernel_size, stride,
                            groups=groups, lr_mult=lr_mult)
                for _ in range(self.num_conv_branches)
            ]
        )

        self.conv_1x1 = (
            ConvBNLayer(in_channels, out_channels, 1, stride, groups=groups, lr_mult=lr_mult)
            if kernel_size > 1
            else None
        )

        self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)
        self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr)

    def forward(self, x):
        # for export
        if self.is_repped:
            out = self.lab(self.reparam_conv(x))
            if self.stride != 2:
                out = self.act(out)
            return out

        out = 0
        if self.identity is not None:
            out += self.identity(x)

        if self.conv_1x1 is not None:
            out += self.conv_1x1(x)

        for conv in self.conv_kxk:
            out += conv(x)

        out = self.lab(out)
        if self.stride != 2:
            out = self.act(out)
        return out

    def rep(self):
        if self.is_repped:
            return
        kernel, bias = self._get_kernel_bias()
        self.reparam_conv = nn.Conv2d(
            in_channels=self.in_channels, out_channels=self.out_channels,
            kernel_size=self.kernel_size, stride=self.stride,
            padding=self.padding, groups=self.groups)
        self.reparam_conv.weight.data = kernel
        self.reparam_conv.bias.data = bias
        self.is_repped = True

    def _pad_kernel_1x1_to_kxk(self, kernel1x1, pad):
        if not isinstance(kernel1x1, torch.Tensor):
            return 0
        else:
            return nn.functional.pad(kernel1x1, [pad, pad, pad, pad])

    def _get_kernel_bias(self):
        kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1)
        kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk(kernel_conv_1x1, self.kernel_size // 2)

        kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)

        kernel_conv_kxk = 0
        bias_conv_kxk = 0
        for conv in self.conv_kxk:
            kernel, bias = self._fuse_bn_tensor(conv)
            kernel_conv_kxk += kernel
            bias_conv_kxk += bias

        kernel_reparam = kernel_conv_kxk + kernel_conv_1x1 + kernel_identity
        bias_reparam = bias_conv_kxk + bias_conv_1x1 + bias_identity
        return kernel_reparam, bias_reparam

    def _fuse_bn_tensor(self, branch):
        if not branch:
            return 0, 0
        elif isinstance(branch, ConvBNLayer):
            kernel = branch.conv.weight
            running_mean = branch.bn._mean
            running_var = branch.bn._variance
            gamma = branch.bn.weight
            beta = branch.bn.bias
            eps = branch.bn._epsilon
        else:
            assert isinstance(branch, nn.BatchNorm2d)
            if not hasattr(self, "id_tensor"):
                input_dim = self.in_channels // self.groups
                kernel_value = torch.zeros(
                    (self.in_channels, input_dim, self.kernel_size, self.kernel_size),
                    dtype=branch.weight.dtype)
                for i in range(self.in_channels):
                    kernel_value[i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2] = 1
                self.id_tensor = kernel_value
            kernel = self.id_tensor
            running_mean = branch._mean
            running_var = branch._variance
            gamma = branch.weight
            beta = branch.bias
            eps = branch._epsilon

        std = (running_var + eps).sqrt()
        t = (gamma / std).reshape((-1, 1, 1, 1))
        return kernel * t, beta - running_mean * gamma / std


class SELayer(nn.Module):
    def __init__(self, channel, reduction=4, lr_mult=1.0):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(
            in_channels=channel, out_channels=channel // reduction,
            kernel_size=1, stride=1, padding=0)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(
            in_channels=channel // reduction, out_channels=channel,
            kernel_size=1, stride=1, padding=0)
        self.hardsigmoid = nn.Hardsigmoid(inplace=True)

    def forward(self, x):
        identity = x
        x = self.avg_pool(x)
        x = self.conv1(x)
        x = self.relu(x)
        x = self.conv2(x)
        x = self.hardsigmoid(x)
        x = identity * x
        return x


class LCNetV3Block(nn.Module):
    def __init__(self, in_channels, out_channels, stride, dw_size,
                 use_se=False, conv_kxk_num=4, lr_mult=1.0, lab_lr=0.1):
        super().__init__()
        self.use_se = use_se
        self.dw_conv = LearnableRepLayer(
            in_channels=in_channels, out_channels=in_channels,
            kernel_size=dw_size, stride=stride, groups=in_channels,
            num_conv_branches=conv_kxk_num, lr_mult=lr_mult, lab_lr=lab_lr)
        if use_se:
            self.se = SELayer(in_channels, lr_mult=lr_mult)
        self.pw_conv = LearnableRepLayer(
            in_channels=in_channels, out_channels=out_channels,
            kernel_size=1, stride=1,
            num_conv_branches=conv_kxk_num, lr_mult=lr_mult, lab_lr=lab_lr)

    def forward(self, x):
        x = self.dw_conv(x)
        if self.use_se:
            x = self.se(x)
        x = self.pw_conv(x)
        return x


class PPLCNetV3(nn.Module):
    def __init__(self, scale=1.0, conv_kxk_num=4,
                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
                 lab_lr=0.1, det=False, **kwargs):
        super().__init__()
        self.scale = scale
        self.lr_mult_list = lr_mult_list
        self.det = det

        self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec

        assert isinstance(
            self.lr_mult_list, (list, tuple)
        ), "lr_mult_list should be in (list, tuple) but got {}".format(type(self.lr_mult_list))
        assert (
            len(self.lr_mult_list) == 6
        ), "lr_mult_list length should be 6 but got {}".format(len(self.lr_mult_list))

        self.conv1 = ConvBNLayer(
            in_channels=3, out_channels=make_divisible(16 * scale),
            kernel_size=3, stride=2, lr_mult=self.lr_mult_list[0])

        self.blocks2 = nn.Sequential(
            *[
                LCNetV3Block(
                    in_channels=make_divisible(in_c * scale),
                    out_channels=make_divisible(out_c * scale),
                    dw_size=k, stride=s, use_se=se,
                    conv_kxk_num=conv_kxk_num,
                    lr_mult=self.lr_mult_list[1], lab_lr=lab_lr)
                for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks2"])
            ]
        )

        self.blocks3 = nn.Sequential(
            *[
                LCNetV3Block(
                    in_channels=make_divisible(in_c * scale),
                    out_channels=make_divisible(out_c * scale),
                    dw_size=k, stride=s, use_se=se,
                    conv_kxk_num=conv_kxk_num,
                    lr_mult=self.lr_mult_list[2], lab_lr=lab_lr)
                for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks3"])
            ]
        )

        self.blocks4 = nn.Sequential(
            *[
                LCNetV3Block(
                    in_channels=make_divisible(in_c * scale),
                    out_channels=make_divisible(out_c * scale),
                    dw_size=k, stride=s, use_se=se,
                    conv_kxk_num=conv_kxk_num,
                    lr_mult=self.lr_mult_list[3], lab_lr=lab_lr)
                for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks4"])
            ]
        )

        self.blocks5 = nn.Sequential(
            *[
                LCNetV3Block(
                    in_channels=make_divisible(in_c * scale),
                    out_channels=make_divisible(out_c * scale),
                    dw_size=k, stride=s, use_se=se,
                    conv_kxk_num=conv_kxk_num,
                    lr_mult=self.lr_mult_list[4], lab_lr=lab_lr)
                for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks5"])
            ]
        )

        self.blocks6 = nn.Sequential(
            *[
                LCNetV3Block(
                    in_channels=make_divisible(in_c * scale),
                    out_channels=make_divisible(out_c * scale),
                    dw_size=k, stride=s, use_se=se,
                    conv_kxk_num=conv_kxk_num,
                    lr_mult=self.lr_mult_list[5], lab_lr=lab_lr)
                for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks6"])
            ]
        )
        self.out_channels = make_divisible(512 * scale)

        if self.det:
            mv_c = [16, 24, 56, 480]
            self.out_channels = [
                make_divisible(self.net_config["blocks3"][-1][2] * scale),
                make_divisible(self.net_config["blocks4"][-1][2] * scale),
                make_divisible(self.net_config["blocks5"][-1][2] * scale),
                make_divisible(self.net_config["blocks6"][-1][2] * scale),
            ]

            self.layer_list = nn.ModuleList(
                [
                    nn.Conv2d(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0),
                    nn.Conv2d(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0),
                    nn.Conv2d(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0),
                    nn.Conv2d(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0),
                ]
            )
            self.out_channels = [
                int(mv_c[0] * scale),
                int(mv_c[1] * scale),
                int(mv_c[2] * scale),
                int(mv_c[3] * scale),
            ]

    def forward(self, x):
        out_list = []
        x = self.conv1(x)

        x = self.blocks2(x)
        x = self.blocks3(x)
        out_list.append(x)
        x = self.blocks4(x)
        out_list.append(x)
        x = self.blocks5(x)
        out_list.append(x)
        x = self.blocks6(x)
        out_list.append(x)

        if self.det:
            out_list[0] = self.layer_list[0](out_list[0])
            out_list[1] = self.layer_list[1](out_list[1])
            out_list[2] = self.layer_list[2](out_list[2])
            out_list[3] = self.layer_list[3](out_list[3])
            return out_list

        if self.training:
            x = F.adaptive_avg_pool2d(x, [1, 40])
        else:
            x = F.avg_pool2d(x, [3, 2])
        return x
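
A quick sanity sketch (illustrative values, not from the diff): with det=True the LCNetV3 backbone emits four pyramid levels, each projected by a 1x1 conv from layer_list, and out_channels is rewritten to the projected widths.

import torch

backbone = PPLCNetV3(scale=0.95, det=True)
feats = backbone(torch.randn(1, 3, 640, 640))
print(len(feats), backbone.out_channels)    # 4 [15, 22, 53, 456]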
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py (new file, mode 100644)
from torch import nn

from .det_mobilenet_v3 import ConvBNLayer, ResidualUnit, make_divisible


class MobileNetV3(nn.Module):
    def __init__(self, in_channels=3, model_name="small", scale=0.5,
                 large_stride=None, small_stride=None, **kwargs):
        super(MobileNetV3, self).__init__()
        if small_stride is None:
            small_stride = [2, 2, 2, 2]
        if large_stride is None:
            large_stride = [1, 2, 2, 2]

        assert isinstance(large_stride, list), "large_stride type must " "be list but got {}".format(
            type(large_stride))
        assert isinstance(small_stride, list), "small_stride type must " "be list but got {}".format(
            type(small_stride))
        assert (
            len(large_stride) == 4
        ), "large_stride length must be " "4 but got {}".format(len(large_stride))
        assert (
            len(small_stride) == 4
        ), "small_stride length must be " "4 but got {}".format(len(small_stride))

        if model_name == "large":
            cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, False, "relu", large_stride[0]],
                [3, 64, 24, False, "relu", (large_stride[1], 1)],
                [3, 72, 24, False, "relu", 1],
                [5, 72, 40, True, "relu", (large_stride[2], 1)],
                [5, 120, 40, True, "relu", 1],
                [5, 120, 40, True, "relu", 1],
                [3, 240, 80, False, "hard_swish", 1],
                [3, 200, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 184, 80, False, "hard_swish", 1],
                [3, 480, 112, True, "hard_swish", 1],
                [3, 672, 112, True, "hard_swish", 1],
                [5, 672, 160, True, "hard_swish", (large_stride[3], 1)],
                [5, 960, 160, True, "hard_swish", 1],
                [5, 960, 160, True, "hard_swish", 1],
            ]
            cls_ch_squeeze = 960
        elif model_name == "small":
            cfg = [
                # k, exp, c, se, nl, s,
                [3, 16, 16, True, "relu", (small_stride[0], 1)],
                [3, 72, 24, False, "relu", (small_stride[1], 1)],
                [3, 88, 24, False, "relu", 1],
                [5, 96, 40, True, "hard_swish", (small_stride[2], 1)],
                [5, 240, 40, True, "hard_swish", 1],
                [5, 240, 40, True, "hard_swish", 1],
                [5, 120, 48, True, "hard_swish", 1],
                [5, 144, 48, True, "hard_swish", 1],
                [5, 288, 96, True, "hard_swish", (small_stride[3], 1)],
                [5, 576, 96, True, "hard_swish", 1],
                [5, 576, 96, True, "hard_swish", 1],
            ]
            cls_ch_squeeze = 576
        else:
            raise NotImplementedError("mode[" + model_name + "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert (
            scale in supported_scale
        ), "supported scales are {} but input scale is {}".format(supported_scale, scale)

        inplanes = 16
        # conv1
        self.conv1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=make_divisible(inplanes * scale),
            kernel_size=3, stride=2, padding=1, groups=1,
            if_act=True, act="hard_swish", name="conv1")
        i = 0
        block_list = []
        inplanes = make_divisible(inplanes * scale)
        for k, exp, c, se, nl, s in cfg:
            block_list.append(
                ResidualUnit(
                    in_channels=inplanes,
                    mid_channels=make_divisible(scale * exp),
                    out_channels=make_divisible(scale * c),
                    kernel_size=k, stride=s, use_se=se, act=nl,
                    name="conv" + str(i + 2)))
            inplanes = make_divisible(scale * c)
            i += 1
        self.blocks = nn.Sequential(*block_list)

        self.conv2 = ConvBNLayer(
            in_channels=inplanes,
            out_channels=make_divisible(scale * cls_ch_squeeze),
            kernel_size=1, stride=1, padding=0, groups=1,
            if_act=True, act="hard_swish", name="conv_last")

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.out_channels = make_divisible(scale * cls_ch_squeeze)

    def forward(self, x):
        x = self.conv1(x)
        x = self.blocks(x)
        x = self.conv2(x)
        x = self.pool(x)
        return x
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py (new file, mode 100644)
import
os
,
sys
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
..common
import
Activation
class
ConvBNLayer
(
nn
.
Module
):
def
__init__
(
self
,
num_channels
,
filter_size
,
num_filters
,
stride
,
padding
,
channels
=
None
,
num_groups
=
1
,
act
=
'hard_swish'
):
super
(
ConvBNLayer
,
self
).
__init__
()
self
.
act
=
act
self
.
_conv
=
nn
.
Conv2d
(
in_channels
=
num_channels
,
out_channels
=
num_filters
,
kernel_size
=
filter_size
,
stride
=
stride
,
padding
=
padding
,
groups
=
num_groups
,
bias
=
False
)
self
.
_batch_norm
=
nn
.
BatchNorm2d
(
num_filters
,
)
if
self
.
act
is
not
None
:
self
.
_act
=
Activation
(
act_type
=
act
,
inplace
=
True
)
def
forward
(
self
,
inputs
):
y
=
self
.
_conv
(
inputs
)
y
=
self
.
_batch_norm
(
y
)
if
self
.
act
is
not
None
:
y
=
self
.
_act
(
y
)
return
y
class
DepthwiseSeparable
(
nn
.
Module
):
def
__init__
(
self
,
num_channels
,
num_filters1
,
num_filters2
,
num_groups
,
stride
,
scale
,
dw_size
=
3
,
padding
=
1
,
use_se
=
False
):
super
(
DepthwiseSeparable
,
self
).
__init__
()
self
.
use_se
=
use_se
self
.
_depthwise_conv
=
ConvBNLayer
(
num_channels
=
num_channels
,
num_filters
=
int
(
num_filters1
*
scale
),
filter_size
=
dw_size
,
stride
=
stride
,
padding
=
padding
,
num_groups
=
int
(
num_groups
*
scale
))
if
use_se
:
self
.
_se
=
SEModule
(
int
(
num_filters1
*
scale
))
self
.
_pointwise_conv
=
ConvBNLayer
(
num_channels
=
int
(
num_filters1
*
scale
),
filter_size
=
1
,
num_filters
=
int
(
num_filters2
*
scale
),
stride
=
1
,
padding
=
0
)
def
forward
(
self
,
inputs
):
y
=
self
.
_depthwise_conv
(
inputs
)
if
self
.
use_se
:
y
=
self
.
_se
(
y
)
y
=
self
.
_pointwise_conv
(
y
)
return
y
class MobileNetV1Enhance(nn.Module):
    def __init__(self,
                 in_channels=3,
                 scale=0.5,
                 last_conv_stride=1,
                 last_pool_type='max',
                 **kwargs):
        super().__init__()
        self.scale = scale
        self.block_list = []

        self.conv1 = ConvBNLayer(
            num_channels=in_channels,
            filter_size=3,
            channels=3,
            num_filters=int(32 * scale),
            stride=2,
            padding=1)

        conv2_1 = DepthwiseSeparable(
            num_channels=int(32 * scale),
            num_filters1=32,
            num_filters2=64,
            num_groups=32,
            stride=1,
            scale=scale)
        self.block_list.append(conv2_1)

        conv2_2 = DepthwiseSeparable(
            num_channels=int(64 * scale),
            num_filters1=64,
            num_filters2=128,
            num_groups=64,
            stride=1,
            scale=scale)
        self.block_list.append(conv2_2)

        conv3_1 = DepthwiseSeparable(
            num_channels=int(128 * scale),
            num_filters1=128,
            num_filters2=128,
            num_groups=128,
            stride=1,
            scale=scale)
        self.block_list.append(conv3_1)

        conv3_2 = DepthwiseSeparable(
            num_channels=int(128 * scale),
            num_filters1=128,
            num_filters2=256,
            num_groups=128,
            stride=(2, 1),
            scale=scale)
        self.block_list.append(conv3_2)

        conv4_1 = DepthwiseSeparable(
            num_channels=int(256 * scale),
            num_filters1=256,
            num_filters2=256,
            num_groups=256,
            stride=1,
            scale=scale)
        self.block_list.append(conv4_1)

        conv4_2 = DepthwiseSeparable(
            num_channels=int(256 * scale),
            num_filters1=256,
            num_filters2=512,
            num_groups=256,
            stride=(2, 1),
            scale=scale)
        self.block_list.append(conv4_2)

        for _ in range(5):
            conv5 = DepthwiseSeparable(
                num_channels=int(512 * scale),
                num_filters1=512,
                num_filters2=512,
                num_groups=512,
                stride=1,
                dw_size=5,
                padding=2,
                scale=scale,
                use_se=False)
            self.block_list.append(conv5)

        conv5_6 = DepthwiseSeparable(
            num_channels=int(512 * scale),
            num_filters1=512,
            num_filters2=1024,
            num_groups=512,
            stride=(2, 1),
            dw_size=5,
            padding=2,
            scale=scale,
            use_se=True)
        self.block_list.append(conv5_6)

        conv6 = DepthwiseSeparable(
            num_channels=int(1024 * scale),
            num_filters1=1024,
            num_filters2=1024,
            num_groups=1024,
            stride=last_conv_stride,
            dw_size=5,
            padding=2,
            use_se=True,
            scale=scale)
        self.block_list.append(conv6)

        self.block_list = nn.Sequential(*self.block_list)
        if last_pool_type == 'avg':
            self.pool = nn.AvgPool2d(kernel_size=2, stride=2, padding=0)
        else:
            self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.out_channels = int(1024 * scale)

    def forward(self, inputs):
        y = self.conv1(inputs)
        y = self.block_list(y)
        y = self.pool(y)
        return y
def hardsigmoid(x):
    return F.relu6(x + 3., inplace=True) / 6.


class SEModule(nn.Module):
    def __init__(self, channel, reduction=4):
        super(SEModule, self).__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.conv1 = nn.Conv2d(
            in_channels=channel,
            out_channels=channel // reduction,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=True)
        self.conv2 = nn.Conv2d(
            in_channels=channel // reduction,
            out_channels=channel,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=True)

    def forward(self, inputs):
        outputs = self.avg_pool(inputs)
        outputs = self.conv1(outputs)
        outputs = F.relu(outputs)
        outputs = self.conv2(outputs)
        outputs = hardsigmoid(outputs)
        x = torch.mul(inputs, outputs)
        return x
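As a quick sanity check of the backbone above, the sketch below runs a dummy batch through MobileNetV1Enhance; the 32x320 input size and scale=0.5 are assumptions matching the usual PP-OCR recognition setup, not values taken from this diff. Because conv3_2, conv4_2 and conv5_6 use stride (2, 1), height is downsampled much more aggressively than width, which is what a CTC recognizer wants.

# Illustrative shape check (assumed 32x320 crop, scale=0.5).
if __name__ == "__main__":
    net = MobileNetV1Enhance(in_channels=3, scale=0.5)
    dummy = torch.randn(1, 3, 32, 320)
    feats = net(dummy)
    # Height collapses to 1 while width stays long:
    # expected shape (1, 512, 1, 80), with 512 == net.out_channels.
    print(feats.shape, net.out_channels)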
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py
0 → 100644
(collapsed diff; file contents not expanded here)
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py
0 → 100644
import torch
import torch.nn.functional as F
from torch import nn


class Hswish(nn.Module):
    def __init__(self, inplace=True):
        super(Hswish, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0


# out = max(0, min(1, slope*x+offset))
# paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None)
class Hsigmoid(nn.Module):
    def __init__(self, inplace=True):
        super(Hsigmoid, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        # torch: F.relu6(x + 3., inplace=self.inplace) / 6.
        # paddle: F.relu6(1.2 * x + 3., inplace=self.inplace) / 6.
        return F.relu6(1.2 * x + 3.0, inplace=self.inplace) / 6.0
class GELU(nn.Module):
    def __init__(self, inplace=True):
        super(GELU, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        return torch.nn.functional.gelu(x)


class Swish(nn.Module):
    def __init__(self, inplace=True):
        super(Swish, self).__init__()
        self.inplace = inplace

    def forward(self, x):
        if self.inplace:
            x.mul_(torch.sigmoid(x))
            return x
        else:
            return x * torch.sigmoid(x)


class Activation(nn.Module):
    def __init__(self, act_type, inplace=True):
        super(Activation, self).__init__()
        act_type = act_type.lower()
        if act_type == "relu":
            self.act = nn.ReLU(inplace=inplace)
        elif act_type == "relu6":
            self.act = nn.ReLU6(inplace=inplace)
        elif act_type == "sigmoid":
            raise NotImplementedError
        elif act_type == "hard_sigmoid":
            self.act = Hsigmoid(inplace)  # nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)#
        elif act_type == "hard_swish" or act_type == "hswish":
            self.act = Hswish(inplace=inplace)
        elif act_type == "leakyrelu":
            self.act = nn.LeakyReLU(inplace=inplace)
        elif act_type == "gelu":
            self.act = GELU(inplace=inplace)
        elif act_type == "swish":
            self.act = Swish(inplace=inplace)
        else:
            raise NotImplementedError

    def forward(self, inputs):
        return self.act(inputs)
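The Activation factory above is what the backbones and heads use instead of hard-coding nonlinearities. A small illustrative check (the input values are arbitrary placeholders):

# Illustrative use of the factory; the string must match one of the branches
# above, e.g. "relu", "relu6", "hard_sigmoid", "hard_swish", "gelu", "swish".
if __name__ == "__main__":
    act = Activation("hard_swish", inplace=False)
    t = torch.linspace(-4.0, 4.0, steps=5)
    print(act(t))  # hard-swish of [-4, -2, 0, 2, 4]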
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py
0 → 100644
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ["build_head"]


def build_head(config, **kwargs):
    # det head
    from .det_db_head import DBHead, PFHeadLocal

    # rec head
    from .rec_ctc_head import CTCHead
    from .rec_multi_head import MultiHead

    # cls head
    from .cls_head import ClsHead

    support_dict = [
        "DBHead",
        "CTCHead",
        "ClsHead",
        "MultiHead",
        "PFHeadLocal",
    ]

    module_name = config.pop("name")
    char_num = config.pop("char_num", 6625)
    assert module_name in support_dict, Exception(
        "head only support {}".format(support_dict)
    )
    module_class = eval(module_name)(**config, **kwargs)
    return module_class
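build_head resolves the head class by name and feeds the rest of the config dict to its constructor; note that char_num is popped but not forwarded, so the class count has to be passed explicitly where it matters. A hedged example (the channel and class numbers are placeholders, not values from a shipped config):

# Illustrative only; 192 input channels and 6625 classes are placeholders.
if __name__ == "__main__":
    cfg = {"name": "CTCHead", "in_channels": 192, "out_channels": 6625}
    head = build_head(cfg)  # pops "name", then builds CTCHead(**cfg)
    print(type(head).__name__, head.out_channels)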
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py
0 → 100644
import torch
import torch.nn.functional as F
from torch import nn


class ClsHead(nn.Module):
    """
    Class orientation
    Args:
        params(dict): super parameters for build Class network
    """

    def __init__(self, in_channels, class_dim, **kwargs):
        super(ClsHead, self).__init__()
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(in_channels, class_dim, bias=True)

    def forward(self, x):
        x = self.pool(x)
        x = torch.reshape(x, shape=[x.shape[0], x.shape[1]])
        x = self.fc(x)
        x = F.softmax(x, dim=1)
        return x
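ClsHead pools the backbone feature map to a single vector and soft-maxes over class_dim classes; in PP-OCR this is typically the 0/180-degree text-direction classifier, so class_dim is usually 2. An illustrative shape check, with the 200-channel input and feature size as placeholders:

# Illustrative only; 200 channels and the 6x50 feature grid are placeholders.
if __name__ == "__main__":
    head = ClsHead(in_channels=200, class_dim=2)
    probs = head(torch.randn(1, 200, 6, 50))
    print(probs.shape, float(probs.sum()))  # (1, 2); each row sums to 1 after softmax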
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py
0 → 100644
import torch
import torch.nn as nn
import torch.nn.functional as F

from ..common import Activation
from ..backbones.det_mobilenet_v3 import ConvBNLayer


class Head(nn.Module):
    def __init__(self, in_channels, **kwargs):
        super(Head, self).__init__()

        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=in_channels // 4,
            kernel_size=3,
            padding=1,
            bias=False)
        self.conv_bn1 = nn.BatchNorm2d(in_channels // 4)
        self.relu1 = Activation(act_type='relu')

        self.conv2 = nn.ConvTranspose2d(
            in_channels=in_channels // 4,
            out_channels=in_channels // 4,
            kernel_size=2,
            stride=2)
        self.conv_bn2 = nn.BatchNorm2d(in_channels // 4)
        self.relu2 = Activation(act_type='relu')

        self.conv3 = nn.ConvTranspose2d(
            in_channels=in_channels // 4,
            out_channels=1,
            kernel_size=2,
            stride=2)

    def forward(self, x, return_f=False):
        x = self.conv1(x)
        x = self.conv_bn1(x)
        x = self.relu1(x)
        x = self.conv2(x)
        x = self.conv_bn2(x)
        x = self.relu2(x)
        if return_f is True:
            f = x
        x = self.conv3(x)
        x = torch.sigmoid(x)
        if return_f is True:
            return x, f
        return x
class DBHead(nn.Module):
    """
    Differentiable Binarization (DB) for text detection:
        see https://arxiv.org/abs/1911.08947
    args:
        params(dict): super parameters for build DB network
    """

    def __init__(self, in_channels, k=50, **kwargs):
        super(DBHead, self).__init__()
        self.k = k
        binarize_name_list = [
            'conv2d_56', 'batch_norm_47', 'conv2d_transpose_0',
            'batch_norm_48', 'conv2d_transpose_1', 'binarize'
        ]
        thresh_name_list = [
            'conv2d_57', 'batch_norm_49', 'conv2d_transpose_2',
            'batch_norm_50', 'conv2d_transpose_3', 'thresh'
        ]
        self.binarize = Head(in_channels, **kwargs)  # binarize_name_list)
        self.thresh = Head(in_channels, **kwargs)  # thresh_name_list)

    def step_function(self, x, y):
        return torch.reciprocal(1 + torch.exp(-self.k * (x - y)))

    def forward(self, x):
        shrink_maps = self.binarize(x)
        return {'maps': shrink_maps}
class LocalModule(nn.Module):
    def __init__(self, in_c, mid_c, use_distance=True):
        super(self.__class__, self).__init__()
        self.last_3 = ConvBNLayer(in_c + 1, mid_c, 3, 1, 1, act='relu')
        self.last_1 = nn.Conv2d(mid_c, 1, 1, 1, 0)

    def forward(self, x, init_map, distance_map):
        outf = torch.cat([init_map, x], dim=1)
        # last Conv
        out = self.last_1(self.last_3(outf))
        return out
class PFHeadLocal(DBHead):
    def __init__(self, in_channels, k=50, mode='small', **kwargs):
        super(PFHeadLocal, self).__init__(in_channels, k, **kwargs)
        self.mode = mode

        self.up_conv = nn.Upsample(scale_factor=2, mode="nearest")
        if self.mode == 'large':
            self.cbn_layer = LocalModule(in_channels // 4, in_channels // 4)
        elif self.mode == 'small':
            self.cbn_layer = LocalModule(in_channels // 4, in_channels // 8)

    def forward(self, x, targets=None):
        shrink_maps, f = self.binarize(x, return_f=True)
        base_maps = shrink_maps
        cbn_maps = self.cbn_layer(self.up_conv(f), shrink_maps, None)
        cbn_maps = F.sigmoid(cbn_maps)
        return {'maps': 0.5 * (base_maps + cbn_maps), 'cbn_maps': cbn_maps}
\ No newline at end of file
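At inference DBHead only runs the binarization branch: the Head module quarters the channel count, upsamples twice with transposed convolutions (4x overall) and applies a sigmoid, so a 1/4-resolution FPN feature comes back as a full-resolution probability map. A rough shape sketch, where the 96-channel 160x160 input is a placeholder for the fused FPN output of a 640x640 image:

# Illustrative only; 96 channels / 160x160 mimic a fused FPN map at 1/4 scale.
if __name__ == "__main__":
    db_head = DBHead(in_channels=96)
    out = db_head(torch.randn(1, 96, 160, 160))
    print(out["maps"].shape)  # expected (1, 1, 640, 640), values in (0, 1)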
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py
0 → 100644
import torch.nn.functional as F
from torch import nn


class CTCHead(nn.Module):
    def __init__(self,
                 in_channels,
                 out_channels=6625,
                 fc_decay=0.0004,
                 mid_channels=None,
                 return_feats=False,
                 **kwargs):
        super(CTCHead, self).__init__()
        if mid_channels is None:
            self.fc = nn.Linear(
                in_channels,
                out_channels,
                bias=True,
            )
        else:
            self.fc1 = nn.Linear(
                in_channels,
                mid_channels,
                bias=True,
            )
            self.fc2 = nn.Linear(
                mid_channels,
                out_channels,
                bias=True,
            )

        self.out_channels = out_channels
        self.mid_channels = mid_channels
        self.return_feats = return_feats

    def forward(self, x, labels=None):
        if self.mid_channels is None:
            predicts = self.fc(x)
        else:
            x = self.fc1(x)
            predicts = self.fc2(x)

        if self.return_feats:
            result = (x, predicts)
        else:
            result = predicts

        if not self.training:
            predicts = F.softmax(predicts, dim=2)
            result = predicts

        return result
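CTCHead is a per-timestep linear classifier over the sequence the neck produces; in eval mode it returns softmax probabilities over the character set (6625 classes by default, blank included). A small illustrative run, with the 64 input channels and 80 time steps as placeholders:

# Illustrative only; (batch=1, T=80, C=64) is a placeholder sequence shape.
import torch

if __name__ == "__main__":
    head = CTCHead(in_channels=64).eval()
    seq = torch.randn(1, 80, 64)
    probs = head(seq)
    print(probs.shape)  # expected (1, 80, 6625); softmax applied because not training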
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py
0 → 100644
from torch import nn

from ..necks.rnn import Im2Seq, SequenceEncoder
from .rec_ctc_head import CTCHead


class FCTranspose(nn.Module):
    def __init__(self, in_channels, out_channels, only_transpose=False):
        super().__init__()
        self.only_transpose = only_transpose
        if not self.only_transpose:
            self.fc = nn.Linear(in_channels, out_channels, bias=False)

    def forward(self, x):
        if self.only_transpose:
            return x.permute([0, 2, 1])
        else:
            return self.fc(x.permute([0, 2, 1]))


class MultiHead(nn.Module):
    def __init__(self, in_channels, out_channels_list, **kwargs):
        super().__init__()
        self.head_list = kwargs.pop("head_list")

        self.gtc_head = "sar"
        assert len(self.head_list) >= 2
        for idx, head_name in enumerate(self.head_list):
            name = list(head_name)[0]
            if name == "SARHead":
                pass
            elif name == "NRTRHead":
                pass
            elif name == "CTCHead":
                # ctc neck
                self.encoder_reshape = Im2Seq(in_channels)
                neck_args = self.head_list[idx][name]["Neck"]
                encoder_type = neck_args.pop("name")
                self.ctc_encoder = SequenceEncoder(
                    in_channels=in_channels, encoder_type=encoder_type, **neck_args
                )
                # ctc head
                head_args = self.head_list[idx][name].get("Head", {})
                if head_args is None:
                    head_args = {}

                self.ctc_head = CTCHead(
                    in_channels=self.ctc_encoder.out_channels,
                    out_channels=out_channels_list["CTCLabelDecode"],
                    **head_args,
                )
            else:
                raise NotImplementedError(
                    f"{name} is not supported in MultiHead yet"
                )

    def forward(self, x, data=None):
        ctc_encoder = self.ctc_encoder(x)
        return self.ctc_head(ctc_encoder)
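MultiHead expects head_list to be a list of single-key dicts, keyed by head name, with an inner "Neck" config (whose "name" selects the SequenceEncoder type) and an optional "Head" config; out_channels_list maps decoder names to class counts. Only the CTC branch is actually built and used in forward, while SAR/NRTR entries are parsed and skipped. A sketch of that structure; every value below is an illustrative placeholder, not something taken from a shipped config:

# Structure sketch only; the neck/head hyperparameters are placeholders.
example_head_list = [
    {"CTCHead": {
        "Neck": {"name": "svtr", "dims": 64, "depth": 2, "hidden_dims": 120},
        "Head": {"fc_decay": 0.00001},
    }},
    {"SARHead": {"enc_dim": 512, "max_text_length": 25}},  # parsed but skipped above
]
example_out_channels_list = {"CTCLabelDecode": 6625}
# MultiHead(in_channels=..., out_channels_list=example_out_channels_list,
#           head_list=example_head_list) would then build only the CTC branch.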
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py
0 → 100644
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ["build_neck"]


def build_neck(config):
    from .db_fpn import DBFPN, LKPAN, RSEFPN
    from .rnn import SequenceEncoder

    support_dict = ["DBFPN", "SequenceEncoder", "RSEFPN", "LKPAN"]

    module_name = config.pop("name")
    assert module_name in support_dict, Exception(
        "neck only support {}".format(support_dict)
    )
    module_class = eval(module_name)(**config)
    return module_class
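build_neck mirrors build_head: the "name" key picks the class and the rest of the dict becomes constructor arguments. An illustrative call, where the channel list is a placeholder for whatever the chosen detection backbone reports:

# Illustrative only; [16, 24, 56, 480] is a placeholder backbone channel list.
import torch

if __name__ == "__main__":
    cfg = {"name": "DBFPN", "in_channels": [16, 24, 56, 480], "out_channels": 96}
    neck = build_neck(cfg)
    print(type(neck).__name__, neck.out_channels)  # DBFPN 96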
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py
0 → 100644
import torch
import torch.nn.functional as F
from torch import nn

from ..backbones.det_mobilenet_v3 import SEModule
from ..necks.intracl import IntraCLBlock


def hard_swish(x, inplace=True):
    return x * F.relu6(x + 3.0, inplace=inplace) / 6.0
class DSConv(nn.Module):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 padding,
                 stride=1,
                 groups=None,
                 if_act=True,
                 act="relu",
                 **kwargs):
        super(DSConv, self).__init__()
        if groups == None:
            groups = in_channels
        self.if_act = if_act
        self.act = act
        self.conv1 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=in_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias=False,
        )
        self.bn1 = nn.BatchNorm2d(in_channels)
        self.conv2 = nn.Conv2d(
            in_channels=in_channels,
            out_channels=int(in_channels * 4),
            kernel_size=1,
            stride=1,
            bias=False,
        )
        self.bn2 = nn.BatchNorm2d(int(in_channels * 4))
        self.conv3 = nn.Conv2d(
            in_channels=int(in_channels * 4),
            out_channels=out_channels,
            kernel_size=1,
            stride=1,
            bias=False,
        )
        self._c = [in_channels, out_channels]
        if in_channels != out_channels:
            self.conv_end = nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                bias=False,
            )

    def forward(self, inputs):
        x = self.conv1(inputs)
        x = self.bn1(x)

        x = self.conv2(x)
        x = self.bn2(x)
        if self.if_act:
            if self.act == "relu":
                x = F.relu(x)
            elif self.act == "hardswish":
                x = hard_swish(x)
            else:
                print(
                    "The activation function({}) is selected incorrectly.".format(self.act)
                )
                exit()

        x = self.conv3(x)
        if self._c[0] != self._c[1]:
            x = x + self.conv_end(inputs)
        return x
class DBFPN(nn.Module):
    def __init__(self, in_channels, out_channels, use_asf=False, **kwargs):
        super(DBFPN, self).__init__()
        self.out_channels = out_channels
        self.use_asf = use_asf

        self.in2_conv = nn.Conv2d(
            in_channels=in_channels[0],
            out_channels=self.out_channels,
            kernel_size=1,
            bias=False,
        )
        self.in3_conv = nn.Conv2d(
            in_channels=in_channels[1],
            out_channels=self.out_channels,
            kernel_size=1,
            bias=False,
        )
        self.in4_conv = nn.Conv2d(
            in_channels=in_channels[2],
            out_channels=self.out_channels,
            kernel_size=1,
            bias=False,
        )
        self.in5_conv = nn.Conv2d(
            in_channels=in_channels[3],
            out_channels=self.out_channels,
            kernel_size=1,
            bias=False,
        )
        self.p5_conv = nn.Conv2d(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            bias=False,
        )
        self.p4_conv = nn.Conv2d(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            bias=False,
        )
        self.p3_conv = nn.Conv2d(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            bias=False,
        )
        self.p2_conv = nn.Conv2d(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            bias=False,
        )

        if self.use_asf is True:
            self.asf = ASFBlock(self.out_channels, self.out_channels // 4)

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.in5_conv(c5)
        in4 = self.in4_conv(c4)
        in3 = self.in3_conv(c3)
        in2 = self.in2_conv(c2)

        out4 = in4 + F.interpolate(in5, scale_factor=2, mode="nearest")  # align_mode=1) # 1/16
        out3 = in3 + F.interpolate(out4, scale_factor=2, mode="nearest")  # align_mode=1) # 1/8
        out2 = in2 + F.interpolate(out3, scale_factor=2, mode="nearest")  # align_mode=1) # 1/4

        p5 = self.p5_conv(in5)
        p4 = self.p4_conv(out4)
        p3 = self.p3_conv(out3)
        p2 = self.p2_conv(out2)
        p5 = F.interpolate(p5, scale_factor=8, mode="nearest")  # align_mode=1)
        p4 = F.interpolate(p4, scale_factor=4, mode="nearest")  # align_mode=1)
        p3 = F.interpolate(p3, scale_factor=2, mode="nearest")  # align_mode=1)

        fuse = torch.cat([p5, p4, p3, p2], dim=1)

        if self.use_asf is True:
            fuse = self.asf(fuse, [p5, p4, p3, p2])

        return fuse
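DBFPN takes the four backbone stages (strides 4/8/16/32), projects each to out_channels with 1x1 convolutions, adds them top-down with nearest-neighbour upsampling, reduces every level to out_channels // 4, upsamples back to the 1/4 grid and concatenates, so the fused map again has out_channels channels. A shape sketch with placeholder sizes:

# Illustrative only; channel counts and the 640x640 image size are placeholders.
if __name__ == "__main__":
    fpn = DBFPN(in_channels=[16, 24, 56, 480], out_channels=96)
    feats = [
        torch.randn(1, 16, 160, 160),   # c2, stride 4
        torch.randn(1, 24, 80, 80),     # c3, stride 8
        torch.randn(1, 56, 40, 40),     # c4, stride 16
        torch.randn(1, 480, 20, 20),    # c5, stride 32
    ]
    fuse = fpn(feats)
    print(fuse.shape)  # expected (1, 96, 160, 160): four levels of 24 channels each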
class RSELayer(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, shortcut=True):
        super(RSELayer, self).__init__()
        self.out_channels = out_channels
        self.in_conv = nn.Conv2d(
            in_channels=in_channels,
            out_channels=self.out_channels,
            kernel_size=kernel_size,
            padding=int(kernel_size // 2),
            bias=False,
        )
        self.se_block = SEModule(self.out_channels)
        self.shortcut = shortcut

    def forward(self, ins):
        x = self.in_conv(ins)
        if self.shortcut:
            out = x + self.se_block(x)
        else:
            out = self.se_block(x)
        return out
class RSEFPN(nn.Module):
    def __init__(self, in_channels, out_channels, shortcut=True, **kwargs):
        super(RSEFPN, self).__init__()
        self.out_channels = out_channels
        self.ins_conv = nn.ModuleList()
        self.inp_conv = nn.ModuleList()
        self.intracl = False
        if "intracl" in kwargs.keys() and kwargs["intracl"] is True:
            self.intracl = kwargs["intracl"]
            self.incl1 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
            self.incl2 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
            self.incl3 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
            self.incl4 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)

        for i in range(len(in_channels)):
            self.ins_conv.append(
                RSELayer(in_channels[i], out_channels, kernel_size=1, shortcut=shortcut)
            )
            self.inp_conv.append(
                RSELayer(out_channels, out_channels // 4, kernel_size=3, shortcut=shortcut)
            )

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.ins_conv[3](c5)
        in4 = self.ins_conv[2](c4)
        in3 = self.ins_conv[1](c3)
        in2 = self.ins_conv[0](c2)

        out4 = in4 + F.interpolate(in5, scale_factor=2, mode="nearest")  # 1/16
        out3 = in3 + F.interpolate(out4, scale_factor=2, mode="nearest")  # 1/8
        out2 = in2 + F.interpolate(out3, scale_factor=2, mode="nearest")  # 1/4

        p5 = self.inp_conv[3](in5)
        p4 = self.inp_conv[2](out4)
        p3 = self.inp_conv[1](out3)
        p2 = self.inp_conv[0](out2)

        if self.intracl is True:
            p5 = self.incl4(p5)
            p4 = self.incl3(p4)
            p3 = self.incl2(p3)
            p2 = self.incl1(p2)

        p5 = F.interpolate(p5, scale_factor=8, mode="nearest")
        p4 = F.interpolate(p4, scale_factor=4, mode="nearest")
        p3 = F.interpolate(p3, scale_factor=2, mode="nearest")

        fuse = torch.cat([p5, p4, p3, p2], dim=1)
        return fuse
class LKPAN(nn.Module):
    def __init__(self, in_channels, out_channels, mode="large", **kwargs):
        super(LKPAN, self).__init__()
        self.out_channels = out_channels
        self.ins_conv = nn.ModuleList()
        self.inp_conv = nn.ModuleList()
        # pan head
        self.pan_head_conv = nn.ModuleList()
        self.pan_lat_conv = nn.ModuleList()

        if mode.lower() == "lite":
            p_layer = DSConv
        elif mode.lower() == "large":
            p_layer = nn.Conv2d
        else:
            raise ValueError(
                "mode can only be one of ['lite', 'large'], but received {}".format(mode)
            )

        for i in range(len(in_channels)):
            self.ins_conv.append(
                nn.Conv2d(
                    in_channels=in_channels[i],
                    out_channels=self.out_channels,
                    kernel_size=1,
                    bias=False,
                )
            )
            self.inp_conv.append(
                p_layer(
                    in_channels=self.out_channels,
                    out_channels=self.out_channels // 4,
                    kernel_size=9,
                    padding=4,
                    bias=False,
                )
            )
            if i > 0:
                self.pan_head_conv.append(
                    nn.Conv2d(
                        in_channels=self.out_channels // 4,
                        out_channels=self.out_channels // 4,
                        kernel_size=3,
                        padding=1,
                        stride=2,
                        bias=False,
                    )
                )
            self.pan_lat_conv.append(
                p_layer(
                    in_channels=self.out_channels // 4,
                    out_channels=self.out_channels // 4,
                    kernel_size=9,
                    padding=4,
                    bias=False,
                )
            )

        self.intracl = False
        if "intracl" in kwargs.keys() and kwargs["intracl"] is True:
            self.intracl = kwargs["intracl"]
            self.incl1 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
            self.incl2 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
            self.incl3 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)
            self.incl4 = IntraCLBlock(self.out_channels // 4, reduce_factor=2)

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.ins_conv[3](c5)
        in4 = self.ins_conv[2](c4)
        in3 = self.ins_conv[1](c3)
        in2 = self.ins_conv[0](c2)

        out4 = in4 + F.interpolate(in5, scale_factor=2, mode="nearest")  # 1/16
        out3 = in3 + F.interpolate(out4, scale_factor=2, mode="nearest")  # 1/8
        out2 = in2 + F.interpolate(out3, scale_factor=2, mode="nearest")  # 1/4

        f5 = self.inp_conv[3](in5)
        f4 = self.inp_conv[2](out4)
        f3 = self.inp_conv[1](out3)
        f2 = self.inp_conv[0](out2)

        pan3 = f3 + self.pan_head_conv[0](f2)
        pan4 = f4 + self.pan_head_conv[1](pan3)
        pan5 = f5 + self.pan_head_conv[2](pan4)

        p2 = self.pan_lat_conv[0](f2)
        p3 = self.pan_lat_conv[1](pan3)
        p4 = self.pan_lat_conv[2](pan4)
        p5 = self.pan_lat_conv[3](pan5)

        if self.intracl is True:
            p5 = self.incl4(p5)
            p4 = self.incl3(p4)
            p3 = self.incl2(p3)
            p2 = self.incl1(p2)

        p5 = F.interpolate(p5, scale_factor=8, mode="nearest")
        p4 = F.interpolate(p4, scale_factor=4, mode="nearest")
        p3 = F.interpolate(p3, scale_factor=2, mode="nearest")

        fuse = torch.cat([p5, p4, p3, p2], dim=1)
        return fuse
class ASFBlock(nn.Module):
    """
    This code is referred from:
        https://github.com/MhLiao/DB/blob/master/decoders/feature_attention.py
    """

    def __init__(self, in_channels, inter_channels, out_features_num=4):
        """
        Adaptive Scale Fusion (ASF) block of DBNet++
        Args:
            in_channels: the number of channels in the input data
            inter_channels: the number of middle channels
            out_features_num: the number of fused stages
        """
        super(ASFBlock, self).__init__()
        self.in_channels = in_channels
        self.inter_channels = inter_channels
        self.out_features_num = out_features_num
        self.conv = nn.Conv2d(in_channels, inter_channels, 3, padding=1)

        self.spatial_scale = nn.Sequential(
            # Nx1xHxW
            nn.Conv2d(
                in_channels=1,
                out_channels=1,
                kernel_size=3,
                bias=False,
                padding=1,
            ),
            nn.ReLU(),
            nn.Conv2d(
                in_channels=1,
                out_channels=1,
                kernel_size=1,
                bias=False,
            ),
            nn.Sigmoid(),
        )

        self.channel_scale = nn.Sequential(
            nn.Conv2d(
                in_channels=inter_channels,
                out_channels=out_features_num,
                kernel_size=1,
                bias=False,
            ),
            nn.Sigmoid(),
        )

    def forward(self, fuse_features, features_list):
        fuse_features = self.conv(fuse_features)
        spatial_x = torch.mean(fuse_features, dim=1, keepdim=True)
        attention_scores = self.spatial_scale(spatial_x) + fuse_features
        attention_scores = self.channel_scale(attention_scores)
        assert len(features_list) == self.out_features_num
        out_list = []
        for i in range(self.out_features_num):
            out_list.append(attention_scores[:, i:i + 1] * features_list[i])
        return torch.cat(out_list, dim=1)
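ASFBlock, used when DBFPN is built with use_asf=True (the DBNet++ variant), learns a spatial attention map and then out_features_num channel-wise gates, one per pyramid level, and re-weights each level before they are concatenated. An illustrative check with placeholder sizes:

# Illustrative only; 96/24 channels and the 160x160 grid are placeholders.
if __name__ == "__main__":
    asf = ASFBlock(in_channels=96, inter_channels=24)
    fuse = torch.randn(1, 96, 160, 160)
    levels = [torch.randn(1, 24, 160, 160) for _ in range(4)]
    out = asf(fuse, levels)
    print(out.shape)  # expected (1, 96, 160, 160): four re-weighted 24-channel levels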