Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
ea3003f6
Unverified
Commit
ea3003f6
authored
May 23, 2025
by
Xiaomeng Zhao
Committed by
GitHub
May 23, 2025
Browse files
Merge pull request #2505 from myhloli/dev
feat(ocr): add PPHGNetV2_B4 backbone and update OCR models
parents
8f8b8c4c
93ad41ed
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
55 additions
and
11 deletions
+55
-11
magic_pdf/model/doc_analyze_by_custom_model.py
magic_pdf/model/doc_analyze_by_custom_model.py
+1
-1
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py
...ddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py
+2
-1
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py
...es/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py
+18
-5
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml
...leocr2pytorch/pytorchocr/utils/resources/arch_config.yaml
+26
-0
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml
...eocr2pytorch/pytorchocr/utils/resources/models_config.yml
+8
-4
No files found.
magic_pdf/model/doc_analyze_by_custom_model.py
View file @
ea3003f6
...
@@ -189,7 +189,7 @@ def batch_doc_analyze(
...
@@ -189,7 +189,7 @@ def batch_doc_analyze(
formula_enable
=
None
,
formula_enable
=
None
,
table_enable
=
None
,
table_enable
=
None
,
):
):
MIN_BATCH_INFERENCE_SIZE
=
int
(
os
.
environ
.
get
(
'MINERU_MIN_BATCH_INFERENCE_SIZE'
,
2
00
))
MIN_BATCH_INFERENCE_SIZE
=
int
(
os
.
environ
.
get
(
'MINERU_MIN_BATCH_INFERENCE_SIZE'
,
1
00
))
batch_size
=
MIN_BATCH_INFERENCE_SIZE
batch_size
=
MIN_BATCH_INFERENCE_SIZE
page_wh_list
=
[]
page_wh_list
=
[]
...
...
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py
View file @
ea3003f6
...
@@ -35,7 +35,7 @@ def build_backbone(config, model_type):
...
@@ -35,7 +35,7 @@ def build_backbone(config, model_type):
from
.rec_mobilenet_v3
import
MobileNetV3
from
.rec_mobilenet_v3
import
MobileNetV3
from
.rec_svtrnet
import
SVTRNet
from
.rec_svtrnet
import
SVTRNet
from
.rec_mv1_enhance
import
MobileNetV1Enhance
from
.rec_mv1_enhance
import
MobileNetV1Enhance
from
.rec_pphgnetv2
import
PPHGNetV2_B4
support_dict
=
[
support_dict
=
[
"MobileNetV1Enhance"
,
"MobileNetV1Enhance"
,
"MobileNetV3"
,
"MobileNetV3"
,
...
@@ -48,6 +48,7 @@ def build_backbone(config, model_type):
...
@@ -48,6 +48,7 @@ def build_backbone(config, model_type):
"DenseNet"
,
"DenseNet"
,
"PPLCNetV3"
,
"PPLCNetV3"
,
"PPHGNet_small"
,
"PPHGNet_small"
,
"PPHGNetV2_B4"
,
]
]
else
:
else
:
raise
NotImplementedError
raise
NotImplementedError
...
...
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py
View file @
ea3003f6
...
@@ -9,14 +9,27 @@ class Im2Seq(nn.Module):
...
@@ -9,14 +9,27 @@ class Im2Seq(nn.Module):
super
().
__init__
()
super
().
__init__
()
self
.
out_channels
=
in_channels
self
.
out_channels
=
in_channels
# def forward(self, x):
# B, C, H, W = x.shape
# # assert H == 1
# x = x.squeeze(dim=2)
# # x = x.transpose([0, 2, 1]) # paddle (NTC)(batch, width, channels)
# x = x.permute(0, 2, 1)
# return x
def
forward
(
self
,
x
):
def
forward
(
self
,
x
):
B
,
C
,
H
,
W
=
x
.
shape
B
,
C
,
H
,
W
=
x
.
shape
# assert H == 1
# 处理四维张量,将空间维度展平为序列
x
=
x
.
squeeze
(
dim
=
2
)
if
H
==
1
:
# x = x.transpose([0, 2, 1]) # paddle (NTC)(batch, width, channels)
# 原来的处理逻辑,适用于H=1的情况
x
=
x
.
permute
(
0
,
2
,
1
)
x
=
x
.
squeeze
(
dim
=
2
)
return
x
x
=
x
.
permute
(
0
,
2
,
1
)
# (B, W, C)
else
:
# 处理H不为1的情况
x
=
x
.
permute
(
0
,
2
,
3
,
1
)
# (B, H, W, C)
x
=
x
.
reshape
(
B
,
H
*
W
,
C
)
# (B, H*W, C)
return
x
class
EncoderWithRNN_
(
nn
.
Module
):
class
EncoderWithRNN_
(
nn
.
Module
):
def
__init__
(
self
,
in_channels
,
hidden_size
):
def
__init__
(
self
,
in_channels
,
hidden_size
):
...
...
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml
View file @
ea3003f6
...
@@ -212,6 +212,32 @@ ch_PP-OCRv4_rec_server_doc_infer:
...
@@ -212,6 +212,32 @@ ch_PP-OCRv4_rec_server_doc_infer:
nrtr_dim
:
384
nrtr_dim
:
384
max_text_length
:
25
max_text_length
:
25
ch_PP-OCRv5_rec_server_infer
:
model_type
:
rec
algorithm
:
SVTR_HGNet
Transform
:
Backbone
:
name
:
PPHGNetV2_B4
text_rec
:
True
Head
:
name
:
MultiHead
out_channels_list
:
CTCLabelDecode
:
18385
head_list
:
-
CTCHead
:
Neck
:
name
:
svtr
dims
:
120
depth
:
2
hidden_dims
:
120
kernel_size
:
[
1
,
3
]
use_guide
:
True
Head
:
fc_decay
:
0.00001
-
NRTRHead
:
nrtr_dim
:
384
max_text_length
:
25
ch_PP-OCRv5_rec_infer
:
ch_PP-OCRv5_rec_infer
:
model_type
:
rec
model_type
:
rec
algorithm
:
SVTR_HGNet
algorithm
:
SVTR_HGNet
...
...
magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml
View file @
ea3003f6
lang
:
lang
:
ch_lite
:
det
:
ch_PP-OCRv3_det_infer.pth
rec
:
ch_PP-OCRv5_rec_infer.pth
dict
:
ppocrv5_dict.txt
ch_lite_v4
:
ch_lite_v4
:
det
:
ch_PP-OCRv3_det_infer.pth
det
:
ch_PP-OCRv3_det_infer.pth
rec
:
ch_PP-OCRv4_rec_infer.pth
rec
:
ch_PP-OCRv4_rec_infer.pth
dict
:
ppocr_keys_v1.txt
dict
:
ppocr_keys_v1.txt
ch_lite
:
det
:
ch_PP-OCRv5_det_infer.pth
rec
:
ch_PP-OCRv5_rec_infer.pth
dict
:
ppocrv5_dict.txt
ch_server
:
ch_server
:
det
:
ch_PP-OCRv3_det_infer.pth
rec
:
ch_PP-OCRv5_rec_server_infer.pth
dict
:
ppocrv5_dict.txt
ch_server_v4
:
det
:
ch_PP-OCRv3_det_infer.pth
det
:
ch_PP-OCRv3_det_infer.pth
rec
:
ch_PP-OCRv4_rec_server_infer.pth
rec
:
ch_PP-OCRv4_rec_server_infer.pth
dict
:
ppocr_keys_v1.txt
dict
:
ppocr_keys_v1.txt
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment