Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
383fed52
Unverified
Commit
383fed52
authored
Jun 06, 2025
by
Xiaomeng Zhao
Committed by
GitHub
Jun 06, 2025
Browse files
Merge pull request #2575 from seedclaimer/dev
修复batch-ocr-det中的逻辑缺失,可能与公式检测异常有关
parents
1b29fba6
400bb763
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
30 additions
and
8 deletions
+30
-8
magic_pdf/model/batch_analyze.py
magic_pdf/model/batch_analyze.py
+30
-8
No files found.
magic_pdf/model/batch_analyze.py
View file @
383fed52
...
@@ -181,17 +181,16 @@ class BatchAnalyze:
...
@@ -181,17 +181,16 @@ class BatchAnalyze:
# 对每个分辨率组进行批处理
# 对每个分辨率组进行批处理
for
group_key
,
group_crops
in
tqdm
(
resolution_groups
.
items
(),
desc
=
f
"OCR-det
{
lang
}
"
):
for
group_key
,
group_crops
in
tqdm
(
resolution_groups
.
items
(),
desc
=
f
"OCR-det
{
lang
}
"
):
raw_images
=
[
crop_info
[
0
]
for
crop_info
in
group_crops
]
# 计算目标尺寸(组内最大尺寸,向上取整到32的倍数)
# 计算目标尺寸(组内最大尺寸,向上取整到32的倍数)
max_h
=
max
(
img
.
shape
[
0
]
for
img
in
raw_image
s
)
max_h
=
max
(
crop_info
[
0
]
.
shape
[
0
]
for
crop_info
in
group_crop
s
)
max_w
=
max
(
img
.
shape
[
1
]
for
img
in
raw_image
s
)
max_w
=
max
(
crop_info
[
0
]
.
shape
[
1
]
for
crop_info
in
group_crop
s
)
target_h
=
((
max_h
+
32
-
1
)
//
32
)
*
32
target_h
=
((
max_h
+
32
-
1
)
//
32
)
*
32
target_w
=
((
max_w
+
32
-
1
)
//
32
)
*
32
target_w
=
((
max_w
+
32
-
1
)
//
32
)
*
32
# 对所有图像进行padding到统一尺寸
# 对所有图像进行padding到统一尺寸
batch_images
=
[]
batch_images
=
[]
for
img
in
raw_images
:
for
crop_info
in
group_crops
:
img
=
crop_info
[
0
]
h
,
w
=
img
.
shape
[:
2
]
h
,
w
=
img
.
shape
[:
2
]
# 创建目标尺寸的白色背景
# 创建目标尺寸的白色背景
padded_img
=
np
.
ones
((
target_h
,
target_w
,
3
),
dtype
=
np
.
uint8
)
*
255
padded_img
=
np
.
ones
((
target_h
,
target_w
,
3
),
dtype
=
np
.
uint8
)
*
255
...
@@ -208,9 +207,32 @@ class BatchAnalyze:
...
@@ -208,9 +207,32 @@ class BatchAnalyze:
for
i
,
(
crop_info
,
(
dt_boxes
,
elapse
))
in
enumerate
(
zip
(
group_crops
,
batch_results
)):
for
i
,
(
crop_info
,
(
dt_boxes
,
elapse
))
in
enumerate
(
zip
(
group_crops
,
batch_results
)):
new_image
,
useful_list
,
ocr_res_list_dict
,
res
,
adjusted_mfdetrec_res
,
_lang
=
crop_info
new_image
,
useful_list
,
ocr_res_list_dict
,
res
,
adjusted_mfdetrec_res
,
_lang
=
crop_info
if
dt_boxes
is
not
None
:
if
dt_boxes
is
not
None
and
len
(
dt_boxes
)
>
0
:
# 构造OCR结果格式 - 每个box应该是4个点的列表
# 直接应用原始OCR流程中的关键处理步骤
ocr_res
=
[
box
.
tolist
()
for
box
in
dt_boxes
]
from
magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.ocr_utils
import
(
merge_det_boxes
,
update_det_boxes
,
sorted_boxes
)
# 1. 排序检测框
if
len
(
dt_boxes
)
>
0
:
dt_boxes_sorted
=
sorted_boxes
(
dt_boxes
)
else
:
dt_boxes_sorted
=
[]
# 2. 合并相邻检测框
if
dt_boxes_sorted
:
dt_boxes_merged
=
merge_det_boxes
(
dt_boxes_sorted
)
else
:
dt_boxes_merged
=
[]
# 3. 根据公式位置更新检测框(关键步骤!)
if
dt_boxes_merged
and
adjusted_mfdetrec_res
:
dt_boxes_final
=
update_det_boxes
(
dt_boxes_merged
,
adjusted_mfdetrec_res
)
else
:
dt_boxes_final
=
dt_boxes_merged
# 构造OCR结果格式
ocr_res
=
[
box
.
tolist
()
if
hasattr
(
box
,
'tolist'
)
else
box
for
box
in
dt_boxes_final
]
if
ocr_res
:
if
ocr_res
:
ocr_result_list
=
get_ocr_result_list
(
ocr_result_list
=
get_ocr_result_list
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment