Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
58b8e8a9
Commit
58b8e8a9
authored
Jun 17, 2025
by
myhloli
Browse files
fix: add new enum values and improve MIN_BATCH_INFERENCE_SIZE documentation in pipeline_analyze.py
parent
20dcbd21
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
2 deletions
+8
-2
mineru/backend/pipeline/pipeline_analyze.py
mineru/backend/pipeline/pipeline_analyze.py
+6
-2
mineru/utils/enum_class.py
mineru/utils/enum_class.py
+2
-0
No files found.
mineru/backend/pipeline/pipeline_analyze.py
View file @
58b8e8a9
...
@@ -76,7 +76,11 @@ def doc_analyze(
...
@@ -76,7 +76,11 @@ def doc_analyze(
formula_enable
=
True
,
formula_enable
=
True
,
table_enable
=
True
,
table_enable
=
True
,
):
):
MIN_BATCH_INFERENCE_SIZE
=
int
(
os
.
environ
.
get
(
'MINERU_MIN_BATCH_INFERENCE_SIZE'
,
100
))
"""
适当调大MIN_BATCH_INFERENCE_SIZE可以提高性能,可能会增加显存使用量,
可通过环境变量MINERU_MIN_BATCH_INFERENCE_SIZE设置,默认值为100。
"""
min_batch_inference_size
=
int
(
os
.
environ
.
get
(
'MINERU_MIN_BATCH_INFERENCE_SIZE'
,
100
))
# 收集所有页面信息
# 收集所有页面信息
all_pages_info
=
[]
# 存储(dataset_index, page_index, img, ocr, lang, width, height)
all_pages_info
=
[]
# 存储(dataset_index, page_index, img, ocr, lang, width, height)
...
@@ -109,7 +113,7 @@ def doc_analyze(
...
@@ -109,7 +113,7 @@ def doc_analyze(
# 准备批处理
# 准备批处理
images_with_extra_info
=
[(
info
[
2
],
info
[
3
],
info
[
4
])
for
info
in
all_pages_info
]
images_with_extra_info
=
[(
info
[
2
],
info
[
3
],
info
[
4
])
for
info
in
all_pages_info
]
batch_size
=
MIN_BATCH_INFERENCE_SIZE
batch_size
=
min_batch_inference_size
batch_images
=
[
batch_images
=
[
images_with_extra_info
[
i
:
i
+
batch_size
]
images_with_extra_info
[
i
:
i
+
batch_size
]
for
i
in
range
(
0
,
len
(
images_with_extra_info
),
batch_size
)
for
i
in
range
(
0
,
len
(
images_with_extra_info
),
batch_size
)
...
...
mineru/utils/enum_class.py
View file @
58b8e8a9
...
@@ -33,9 +33,11 @@ class CategoryId:
...
@@ -33,9 +33,11 @@ class CategoryId:
TableCaption
=
6
TableCaption
=
6
TableFootnote
=
7
TableFootnote
=
7
InterlineEquation_Layout
=
8
InterlineEquation_Layout
=
8
InterlineEquationNumber_Layout
=
9
InlineEquation
=
13
InlineEquation
=
13
InterlineEquation_YOLO
=
14
InterlineEquation_YOLO
=
14
OcrText
=
15
OcrText
=
15
LowScoreText
=
16
ImageFootnote
=
101
ImageFootnote
=
101
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment