Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
6b1df419
Commit
6b1df419
authored
Jun 03, 2025
by
myhloli
Browse files
refactor: optimize OCR batch processing and enhance image cropping logic
parent
73f85035
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
2 additions
and
2 deletions
+2
-2
mineru/backend/pipeline/batch_analyze.py
mineru/backend/pipeline/batch_analyze.py
+2
-2
No files found.
mineru/backend/pipeline/batch_analyze.py
View file @
6b1df419
...
...
@@ -127,7 +127,7 @@ class BatchAnalyze:
if
not
lang_crop_list
:
continue
logger
.
info
(
f
"Processing OCR detection for language
{
lang
}
with
{
len
(
lang_crop_list
)
}
images"
)
#
logger.info(f"Processing OCR detection for language {lang} with {len(lang_crop_list)} images")
# 获取OCR模型
ocr_model
=
atom_model_manager
.
get_atom_model
(
...
...
@@ -171,7 +171,7 @@ class BatchAnalyze:
# 批处理检测
batch_size
=
min
(
len
(
batch_images
),
self
.
batch_ratio
*
16
)
# 增加批处理大小
logger
.
debug
(
f
"OCR-det batch:
{
batch_size
}
images, target size:
{
target_h
}
x
{
target_w
}
"
)
#
logger.debug(f"OCR-det batch: {batch_size} images, target size: {target_h}x{target_w}")
batch_results
=
ocr_model
.
text_detector
.
batch_predict
(
batch_images
,
batch_size
)
# 处理批处理结果
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment