Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
54950551
Unverified
Commit
54950551
authored
May 29, 2025
by
Xiaomeng Zhao
Committed by
GitHub
May 29, 2025
Browse files
Update batch_analyze.py
parent
99d4c97a
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
3 additions
and
3 deletions
+3
-3
magic_pdf/model/batch_analyze.py
magic_pdf/model/batch_analyze.py
+3
-3
No files found.
magic_pdf/model/batch_analyze.py
View file @
54950551
...
...
@@ -127,7 +127,7 @@ class BatchAnalyze:
# 收集所有需要OCR检测的裁剪图像
all_cropped_images_info
=
[]
for
ocr_res_list_dict
in
tqdm
(
ocr_res_list_all_page
,
desc
=
"Preparing OCR-det batches"
)
:
for
ocr_res_list_dict
in
ocr_res_list_all_page
:
_lang
=
ocr_res_list_dict
[
'lang'
]
for
res
in
ocr_res_list_dict
[
'ocr_res_list'
]:
...
...
@@ -156,7 +156,7 @@ class BatchAnalyze:
if
not
lang_crop_list
:
continue
logger
.
info
(
f
"Processing OCR detection for language
{
lang
}
with
{
len
(
lang_crop_list
)
}
images"
)
#
logger.info(f"Processing OCR detection for language {lang} with {len(lang_crop_list)} images")
# 获取OCR模型
atom_model_manager
=
AtomModelSingleton
()
...
...
@@ -201,7 +201,7 @@ class BatchAnalyze:
# 批处理检测
batch_size
=
min
(
len
(
batch_images
),
self
.
batch_ratio
*
16
)
# 增加批处理大小
logger
.
debug
(
f
"OCR-det batch:
{
batch_size
}
images, target size:
{
target_h
}
x
{
target_w
}
"
)
#
logger.debug(f"OCR-det batch: {batch_size} images, target size: {target_h}x{target_w}")
batch_results
=
ocr_model
.
text_detector
.
batch_predict
(
batch_images
,
batch_size
)
# 处理批处理结果
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment