Commit 58b8e8a9 authored by myhloli
Browse files

fix: add new enum values and improve MIN_BATCH_INFERENCE_SIZE documentation in pipeline_analyze.py

parent 20dcbd21
......@@ -76,7 +76,11 @@ def doc_analyze(
formula_enable=True,
table_enable=True,
):
MIN_BATCH_INFERENCE_SIZE = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100))
"""
适当调大MIN_BATCH_INFERENCE_SIZE可以提高性能,可能会增加显存使用量,
可通过环境变量MINERU_MIN_BATCH_INFERENCE_SIZE设置,默认值为100。
"""
min_batch_inference_size = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100))
# 收集所有页面信息
all_pages_info = [] # 存储(dataset_index, page_index, img, ocr, lang, width, height)
......@@ -109,7 +113,7 @@ def doc_analyze(
# 准备批处理
images_with_extra_info = [(info[2], info[3], info[4]) for info in all_pages_info]
batch_size = MIN_BATCH_INFERENCE_SIZE
batch_size = min_batch_inference_size
batch_images = [
images_with_extra_info[i:i + batch_size]
for i in range(0, len(images_with_extra_info), batch_size)
......
......@@ -33,9 +33,11 @@ class CategoryId:
TableCaption = 6
TableFootnote = 7
InterlineEquation_Layout = 8
InterlineEquationNumber_Layout = 9
InlineEquation = 13
InterlineEquation_YOLO = 14
OcrText = 15
LowScoreText = 16
ImageFootnote = 101
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment