"vscode:/vscode.git/clone" did not exist on "407de2c1ec229629579cdb7f8518ba735a6604cf"
Commit 2688e3f7 authored by myhloli

refactor: enhance main function parameters and improve device handling logic

parent c18934a3
@@ -92,14 +92,14 @@ def doc_analyze(
     ocr_enabled_list = []
     for pdf_idx, pdf_bytes in enumerate(pdf_bytes_list):
         # Determine the OCR setting
-        _ocr = False
+        _ocr_enable = False
         if parse_method == 'auto':
             if classify(pdf_bytes) == 'ocr':
-                _ocr = True
+                _ocr_enable = True
         elif parse_method == 'ocr':
-            _ocr = True
-        ocr_enabled_list.append(_ocr)
+            _ocr_enable = True
+        ocr_enabled_list.append(_ocr_enable)
         _lang = lang_list[pdf_idx]
         # Collect the pages of each dataset
@@ -110,7 +110,7 @@ def doc_analyze(
             img_dict = images_list[page_idx]
             all_pages_info.append((
                 pdf_idx, page_idx,
-                img_dict['img_pil'], _ocr, _lang,
+                img_dict['img_pil'], _ocr_enable, _lang,
             ))
     # Prepare batch processing
...
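For context, the renamed flag is decided once per PDF before any pages are collected. Below is a minimal standalone sketch of that decision, assuming classify() returns 'ocr' for image-based PDFs; the helper name decide_ocr_enable and its signature are illustrative, not part of the change.

```python
# Illustrative sketch of the per-PDF OCR decision shown in the hunk above.
# `classify` is assumed to return 'ocr' for image-based PDFs and 'txt' otherwise.
def decide_ocr_enable(pdf_bytes: bytes, parse_method: str, classify) -> bool:
    """Return True when OCR should run for this PDF."""
    if parse_method == 'ocr':
        return True
    if parse_method == 'auto':
        return classify(pdf_bytes) == 'ocr'
    # Explicit text mode ('txt') skips OCR entirely.
    return False

# Usage: build the same per-PDF list that doc_analyze() does.
# ocr_enabled_list = [decide_ocr_enable(b, parse_method, classify) for b in pdf_bytes_list]
```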
@@ -2,7 +2,9 @@
 import os
 import click
 from pathlib import Path
+import torch
 from loguru import logger
+from mineru.utils.model_utils import get_vram
 from ..version import __version__
 from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
@@ -38,7 +40,7 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
     vlm-huggingface: More general.
     vlm-sglang-engine: Faster(engine).
     vlm-sglang-client: Faster(client).
-    without method specified, huggingface will be used by default.""",
+    without method specified, pipeline will be used by default.""",
     default='pipeline',
 )
 @click.option(
@@ -49,6 +51,7 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
     help="""
     Input the languages in the pdf (if known) to improve OCR accuracy. Optional.
     Without languages specified, 'ch' will be used by default.
+    Adapted only for the case where the backend is set to "pipeline".
     """,
     default='ch',
 )
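The next hunk adds several options that follow the same click pattern as the ones above: a short flag, a long flag, and an explicit destination name that becomes the keyword argument click passes to main(). A self-contained illustration of that wiring follows; the demo command is only an example and not part of this commit.

```python
import click

@click.command()
@click.option(
    '-f',
    '--formula',
    'formula_enable',  # click passes the parsed value as the `formula_enable` kwarg
    type=bool,
    help='Enable formula parsing.',
    default=True,
)
def demo(formula_enable):
    click.echo(f'formula parsing enabled: {formula_enable}')

if __name__ == '__main__':
    demo()  # e.g. `python demo.py -f false` prints "formula parsing enabled: False"
```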
@@ -78,8 +81,63 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
     help='The ending page for PDF parsing, beginning from 0.',
     default=None,
 )
+@click.option(
+    '-f',
+    '--formula',
+    'formula_enable',
+    type=bool,
+    help='Enable formula parsing. Default is True. Adapted only for the case where the backend is set to "pipeline".',
+    default=True,
+)
+@click.option(
+    '-t',
+    '--table',
+    'table_enable',
+    type=bool,
+    help='Enable table parsing. Default is True. Adapted only for the case where the backend is set to "pipeline".',
+    default=True,
+)
+@click.option(
+    '-d',
+    '--device',
+    'device_mode',
+    type=str,
+    help='Device mode for model inference, e.g., "cpu", "cuda", "cuda:0", "npu", "npu:0", "mps". Adapted only for the case where the backend is set to "pipeline".',
+    default=None,
+)
+@click.option(
+    '-vm',
+    '--virtual-vram',
+    'virtual_vram',
+    type=int,
+    help='Upper limit of GPU memory (in GB) available for model inference. Adapted only for the case where the backend is set to "pipeline".',
+    default=None,
+)
-def main(input_path, output_dir, backend, lang, server_url, start_page_id, end_page_id):
+def main(input_path, output_dir, backend, lang, server_url, start_page_id, end_page_id, formula_enable, table_enable, device_mode, virtual_vram):
+    os.environ['MINERU_FORMULA_ENABLE'] = str(formula_enable).lower()
+    os.environ['MINERU_TABLE_ENABLE'] = str(table_enable).lower()
+    def get_device_mode() -> str:
+        if device_mode is not None:
+            return device_mode
+        if torch.cuda.is_available():
+            return "cuda"
+        if torch.backends.mps.is_available():
+            return "mps"
+        return "cpu"
+    os.environ['MINERU_DEVICE_MODE'] = get_device_mode()
+    def get_virtual_vram_size() -> int:
+        if virtual_vram is not None:
+            return virtual_vram
+        if get_device_mode().startswith("cuda") or get_device_mode().startswith("npu"):
+            return round(get_vram(get_device_mode()))
+        return 1
+    os.environ['MINERU_VIRTUAL_VRAM_SIZE'] = str(get_virtual_vram_size())
     os.makedirs(output_dir, exist_ok=True)
     def parse_doc(path_list: list[Path]):
...
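The new main() hands its configuration to the rest of the pipeline through MINERU_* environment variables rather than function arguments. How downstream code reads them back is not shown in this diff; the sketch below is one way such a consumer might look, using the same fallbacks the CLI itself applies (the helper name read_mineru_runtime_config is hypothetical).

```python
import os

# Hypothetical consumer-side sketch (not part of this commit): read back the
# configuration that main() exported via environment variables.
def read_mineru_runtime_config() -> dict:
    """Collect the MINERU_* settings written by the CLI, with conservative fallbacks."""
    return {
        # 'true'/'false' strings produced by str(bool).lower() in main()
        'formula_enable': os.getenv('MINERU_FORMULA_ENABLE', 'true') == 'true',
        'table_enable': os.getenv('MINERU_TABLE_ENABLE', 'true') == 'true',
        # e.g. 'cpu', 'cuda', 'cuda:0', 'npu', 'npu:0', 'mps'
        'device_mode': os.getenv('MINERU_DEVICE_MODE', 'cpu'),
        # whole gigabytes, as written by get_virtual_vram_size()
        'virtual_vram_gb': int(os.getenv('MINERU_VIRTUAL_VRAM_SIZE', '1')),
    }

if __name__ == '__main__':
    print(read_mineru_runtime_config())
```

Passing the values through the process environment keeps the pipeline code free of click-specific plumbing, at the cost of stringly-typed configuration; the conversions above are the ones a consumer has to undo.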