Merge pull request #1759 from opendatalab/release-1.2.0

Release 1.2.0

Commit 1f497129 (unverified) · authored by Xiaomeng Zhao on Feb 24, 2025 · committed by GitHub on Feb 24, 2025
Other commit hashes listed on the page: 9a87d3ea, bd3a7b37
20 changed files
--- a/README.md
+++ b/README.md
@@ -244,8 +244,8 @@ Synced with dev branch updates:
 #### 1. Install magic-pdf
 ```bash
-conda create -n MinerU python=3.10
+conda create -n mineru python=3.10
-conda activate MinerU
+conda activate mineru
 pip install -U "magic-pdf[full]" --extra-index-url https://wheels.myhloli.com
 ```
@@ -305,7 +305,7 @@ If your device supports CUDA and meets the GPU requirements of the mainline envi
  ```bash
  wget https://github.com/opendatalab/MinerU/raw/master/docker/global/Dockerfile -O Dockerfile
  docker build -t mineru:latest .
-  docker run --rm -it --gpus=all mineru:latest /bin/bash -c "echo 'source /opt/mineru_venv/bin/activate' >> ~/.bashrc && exec bash"
+  docker run -it --name mineru --gpus=all mineru:latest /bin/bash -c "echo 'source /opt/mineru_venv/bin/activate' >> ~/.bashrc && exec bash"
  magic-pdf --help
  ```

--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -248,8 +248,8 @@ https://github.com/user-attachments/assets/4bea02c9-6d54-4cd6-97ed-dff14340982c
 > 最新版本国内镜像源同步可能会有延迟，请耐心等待
 ```bash
-conda create -n MinerU python=3.10
+conda create -n mineru python=3.10
-conda activate MinerU
+conda activate mineru
 pip install -U "magic-pdf[full]" --extra-index-url https://wheels.myhloli.com -i https://mirrors.aliyun.com/pypi/simple
 ```
@@ -308,7 +308,7 @@ pip install -U "magic-pdf[full]" --extra-index-url https://wheels.myhloli.com -i
  ```bash
  wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/Dockerfile -O Dockerfile
  docker build -t mineru:latest .
-  docker run --rm -it --gpus=all mineru:latest /bin/bash -c "echo 'source /opt/mineru_venv/bin/activate' >> ~/.bashrc && exec bash"
+  docker run -it --name mineru --gpus=all mineru:latest /bin/bash -c "echo 'source /opt/mineru_venv/bin/activate' >> ~/.bashrc && exec bash"
  magic-pdf --help
  ```
 ### 使用NPU

--- a/docker/ascend_npu/Dockerfile
+++ b/docker/ascend_npu/Dockerfile
@@ -36,7 +36,8 @@ RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
    wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/ascend_npu/requirements.txt -O requirements.txt && \
    pip3 install -r requirements.txt --extra-index-url https://wheels.myhloli.com -i https://mirrors.aliyun.com/pypi/simple && \
    wget https://gitee.com/ascend/pytorch/releases/download/v6.0.rc2-pytorch2.3.1/torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl && \
-    pip install torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl"
+    pip3 install torch_npu-2.3.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl && \
+    pip3 install https://gcore.jsdelivr.net/gh/myhloli/wheels@main/assets/whl/paddle-custom-npu/paddle_custom_npu-0.0.0-cp310-cp310-linux_aarch64.whl"
 # Copy the configuration file template and install magic-pdf latest
 RUN /bin/bash -c "wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/magic-pdf.template.json && \

--- a/docker/ascend_npu/requirements.txt
+++ b/docker/ascend_npu/requirements.txt
@@ -13,12 +13,12 @@ torchvision>=0.17.2,<=0.18.1
 matplotlib
 ultralytics>=8.3.48
 paddleocr==2.7.3
-paddlepaddle==3.0.0b1
+paddlepaddle==3.0.0rc1
 struct-eqtable==0.3.2
 einops
 accelerate
-rapidocr-paddle
+rapidocr-paddle>=1.4.5,<2.0.0
-rapidocr-onnxruntime
+rapidocr-onnxruntime>=1.4.4,<2.0.0
 rapid-table>=1.0.3,<2.0.0
 doclayout-yolo==0.0.2b1
 openai

--- a/docker/china/Dockerfile
+++ b/docker/china/Dockerfile
@@ -32,7 +32,7 @@ RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
    pip3 install --upgrade pip -i https://mirrors.aliyun.com/pypi/simple && \
    wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/docker/china/requirements.txt -O requirements.txt && \
    pip3 install -r requirements.txt --extra-index-url https://wheels.myhloli.com -i https://mirrors.aliyun.com/pypi/simple && \
-    pip3 install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/"
+    pip3 install paddlepaddle-gpu==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/"
 # Copy the configuration file template and install magic-pdf latest
 RUN /bin/bash -c "wget https://gcore.jsdelivr.net/gh/opendatalab/MinerU@master/magic-pdf.template.json && \

--- a/docker/china/requirements.txt
+++ b/docker/china/requirements.txt
@@ -16,8 +16,8 @@ paddleocr==2.7.3
 struct-eqtable==0.3.2
 einops
 accelerate
-rapidocr-paddle
+rapidocr-paddle>=1.4.5,<2.0.0
-rapidocr-onnxruntime
+rapidocr-onnxruntime>=1.4.4,<2.0.0
 rapid-table>=1.0.3,<2.0.0
 doclayout-yolo==0.0.2b1
 openai

--- a/docker/global/Dockerfile
+++ b/docker/global/Dockerfile
@@ -32,7 +32,7 @@ RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
    pip3 install --upgrade pip && \
    wget https://github.com/opendatalab/MinerU/raw/master/docker/global/requirements.txt -O requirements.txt && \
    pip3 install -r requirements.txt --extra-index-url https://wheels.myhloli.com && \
-    pip3 install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/"
+    pip3 install paddlepaddle-gpu==3.0.0rc1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/"
 # Copy the configuration file template and install magic-pdf latest
 RUN /bin/bash -c "wget https://github.com/opendatalab/MinerU/raw/master/magic-pdf.template.json && \

--- a/docker/global/requirements.txt
+++ b/docker/global/requirements.txt
@@ -16,8 +16,8 @@ paddleocr==2.7.3
 struct-eqtable==0.3.2
 einops
 accelerate
-rapidocr-paddle
+rapidocr-paddle>=1.4.5,<2.0.0
-rapidocr-onnxruntime
+rapidocr-onnxruntime>=1.4.4,<2.0.0
 rapid-table>=1.0.3,<2.0.0
 doclayout-yolo==0.0.2b1
 openai

--- a/docs/README_Ascend_NPU_Acceleration_zh_CN.md
+++ b/docs/README_Ascend_NPU_Acceleration_zh_CN.md
@@ -25,7 +25,7 @@ docker build -t mineru_npu:latest .
 ## 运行容器
 ```bash
-docker run --rm -it -u root --privileged=true \
+docker run -it -u root --name mineru-npu --privileged=true \
    --ipc=host \
    --network=host \
    --device=/dev/davinci0 \

--- a/docs/README_Windows_CUDA_Acceleration_en_US.md
+++ b/docs/README_Windows_CUDA_Acceleration_en_US.md
@@ -65,7 +65,7 @@ If your graphics card has at least 8GB of VRAM, follow these steps to test CUDA-
 1. **Overwrite the installation of torch and torchvision** supporting CUDA.
   ```
-   pip install --force-reinstall torch==2.3.1 torchvision==0.18.1 --index-url https://download.pytorch.org/whl/cu118
+   pip install --force-reinstall torch==2.3.1 torchvision==0.18.1 "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cu118
   ```
 2. **Modify the value of `"device-mode"`** in the `magic-pdf.json` configuration file located in your user directory.

--- a/docs/README_Windows_CUDA_Acceleration_zh_CN.md
+++ b/docs/README_Windows_CUDA_Acceleration_zh_CN.md
@@ -66,7 +66,7 @@ pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com -i h
 **1.覆盖安装支持cuda的torch和torchvision**
 ```bash
-pip install --force-reinstall torch==2.3.1 torchvision==0.18.1 --index-url https://download.pytorch.org/whl/cu118
+pip install --force-reinstall torch==2.3.1 torchvision==0.18.1 "numpy<2.0.0" --index-url https://download.pytorch.org/whl/cu118
 ```
 **2.修改【用户目录】中配置文件magic-pdf.json中"device-mode"的值**

--- a/magic_pdf/filter/__init__.py
+++ b/magic_pdf/filter/__init__.py
@@ -23,7 +23,7 @@ def classify(pdf_bytes: bytes) -> SupportedPdfParseMethod:
                pdf_meta['image_info_per_page'],
                pdf_meta['text_len_per_page'],
                pdf_meta['imgs_per_page'],
-                pdf_meta['text_layout_per_page'],
+                # pdf_meta['text_layout_per_page'],
                pdf_meta['invalid_chars'],
            )
            if is_text_pdf:

--- a/magic_pdf/filter/pdf_classify_by_type.py
+++ b/magic_pdf/filter/pdf_classify_by_type.py
@@ -305,7 +305,8 @@ def classify_by_img_narrow_strips(page_width, page_height, img_sz_list):
 def classify(total_page: int, page_width, page_height, img_sz_list: list, text_len_list: list, img_num_list: list,
-             text_layout_list: list, invalid_chars: bool):
+             # text_layout_list: list,
+             invalid_chars: bool):
    """
    这里的图片和页面长度单位是pts
    :param total_page:
@@ -321,7 +322,7 @@ def classify(total_page: int, page_width, page_height, img_sz_list: list, text_l
        'by_text_len': classify_by_text_len(text_len_list, total_page),
        'by_avg_words': classify_by_avg_words(text_len_list),
        'by_img_num': classify_by_img_num(img_sz_list, img_num_list),
-        'by_text_layout': classify_by_text_layout(text_layout_list),
+        # 'by_text_layout': classify_by_text_layout(text_layout_list),
        'by_img_narrow_strips': classify_by_img_narrow_strips(page_width, page_height, img_sz_list),
        'by_invalid_chars': invalid_chars,
    }
@@ -332,9 +333,10 @@ def classify(total_page: int, page_width, page_height, img_sz_list: list, text_l
        return False, results
    else:
        logger.warning(
-            f"pdf is not classified by area and text_len, by_image_area: {results['by_image_area']},"
+            f"OCR needed based on classification result, by_image_area: {results['by_image_area']},"
            f" by_text: {results['by_text_len']}, by_avg_words: {results['by_avg_words']}, by_img_num: {results['by_img_num']},"
-            f" by_text_layout: {results['by_text_layout']}, by_img_narrow_strips: {results['by_img_narrow_strips']},"
+            # f" by_text_layout: {results['by_text_layout']},"
+            f" by_img_narrow_strips: {results['by_img_narrow_strips']},"
            f" by_invalid_chars: {results['by_invalid_chars']}",
            file=sys.stderr)  # 利用这种情况可以快速找出来哪些pdf比较特殊，针对性修正分类算法
        return False, results

--- a/magic_pdf/filter/pdf_meta_scan.py
+++ b/magic_pdf/filter/pdf_meta_scan.py
@@ -356,9 +356,9 @@ def pdf_meta_scan(pdf_bytes: bytes):
        # logger.info(f"image_info_per_page: {image_info_per_page}, junk_img_bojids: {junk_img_bojids}")
        text_len_per_page = get_pdf_textlen_per_page(doc)
        # logger.info(f"text_len_per_page: {text_len_per_page}")
-        text_layout_per_page = get_pdf_text_layout_per_page(doc)
+        # text_layout_per_page = get_pdf_text_layout_per_page(doc)
        # logger.info(f"text_layout_per_page: {text_layout_per_page}")
-        text_language = get_language(doc)
+        # text_language = get_language(doc)
        # logger.info(f"text_language: {text_language}")
        invalid_chars = check_invalid_chars(pdf_bytes)
        # logger.info(f"invalid_chars: {invalid_chars}")
@@ -372,8 +372,8 @@ def pdf_meta_scan(pdf_bytes: bytes):
            'page_height_pts': int(page_height_pts),
            'image_info_per_page': image_info_per_page,
            'text_len_per_page': text_len_per_page,
-            'text_layout_per_page': text_layout_per_page,
+            # 'text_layout_per_page': text_layout_per_page,
-            'text_language': text_language,
+            # 'text_language': text_language,
            # "svgs_per_page": svgs_per_page,
            'imgs_per_page': imgs_per_page,  # 增加每页img数量list
            'junk_img_bojids': junk_img_bojids,  # 增加垃圾图片的bojid list

--- a/magic_pdf/libs/pdf_check.py
+++ b/magic_pdf/libs/pdf_check.py
@@ -4,6 +4,7 @@ from loguru import logger
 import re
 from io import BytesIO
 from pdfminer.high_level import extract_text
+from pdfminer.layout import LAParams
 def calculate_sample_count(total_page: int):
@@ -41,7 +42,16 @@ def detect_invalid_chars(src_pdf_bytes: bytes) -> bool:
    sample_docs = extract_pages(src_pdf_bytes)
    sample_pdf_bytes = sample_docs.tobytes()
    sample_pdf_file_like_object = BytesIO(sample_pdf_bytes)
-    text = extract_text(sample_pdf_file_like_object)
+    laparams = LAParams(
+        line_overlap=0.5,
+        char_margin=2.0,
+        line_margin=0.5,
+        word_margin=0.1,
+        boxes_flow=None,
+        detect_vertical=False,
+        all_texts=False,
+    )
+    text = extract_text(pdf_file=sample_pdf_file_like_object, laparams=laparams)
    text = text.replace("\n", "")
    # logger.info(text)
    '''乱码文本用pdfminer提取出来的文本特征是(cid:xxx)'''

--- a/magic_pdf/model/doc_analyze_by_custom_model.py
+++ b/magic_pdf/model/doc_analyze_by_custom_model.py
 import os
 import time
+import torch
+os.environ['FLAGS_npu_jit_compile'] = '0'  # 关闭paddle的jit编译
+os.environ['FLAGS_use_stride_kernel'] = '0'
+os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'  # 让mps可以fallback
+os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1'  # 禁止albumentations检查更新
 # 关闭paddle的信号处理
 import paddle
-import torch
+paddle.disable_signal_handler()
 from loguru import logger
 from magic_pdf.model.batch_analyze import BatchAnalyze
 from magic_pdf.model.sub_modules.model_utils import get_vram
-paddle.disable_signal_handler()
-os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1'  # 禁止albumentations检查更新
 try:
    import torchtext
    if torchtext.__version__ >= '0.18.0':
        torchtext.disable_torchtext_deprecation_warning()
 except ImportError:
@@ -32,20 +33,6 @@ from magic_pdf.model.model_list import MODEL
 from magic_pdf.operators.models import InferenceResult
-def dict_compare(d1, d2):
-    return d1.items() == d2.items()
-def remove_duplicates_dicts(lst):
-    unique_dicts = []
-    for dict_item in lst:
-        if not any(
-            dict_compare(dict_item, existing_dict) for existing_dict in unique_dicts
-        ):
-            unique_dicts.append(dict_item)
-    return unique_dicts
 class ModelSingleton:
    _instance = None
    _models = {}
@@ -158,7 +145,11 @@ def doc_analyze(
    table_enable=None,
 ) -> InferenceResult:
-    end_page_id = end_page_id if end_page_id else len(dataset) - 1
+    end_page_id = (
+        end_page_id
+        if end_page_id is not None and end_page_id >= 0
+        else len(dataset) - 1
+    )
    model_manager = ModelSingleton()
    custom_model = model_manager.get_model(
@@ -178,21 +169,20 @@ def doc_analyze(
        gpu_memory = int(os.getenv("VIRTUAL_VRAM_SIZE", round(get_vram(device))))
        if gpu_memory is not None and gpu_memory >= 8:
-            if 8 <= gpu_memory < 10:
+            if gpu_memory >= 40:
-                batch_ratio = 2
+                batch_ratio = 32
-            elif 10 <= gpu_memory <= 12:
+            elif gpu_memory >=20:
-                batch_ratio = 4
-            elif 12 < gpu_memory <= 16:
-                batch_ratio = 8
-            elif 16 < gpu_memory <= 24:
                batch_ratio = 16
+            elif gpu_memory >= 16:
+                batch_ratio = 8
+            elif gpu_memory >= 10:
+                batch_ratio = 4
            else:
-                batch_ratio = 32
+                batch_ratio = 2
-            if batch_ratio >= 1:
+            logger.info(f'gpu_memory: {gpu_memory} GB, batch_ratio: {batch_ratio}')
-                logger.info(f'gpu_memory: {gpu_memory} GB, batch_ratio: {batch_ratio}')
+            batch_model = BatchAnalyze(model=custom_model, batch_ratio=batch_ratio)
-                batch_model = BatchAnalyze(model=custom_model, batch_ratio=batch_ratio)
+            batch_analyze = True
-                batch_analyze = True
    model_json = []
    doc_analyze_start = time.time()

--- a/magic_pdf/model/magic_model.py
+++ b/magic_pdf/model/magic_model.py
@@ -450,11 +450,132 @@ class MagicModel:
            )
        return ret
+    def __tie_up_category_by_distance_v3(
+        self,
+        page_no: int,
+        subject_category_id: int,
+        object_category_id: int,
+        priority_pos: PosRelationEnum,
+    ):
+        subjects = self.__reduct_overlap(
+            list(
+                map(
+                    lambda x: {'bbox': x['bbox'], 'score': x['score']},
+                    filter(
+                        lambda x: x['category_id'] == subject_category_id,
+                        self.__model_list[page_no]['layout_dets'],
+                    ),
+                )
+            )
+        )
+        objects = self.__reduct_overlap(
+            list(
+                map(
+                    lambda x: {'bbox': x['bbox'], 'score': x['score']},
+                    filter(
+                        lambda x: x['category_id'] == object_category_id,
+                        self.__model_list[page_no]['layout_dets'],
+                    ),
+                )
+            )
+        )
+        ret = []
+        N, M = len(subjects), len(objects)
+        subjects.sort(key=lambda x: x['bbox'][0] ** 2 + x['bbox'][1] ** 2)
+        objects.sort(key=lambda x: x['bbox'][0] ** 2 + x['bbox'][1] ** 2)
+        OBJ_IDX_OFFSET = 10000
+        SUB_BIT_KIND, OBJ_BIT_KIND = 0, 1
+        all_boxes_with_idx = [(i, SUB_BIT_KIND, sub['bbox'][0], sub['bbox'][1]) for i, sub in enumerate(subjects)] + [(i + OBJ_IDX_OFFSET , OBJ_BIT_KIND, obj['bbox'][0], obj['bbox'][1]) for i, obj in enumerate(objects)]
+        seen_idx = set()
+        seen_sub_idx = set()
+        while N > len(seen_sub_idx):
+            candidates = []
+            for idx, kind, x0, y0 in all_boxes_with_idx:
+                if idx in seen_idx:
+                    continue
+                candidates.append((idx, kind, x0, y0))
+            if len(candidates) == 0:
+                break
+            left_x = min([v[2] for v in candidates])
+            top_y =  min([v[3] for v in candidates])
+            candidates.sort(key=lambda x: (x[2]-left_x) ** 2 + (x[3] - top_y) ** 2)
+            fst_idx, fst_kind, left_x, top_y = candidates[0]
+            candidates.sort(key=lambda x: (x[2] - left_x) ** 2 + (x[3] - top_y)**2)
+            nxt = None
+            for i in range(1, len(candidates)):
+                if candidates[i][1] ^ fst_kind == 1:
+                    nxt = candidates[i]
+                    break
+            if nxt is None:
+                break
+            if fst_kind == SUB_BIT_KIND:
+                sub_idx, obj_idx = fst_idx, nxt[0] - OBJ_IDX_OFFSET
+            else:
+                sub_idx, obj_idx = nxt[0], fst_idx - OBJ_IDX_OFFSET
+            pair_dis = bbox_distance(subjects[sub_idx]['bbox'], objects[obj_idx]['bbox'])
+            nearest_dis = float('inf')
+            for i in range(N):
+                if i in seen_idx:continue
+                nearest_dis = min(nearest_dis, bbox_distance(subjects[i]['bbox'], objects[obj_idx]['bbox']))
+            if pair_dis >= 3*nearest_dis:
+                seen_idx.add(sub_idx)
+                continue
+            seen_idx.add(sub_idx)
+            seen_idx.add(obj_idx + OBJ_IDX_OFFSET)
+            seen_sub_idx.add(sub_idx)
+            ret.append(
+                {
+                    'sub_bbox': {
+                        'bbox': subjects[sub_idx]['bbox'],
+                        'score': subjects[sub_idx]['score'],
+                    },
+                    'obj_bboxes': [
+                        {'score': objects[obj_idx]['score'], 'bbox': objects[obj_idx]['bbox']}
+                    ],
+                    'sub_idx': sub_idx,
+                }
+            )
+        for i in range(len(subjects)):
+            if i in seen_sub_idx:
+                continue
+            ret.append(
+                {
+                    'sub_bbox': {
+                        'bbox': subjects[i]['bbox'],
+                        'score': subjects[i]['score'],
+                    },
+                    'obj_bboxes': [],
+                    'sub_idx': i,
+                }
+            )
+        return ret
    def get_imgs_v2(self, page_no: int):
-        with_captions = self.__tie_up_category_by_distance_v2(
+        with_captions = self.__tie_up_category_by_distance_v3(
            page_no, 3, 4, PosRelationEnum.BOTTOM
        )
-        with_footnotes = self.__tie_up_category_by_distance_v2(
+        with_footnotes = self.__tie_up_category_by_distance_v3(
            page_no, 3, CategoryId.ImageFootnote, PosRelationEnum.ALL
        )
        ret = []
@@ -470,10 +591,10 @@ class MagicModel:
        return ret
    def get_tables_v2(self, page_no: int) -> list:
-        with_captions = self.__tie_up_category_by_distance_v2(
+        with_captions = self.__tie_up_category_by_distance_v3(
            page_no, 5, 6, PosRelationEnum.UP
        )
-        with_footnotes = self.__tie_up_category_by_distance_v2(
+        with_footnotes = self.__tie_up_category_by_distance_v3(
            page_no, 5, 7, PosRelationEnum.ALL
        )
        ret = []

--- a/magic_pdf/model/pdf_extract_kit.py
+++ b/magic_pdf/model/pdf_extract_kit.py
@@ -89,13 +89,6 @@ class CustomPEKModel:
        # 初始化解析方案
        self.device = kwargs.get('device', 'cpu')
-        if str(self.device).startswith("npu"):
-            import torch_npu
-            os.environ['FLAGS_npu_jit_compile'] = '0'
-            os.environ['FLAGS_use_stride_kernel'] = '0'
-        elif str(self.device).startswith("mps"):
-            os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'
        logger.info('using device: {}'.format(self.device))
        models_dir = kwargs.get(
            'models_dir', os.path.join(root_dir, 'resources', 'models')

--- a/magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py
+++ b/magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py
 # Copyright (c) Opendatalab. All rights reserved.
+import time
 from collections import Counter
 from uuid import uuid4
@@ -102,9 +103,9 @@ class YOLOv11LangDetModel(object):
            temp_images = split_images(image)
            for temp_image in temp_images:
                all_images.append(resize_images_to_224(temp_image))
+        # langdetect_start = time.time()
-        images_lang_res = self.batch_predict(all_images, batch_size=8)
+        images_lang_res = self.batch_predict(all_images, batch_size=256)
-        # logger.info(f"images_lang_res: {images_lang_res}")
+        # logger.info(f"image number of langdetect: {len(images_lang_res)}, langdetect time: {round(time.time() - langdetect_start, 2)}")
        if len(images_lang_res) > 0:
            count_dict = Counter(images_lang_res)
            language = max(count_dict, key=count_dict.get)

--- a/magic_pdf/model/sub_modules/model_init.py
+++ b/magic_pdf/model/sub_modules/model_init.py
@@ -4,22 +4,37 @@ from loguru import logger
 from magic_pdf.config.constants import MODEL_NAME
 from magic_pdf.model.model_list import AtomicModel
 from magic_pdf.model.sub_modules.language_detection.yolov11.YOLOv11 import YOLOv11LangDetModel
-from magic_pdf.model.sub_modules.layout.doclayout_yolo.DocLayoutYOLO import \
+from magic_pdf.model.sub_modules.layout.doclayout_yolo.DocLayoutYOLO import DocLayoutYOLOModel
-    DocLayoutYOLOModel
+from magic_pdf.model.sub_modules.layout.layoutlmv3.model_init import Layoutlmv3_Predictor
-from magic_pdf.model.sub_modules.layout.layoutlmv3.model_init import \
-    Layoutlmv3_Predictor
 from magic_pdf.model.sub_modules.mfd.yolov8.YOLOv8 import YOLOv8MFDModel
 from magic_pdf.model.sub_modules.mfr.unimernet.Unimernet import UnimernetModel
-from magic_pdf.model.sub_modules.ocr.paddleocr.ppocr_273_mod import \
-    ModifiedPaddleOCR
-from magic_pdf.model.sub_modules.table.rapidtable.rapid_table import \
-    RapidTableModel
-# from magic_pdf.model.sub_modules.ocr.paddleocr.ppocr_291_mod import ModifiedPaddleOCR
-from magic_pdf.model.sub_modules.table.structeqtable.struct_eqtable import \
-    StructTableModel
-from magic_pdf.model.sub_modules.table.tablemaster.tablemaster_paddle import \
-    TableMasterPaddleModel
+try:
+    from magic_pdf_ascend_plugin.libs.license_verifier import load_license, LicenseFormatError, LicenseSignatureError, LicenseExpiredError
+    from magic_pdf_ascend_plugin.model_plugin.ocr.paddleocr.ppocr_273_npu import ModifiedPaddleOCR
+    from magic_pdf_ascend_plugin.model_plugin.table.rapidtable.rapid_table_npu import RapidTableModel
+    license_key = load_license()
+    logger.info(f'Using Ascend Plugin Success, License id is {license_key["payload"]["id"]},'
+                f' License expired at {license_key["payload"]["date"]["end_date"]}')
+except Exception as e:
+    if isinstance(e, ImportError):
+        pass
+    elif isinstance(e, LicenseFormatError):
+        logger.error("Ascend Plugin: Invalid license format. Please check the license file.")
+    elif isinstance(e, LicenseSignatureError):
+        logger.error("Ascend Plugin: Invalid signature. The license may be tampered with.")
+    elif isinstance(e, LicenseExpiredError):
+        logger.error("Ascend Plugin: License has expired. Please renew your license.")
+    elif isinstance(e, FileNotFoundError):
+        logger.error("Ascend Plugin: Not found License file.")
+    else:
+        logger.error(f"Ascend Plugin: {e}")
+    from magic_pdf.model.sub_modules.ocr.paddleocr.ppocr_273_mod import ModifiedPaddleOCR
+    # from magic_pdf.model.sub_modules.ocr.paddleocr.ppocr_291_mod import ModifiedPaddleOCR
+    from magic_pdf.model.sub_modules.table.rapidtable.rapid_table import RapidTableModel
+from magic_pdf.model.sub_modules.table.structeqtable.struct_eqtable import StructTableModel
+from magic_pdf.model.sub_modules.table.tablemaster.tablemaster_paddle import TableMasterPaddleModel
 def table_model_init(table_model_type, model_path, max_time, _device_='cpu', ocr_engine=None, table_sub_model_name=None):
    if table_model_type == MODEL_NAME.STRUCT_EQTABLE:
@@ -76,7 +91,6 @@ def ocr_model_init(show_log: bool = False,
                   use_dilation=True,
                   det_db_unclip_ratio=1.8,
                   ):
    if lang is not None and lang != '':
        model = ModifiedPaddleOCR(
            show_log=show_log,