Unverified commit 3a42ebbf authored by Xiaomeng Zhao, committed by GitHub

Merge pull request #838 from opendatalab/release-0.9.0

Release 0.9.0
parents 765c6d77 14024793
......@@ -29,7 +29,7 @@ jobs:
path-to-document: 'https://github.com/opendatalab/MinerU/blob/master/MinerU_CLA.md' # e.g. a CLA or a DCO document
# branch should not be protected
branch: 'master'
allowlist: myhloli,dt-yy,Focusshang,renpengli01,icecraft,drunkpig,wangbinDL,qiangqiang199,GDDGCZ518,papayalove,conghui,quyuan
allowlist: myhloli,dt-yy,Focusshang,renpengli01,icecraft,drunkpig,wangbinDL,qiangqiang199,GDDGCZ518,papayalove,conghui,quyuan,LollipopsAndWine
# the following inputs are optional - if they are not given, default values will be used
#remote-organization-name: enter the remote organization name where the signatures should be stored (Default is storing the signatures in the same repository)
......
......@@ -10,7 +10,6 @@ on:
paths-ignore:
- "cmds/**"
- "**.md"
- "**.yml"
pull_request:
branches:
- "master"
......@@ -18,12 +17,11 @@ on:
paths-ignore:
- "cmds/**"
- "**.md"
- "**.yml"
workflow_dispatch:
jobs:
cli-test:
runs-on: pdf
timeout-minutes: 120
timeout-minutes: 240
strategy:
fail-fast: true
......@@ -33,16 +31,16 @@ jobs:
with:
fetch-depth: 2
- name: install
- name: install&test
run: |
echo $GITHUB_WORKSPACE && sh tests/retry_env.sh
- name: unit test
run: |
cd $GITHUB_WORKSPACE && export PYTHONPATH=. && coverage run -m pytest tests/test_unit.py --cov=magic_pdf/ --cov-report term-missing --cov-report html
source activate mineru
conda env list
pip show coverage
# cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing
cd $GITHUB_WORKSPACE && python tests/get_coverage.py
- name: cli test
run: |
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py
cd $GITHUB_WORKSPACE && pytest -m P0 -s -v tests/test_cli/test_cli_sdk.py
notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
......
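The `pytest -m P0` invocation above only collects tests tagged with the `P0` marker. A minimal sketch of how such a test is tagged is shown below; the test name is hypothetical and not taken from MinerU's suite:

```python
# Illustrative sketch of a P0-marked test; the test name is hypothetical.
import pytest


@pytest.mark.P0
def test_cli_smoke():
    """Collected by `pytest -m P0 ...`; deselected when other markers are requested."""
    assert True
```

Custom markers such as `P0` are normally registered in `pytest.ini` or a `conftest.py` so pytest does not warn about unknown marks.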
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
name: mineru
on:
schedule:
- cron: '0 22 * * *' # run every day at 22:00
jobs:
cli-test:
runs-on: pdf
timeout-minutes: 240
strategy:
fail-fast: true
steps:
- name: PDF cli
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: install&test
run: |
source activate mineru
conda env list
pip show coverage
# cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing
cd $GITHUB_WORKSPACE && python tests/get_coverage.py
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py
notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
needs: cli-test
runs-on: pdf
steps:
- name: get_actor
run: |
metion_list="dt-yy"
echo $GITHUB_ACTOR
if [[ $GITHUB_ACTOR == "drunkpig" ]]; then
metion_list="xuchao"
elif [[ $GITHUB_ACTOR == "myhloli" ]]; then
metion_list="zhaoxiaomeng"
elif [[ $GITHUB_ACTOR == "icecraft" ]]; then
metion_list="xurui1"
fi
echo $metion_list
echo "METIONS=$metion_list" >> "$GITHUB_ENV"
echo ${{ env.METIONS }}
- name: notify
run: |
echo ${{ secrets.USER_ID }}
curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }}
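For readability, the Feishu webhook payload built inline by the `curl` command above can be re-expressed in Python; this is an illustrative sketch only, with placeholders standing in for the workflow's secrets and variables:

```python
# Sketch of the same Feishu "post" message payload sent by the curl command above.
# WEBHOOK_URL, USER_ID, REPO and RUN_ID are placeholders for the workflow's secrets/variables.
import requests

WEBHOOK_URL = "https://open.feishu.cn/open-apis/bot/v2/hook/<token>"  # placeholder for secrets.WEBHOOK_URL
USER_ID = "<feishu-user-id>"  # placeholder for secrets.USER_ID
REPO = "opendatalab/MinerU"
RUN_ID = "<github-run-id>"

payload = {
    "msg_type": "post",
    "content": {
        "post": {
            "zh_cn": {
                "title": f"{REPO} GitHubAction Failed",
                "content": [[
                    {"tag": "text", "text": ""},
                    {"tag": "a", "text": "Please click here for details ",
                     "href": f"https://github.com/{REPO}/actions/runs/{RUN_ID}"},
                    {"tag": "at", "user_id": USER_ID},
                ]],
            }
        }
    },
}
requests.post(WEBHOOK_URL, json=payload, timeout=10)
```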
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
name: mineru
on:
push:
branches:
- "master"
- "dev"
paths-ignore:
- "cmds/**"
- "**.md"
workflow_dispatch:
jobs:
cli-test:
runs-on: pdf
timeout-minutes: 240
strategy:
fail-fast: true
steps:
- name: PDF cli
uses: actions/checkout@v3
with:
fetch-depth: 2
- name: install&test
run: |
source activate mineru
conda env list
pip show coverage
# cd $GITHUB_WORKSPACE && sh tests/retry_env.sh
cd $GITHUB_WORKSPACE && python tests/clean_coverage.py
cd $GITHUB_WORKSPACE && coverage run -m pytest tests/unittest/ --cov=magic_pdf/ --cov-report html --cov-report term-missing
cd $GITHUB_WORKSPACE && python tests/get_coverage.py
cd $GITHUB_WORKSPACE && pytest -s -v tests/test_cli/test_cli_sdk.py
notify_to_feishu:
if: ${{ always() && !cancelled() && contains(needs.*.result, 'failure') && (github.ref_name == 'master') }}
needs: cli-test
runs-on: pdf
steps:
- name: get_actor
run: |
metion_list="dt-yy"
echo $GITHUB_ACTOR
if [[ $GITHUB_ACTOR == "drunkpig" ]]; then
metion_list="xuchao"
elif [[ $GITHUB_ACTOR == "myhloli" ]]; then
metion_list="zhaoxiaomeng"
elif [[ $GITHUB_ACTOR == "icecraft" ]]; then
metion_list="xurui1"
fi
echo $metion_list
echo "METIONS=$metion_list" >> "$GITHUB_ENV"
echo ${{ env.METIONS }}
- name: notify
run: |
echo ${{ secrets.USER_ID }}
curl -X POST -H "Content-Type: application/json" -d '{"msg_type":"post","content":{"post":{"zh_cn":{"title":"'${{ github.repository }}' GitHubAction Failed","content":[[{"tag":"text","text":""},{"tag":"a","text":"Please click here for details ","href":"https://github.com/'${{ github.repository }}'/actions/runs/'${GITHUB_RUN_ID}'"},{"tag":"at","user_id":"'${{ secrets.USER_ID }}'"}]]}}}}' ${{ secrets.WEBHOOK_URL }}
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
name: update-base
on:
push:
tags:
- '*released'
workflow_dispatch:
jobs:
pdf-test:
runs-on: pdf
timeout-minutes: 40
steps:
- name: update-base
uses: actions/checkout@v3
- name: start-update
run: |
echo "start test"
*.tar
*.tar.gz
venv*/
envs/
slurm_logs/
sync1.sh
data_preprocess_pj1
data-preparation1
__pycache__
*.log
*.pyc
.vscode
debug/
*.ipynb
.idea
# vscode history
.history
.DS_Store
.env
bad_words/
bak/
app/tests/*
temp/
tmp/
tmp
.vscode
.vscode/
ocr_demo
/app/common/__init__.py
/magic_pdf/config/__init__.py
source.dev.env
tmp
*.tar
*.tar.gz
*.zip
venv*/
envs/
slurm_logs/
sync1.sh
data_preprocess_pj1
data-preparation1
__pycache__
*.log
*.pyc
.vscode
debug/
*.ipynb
.idea
# vscode history
.history
.DS_Store
.env
bad_words/
bak/
app/tests/*
temp/
tmp/
tmp
.vscode
.vscode/
ocr_demo
.coveragerc
/app/common/__init__.py
/magic_pdf/config/__init__.py
source.dev.env
tmp
projects/web/node_modules
projects/web/dist
projects/web_demo/web_demo/static/
cli_debug/
debug_utils/
# sphinx docs
_build/
......@@ -3,7 +3,7 @@ repos:
rev: 5.0.4
hooks:
- id: flake8
args: ["--max-line-length=120", "--ignore=E131,E125,W503,W504,E203"]
args: ["--max-line-length=150", "--ignore=E131,E125,W503,W504,E203"]
- repo: https://github.com/PyCQA/isort
rev: 5.11.5
hooks:
......@@ -12,11 +12,12 @@ repos:
rev: v0.32.0
hooks:
- id: yapf
args: ["--style={based_on_style: google, column_limit: 120, indent_width: 4}"]
args: ["--style={based_on_style: google, column_limit: 150, indent_width: 4}"]
- repo: https://github.com/codespell-project/codespell
rev: v2.2.1
hooks:
- id: codespell
args: ['--skip', '*.json']
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
......
version: 2
build:
os: ubuntu-22.04
tools:
python: "3.10"
formats:
- epub
python:
install:
- requirements: docs/zh_cn/requirements.txt
sphinx:
configuration: docs/zh_cn/conf.py
......@@ -31,7 +31,7 @@ RUN python3 -m venv /opt/mineru_venv
RUN /bin/bash -c "source /opt/mineru_venv/bin/activate && \
pip3 install --upgrade pip && \
wget https://gitee.com/myhloli/MinerU/raw/master/requirements-docker.txt && \
pip3 install -r requirements-docker.txt --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple && \
pip3 install -r requirements-docker.txt --extra-index-url https://wheels.myhloli.com -i https://mirrors.aliyun.com/pypi/simple && \
pip3 install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/"
# Copy the configuration file template and install magic-pdf latest
......
......@@ -659,3 +659,4 @@ specific requirements.
if any, to sign a "copyright disclaimer" for the program, if necessary.
For more information on this, and how to apply and follow the GNU AGPL, see
<https://www.gnu.org/licenses/>.
......@@ -290,14 +290,23 @@ https://github.com/opendatalab/MinerU/assets/11393164/20438a02-ce6c-4af8-9dde-d7
# Citation
```bibtex
@misc{2024mineru,
title={MinerU: A One-stop, Open-source, High-quality Data Extraction Tool},
author={MinerU Contributors},
howpublished = {\url{https://github.com/opendatalab/MinerU}},
year={2024}
@misc{wang2024mineruopensourcesolutionprecise,
title={MinerU: An Open-Source Solution for Precise Document Content Extraction},
author={Bin Wang and Chao Xu and Xiaomeng Zhao and Linke Ouyang and Fan Wu and Zhiyuan Zhao and Rui Xu and Kaiwen Liu and Yuan Qu and Fukai Shang and Bo Zhang and Liqun Wei and Zhihao Sui and Wei Li and Botian Shi and Yu Qiao and Dahua Lin and Conghui He},
year={2024},
eprint={2409.18839},
archivePrefix={arXiv},
primaryClass={cs.CV},
url={https://arxiv.org/abs/2409.18839},
}
```
@article{he2024opendatalab,
title={Opendatalab: Empowering general artificial intelligence with open datasets},
author={He, Conghui and Li, Wei and Jin, Zhenjiang and Xu, Chao and Wang, Bin and Lin, Dahua},
journal={arXiv preprint arXiv:2407.13773},
year={2024}
}
```
# Star History
......
import os
import json
from loguru import logger
from magic_pdf.pipe.UNIPipe import UNIPipe
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
import magic_pdf.model as model_config
model_config.__use_inside_model__ = True
try:
current_script_dir = os.path.dirname(os.path.abspath(__file__))
demo_name = "demo1"
pdf_path = os.path.join(current_script_dir, f"{demo_name}.pdf")
model_path = os.path.join(current_script_dir, f"{demo_name}.json")
pdf_bytes = open(pdf_path, "rb").read()
# model_json = json.loads(open(model_path, "r", encoding="utf-8").read())
model_json = []  # pass an empty list as model_json to parse with the built-in model
jso_useful_key = {"_pdf_type": "", "model_list": model_json}
jso_useful_key = {"_pdf_type": "", "model_list": []}
local_image_dir = os.path.join(current_script_dir, 'images')
image_dir = str(os.path.basename(local_image_dir))
image_writer = DiskReaderWriter(local_image_dir)
pipe = UNIPipe(pdf_bytes, jso_useful_key, image_writer)
pipe.pipe_classify()
"""如果没有传入有效的模型数据,则使用内置model解析"""
if len(model_json) == 0:
if model_config.__use_inside_model__:
pipe.pipe_analyze()
else:
logger.error("need model list input")
exit(1)
pipe.pipe_analyze()
pipe.pipe_parse()
md_content = pipe.pipe_mk_markdown(image_dir, drop_mode="none")
with open(f"{demo_name}.md", "w", encoding="utf-8") as f:
......
......@@ -4,13 +4,12 @@ import copy
from loguru import logger
from magic_pdf.libs.draw_bbox import draw_layout_bbox, draw_span_bbox
from magic_pdf.pipe.UNIPipe import UNIPipe
from magic_pdf.pipe.OCRPipe import OCRPipe
from magic_pdf.pipe.TXTPipe import TXTPipe
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
import magic_pdf.model as model_config
model_config.__use_inside_model__ = True
# todo: device type selection (?)
......@@ -47,11 +46,20 @@ def json_md_dump(
)
# visualization
def draw_visualization_bbox(pdf_info, pdf_bytes, local_md_dir, pdf_file_name):
# draw layout boxes, annotated with reading-order results
draw_layout_bbox(pdf_info, pdf_bytes, local_md_dir, pdf_file_name)
# draw span boxes
draw_span_bbox(pdf_info, pdf_bytes, local_md_dir, pdf_file_name)
def pdf_parse_main(
pdf_path: str,
parse_method: str = 'auto',
model_json_path: str = None,
is_json_md_dump: bool = True,
is_draw_visualization_bbox: bool = True,
output_dir: str = None
):
"""
......@@ -108,11 +116,7 @@ def pdf_parse_main(
# if no model data is provided, parse with the built-in model
if not model_json:
if model_config.__use_inside_model__:
pipe.pipe_analyze() # parse
else:
logger.error("need model list input")
exit(1)
pipe.pipe_analyze() # parse
# run the parsing
pipe.pipe_parse()
......@@ -121,10 +125,11 @@ def pdf_parse_main(
content_list = pipe.pipe_mk_uni_format(image_path_parent, drop_mode="none")
md_content = pipe.pipe_mk_markdown(image_path_parent, drop_mode="none")
if is_json_md_dump:
json_md_dump(pipe, md_writer, pdf_name, content_list, md_content)
if is_draw_visualization_bbox:
draw_visualization_bbox(pipe.pdf_mid_data['pdf_info'], pdf_bytes, output_path, pdf_name)
except Exception as e:
logger.exception(e)
......@@ -132,5 +137,5 @@ def pdf_parse_main(
# test
if __name__ == '__main__':
pdf_path = r"C:\Users\XYTK2\Desktop\2024-2016-gb-cd-300.pdf"
pdf_path = r"D:\project\20240617magicpdf\Magic-PDF\demo\demo1.pdf"
pdf_parse_main(pdf_path)
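A minimal usage sketch for the `pdf_parse_main` entry point changed in this diff, meant to be appended to the demo script itself; the file paths are placeholders and the keyword arguments follow the signature shown above:

```python
# Sketch: calling the demo's pdf_parse_main with explicit options.
# "my_doc.pdf" and "output" are placeholder paths.
if __name__ == '__main__':
    pdf_parse_main(
        "my_doc.pdf",
        parse_method='auto',              # default parse method
        is_json_md_dump=True,             # dump intermediate JSON and markdown
        is_draw_visualization_bbox=True,  # draw layout and span bounding boxes
        output_dir="output",
    )
```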
......@@ -11,7 +11,7 @@ pip install magic-pdf[full]
### 2. Encountering the error `pickle.UnpicklingError: invalid load key, 'v'.` during use
This might be due to an incomplete download of the model file. You can try re-downloading the model file and then try again.
Reference: https://github.com/opendatalab/MinerU/issues/143
### 3. Where should the model files be downloaded and how should the `/models-dir` configuration be set?
......@@ -24,7 +24,7 @@ The path for the model files is configured in "magic-pdf.json". just like:
}
```
This path is an absolute path, not a relative path. You can obtain the absolute path in the models directory using the "pwd" command.
Reference: https://github.com/opendatalab/MinerU/issues/155#issuecomment-2230216874
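As a quick sanity check for this setting, the sketch below (illustrative, not part of the MinerU docs) reads `magic-pdf.json` from the user directory and verifies that `models-dir` is an existing absolute path:

```python
# Sketch: verify that models-dir in magic-pdf.json is an existing absolute path.
# Assumes the config file lives in the user directory, as described above.
import json
import os

config_path = os.path.expanduser("~/magic-pdf.json")
with open(config_path, "r", encoding="utf-8") as f:
    models_dir = json.load(f)["models-dir"]

print("models-dir:", models_dir)
print("absolute path:", os.path.isabs(models_dir))
print("exists:", os.path.isdir(models_dir))
```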
### 4. Encountered the error `ImportError: libGL.so.1: cannot open shared object file: No such file or directory` in Ubuntu 22.04 on WSL2
......@@ -38,17 +38,22 @@ sudo apt-get install libgl1-mesa-glx
Reference: https://github.com/opendatalab/MinerU/issues/388
### 5. Encountered error `ModuleNotFoundError: No module named 'fairscale'`
You need to uninstall the module and reinstall it:
```bash
pip uninstall fairscale
pip install fairscale
```
Reference: https://github.com/opendatalab/MinerU/issues/411
### 6. On some newer devices like the H100, the text parsed during OCR using CUDA acceleration is garbled.
CUDA 11 has poor compatibility with newer graphics cards, so the CUDA version used by Paddle needs to be upgraded.
```bash
pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/
```
Reference: https://github.com/opendatalab/MinerU/issues/558
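Before re-running OCR, it can help to confirm that the upgraded PaddlePaddle build actually sees the GPU; a minimal sketch, assuming `paddlepaddle-gpu` is installed in the active environment:

```python
# Sketch: quick check that the PaddlePaddle build can see the GPU.
import paddle

print(paddle.device.get_device())  # expect something like "gpu:0" rather than "cpu"
paddle.utils.run_check()           # PaddlePaddle's built-in installation self-test
```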
# FAQ
### 1. On newer versions of macOS, `pip install magic-pdf[full]` fails with `zsh: no matches found: magic-pdf[full]`
On macOS the default shell has switched from Bash to Z shell, and Z shell has special handling for certain string patterns, which can cause the "no matches found" error.
You can disable globbing on the command line and then retry the installation command:
```bash
setopt no_nomatch
pip install magic-pdf[full]
......@@ -11,41 +12,50 @@ pip install magic-pdf[full]
### 2. Encountering `_pickle.UnpicklingError: invalid load key, 'v'.` during use
This may be caused by an incomplete model file download; try re-downloading the model files and running again.
Reference: https://github.com/opendatalab/MinerU/issues/143
### 3. Where should the model files be downloaded, and how should the models-dir configuration be set?
The path to the model files is configured in "magic-pdf.json" as follows:
```json
{
"models-dir": "/tmp/models"
}
```
This path is an absolute path, not a relative path; you can obtain the absolute path by running "pwd" in the models directory.
Reference: https://github.com/opendatalab/MinerU/issues/155#issuecomment-2230216874
### 4. Encountering `ImportError: libGL.so.1: cannot open shared object file: No such file or directory` in Ubuntu 22.04 on WSL2
Ubuntu 22.04 on WSL2 is missing the `libgl` library; install it with the following command:
```bash
sudo apt-get install libgl1-mesa-glx
```
Reference: https://github.com/opendatalab/MinerU/issues/388
### 5. Encountering `ModuleNotFoundError: No module named 'fairscale'`
Uninstall the module and reinstall it:
```bash
pip uninstall fairscale
pip install fairscale
```
Reference: https://github.com/opendatalab/MinerU/issues/411
### 6. On some newer devices such as the H100, text parsed with CUDA-accelerated OCR is garbled
CUDA 11 has poor compatibility with newer graphics cards, so the CUDA version used by Paddle needs to be upgraded:
```bash
pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/
```
Reference: https://github.com/opendatalab/MinerU/issues/558
# Ubuntu 22.04 LTS
### 1. Check if NVIDIA Drivers Are Installed
```sh
nvidia-smi
```
If you see information similar to the following, it means that the NVIDIA drivers are already installed, and you can skip Step 2.
Notice: `CUDA Version` should be >= 12.1. If the displayed version number is less than 12.1, please upgrade the driver.
```plaintext
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 537.34 Driver Version: 537.34 CUDA Version: 12.2 |
|-----------------------------------------+----------------------+----------------------+
| GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+======================+======================|
| 0 NVIDIA GeForce RTX 3060 Ti WDDM | 00000000:01:00.0 On | N/A |
| 0% 51C P8 12W / 200W | 1489MiB / 8192MiB | 5% Default |
| | | N/A |
+-----------------------------------------+----------------------+----------------------+
```
### 2. Install the Driver
If no driver is installed, use the following command:
```sh
sudo apt-get update
sudo apt-get install nvidia-driver-545
```
Install the proprietary driver and restart your computer after installation.
```sh
reboot
```
### 3. Install Anaconda
If Anaconda is already installed, skip this step.
```sh
wget https://repo.anaconda.com/archive/Anaconda3-2024.06-1-Linux-x86_64.sh
bash Anaconda3-2024.06-1-Linux-x86_64.sh
```
In the final step, enter `yes`, close the terminal, and reopen it.
### 4. Create an Environment Using Conda
Specify Python version 3.10.
```sh
conda create -n MinerU python=3.10
conda activate MinerU
```
### 5. Install Applications
```sh
pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com
```
❗ After installation, make sure to check the version of `magic-pdf` using the following command:
```sh
magic-pdf --version
```
If the version number is less than 0.7.0, please report the issue.
### 6. Download Models
Refer to detailed instructions on [how to download model files](how_to_download_models_en.md).
### 7. Understand the Location of the Configuration File
After completing the [6. Download Models](#6-download-models) step, the script will automatically generate a `magic-pdf.json` file in the user directory and configure the default model path.
You can find the `magic-pdf.json` file in your user directory.
> The user directory for Linux is "/home/username".
### 8. First Run
Download a sample file from the repository and test it.
```sh
wget https://github.com/opendatalab/MinerU/raw/master/demo/small_ocr.pdf
magic-pdf -p small_ocr.pdf
```
### 9. Test CUDA Acceleration
If your graphics card has at least **8GB** of VRAM, follow these steps to test CUDA acceleration:
1. Modify the value of `"device-mode"` in the `magic-pdf.json` configuration file located in your home directory.
```json
......@@ -89,8 +113,6 @@ If your graphics card has at least 8GB of VRAM, follow these steps to test CUDA
### 10. Enable CUDA Acceleration for OCR
❗ The following operations require a graphics card with at least 16GB of VRAM; otherwise, the program may crash or experience reduced performance.
1. Download `paddlepaddle-gpu`. Installation will automatically enable OCR acceleration.
```sh
python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
......
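If editing the JSON by hand is inconvenient, the same change can be scripted. The sketch below is illustrative only: it flips `device-mode` to `cuda` in the `magic-pdf.json` in the user directory (as described above) and checks that PyTorch can see the GPU:

```python
# Sketch: switch magic-pdf.json to CUDA mode and verify that the GPU is visible.
# Assumes the config file lives in the user's home directory, as described above.
import json
import os

import torch

config_path = os.path.expanduser("~/magic-pdf.json")
with open(config_path, "r", encoding="utf-8") as f:
    config = json.load(f)

config["device-mode"] = "cuda"

with open(config_path, "w", encoding="utf-8") as f:
    json.dump(config, f, ensure_ascii=False, indent=4)

print("device-mode set to:", config["device-mode"])
print("CUDA available:", torch.cuda.is_available())
```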
# Ubuntu 22.04 LTS
## 1. Check whether the NVIDIA driver is installed
```bash
nvidia-smi
```
If you see output similar to the following, the NVIDIA driver is already installed and you can skip step 2.
Note: the `CUDA Version` shown should be >= 12.1; if the displayed version is lower than 12.1, please upgrade the driver.
```plaintext
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 537.34 Driver Version: 537.34 CUDA Version: 12.2 |
......@@ -18,78 +24,110 @@ nvidia-smi
| | | N/A |
+-----------------------------------------+----------------------+----------------------+
```
## 2. Install the driver
If no driver is installed, run the following:
```bash
sudo apt-get update
sudo apt-get install nvidia-driver-545
```
Install the proprietary driver and reboot after the installation completes:
```bash
reboot
```
## 3. Install Anaconda
If conda is already installed, you can skip this step.
```bash
wget -U NoSuchBrowser/1.0 https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/Anaconda3-2024.06-1-Linux-x86_64.sh
bash Anaconda3-2024.06-1-Linux-x86_64.sh
```
Enter `yes` at the final step, then close and reopen the terminal.
## 4. Create an environment with conda
The Python version must be 3.10:
```bash
conda create -n MinerU python=3.10
conda activate MinerU
```
## 5. Install the application
```bash
pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com -i https://mirrors.aliyun.com/pypi/simple
```
> ❗️After the installation completes, be sure to verify that the magic-pdf version is correct with the following command:
>
> ```bash
> magic-pdf --version
> ```
>
> If the version number is lower than 0.7.0, please report it to us in an issue.
## 6. Download the models
See [how to download model files](how_to_download_models_zh_cn.md) for details.
## 7. Know where the configuration file is stored
After completing step [6. Download the models](#6-下载模型), the script automatically generates a magic-pdf.json file in the user directory and sets the default model path.
You can find the magic-pdf.json file in your user directory.
> On Linux the user directory is "/home/username".
## 8. First run
Download a sample file from the repository and test it:
```bash
wget https://gitee.com/myhloli/MinerU/raw/master/demo/small_ocr.pdf
magic-pdf -p small_ocr.pdf
```
## 9. Test CUDA acceleration
If your graphics card has at least **8GB** of VRAM, you can follow the steps below to test CUDA-accelerated parsing.
**1. Modify the value of "device-mode" in the magic-pdf.json configuration file in your user directory**
```json
{
"device-mode":"cuda"
}
```
**2. Run the following command to test the CUDA acceleration**
```bash
magic-pdf -p small_ocr.pdf
```
> Tip: you can roughly tell whether CUDA acceleration is working from the per-stage cost times in the log; normally `layout detection cost` and `mfr time` should be at least 10x faster.
## 10. Enable CUDA acceleration for OCR
> ❗️The following steps require a graphics card with at least 16GB of VRAM; otherwise the program may crash or slow down due to insufficient VRAM.
**1. Install paddlepaddle-gpu; OCR acceleration is enabled automatically after installation**
```bash
python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
```
**2. Run the following command to test the OCR acceleration**
```bash
magic-pdf -p small_ocr.pdf
```
> Tip: you can roughly tell whether CUDA acceleration is working from the per-stage cost times in the log; normally `ocr cost` should be at least 10x faster.
# Windows 10/11
### 1. Install CUDA and cuDNN
Required versions: CUDA 11.8 + cuDNN 8.7.0
- CUDA 11.8: https://developer.nvidia.com/cuda-11-8-0-download-archive
- cuDNN v8.7.0 (November 28th, 2022), for CUDA 11.x: https://developer.nvidia.com/rdp/cudnn-archive
### 2. Install Anaconda
If Anaconda is already installed, you can skip this step.
Download link: https://repo.anaconda.com/archive/Anaconda3-2024.06-1-Windows-x86_64.exe
### 3. Create an Environment Using Conda
Python version must be 3.10.
```
conda create -n MinerU python=3.10
conda activate MinerU
```
### 4. Install Applications
```
pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com
```
>❗️After installation, verify the version of `magic-pdf`:
> ```bash
> magic-pdf --version
> ```
> If the version number is less than 0.7.0, please report it in the issues section.
### 5. Download Models
Refer to detailed instructions on [how to download model files](how_to_download_models_en.md).
### 6. Understand the Location of the Configuration File
After completing the [5. Download Models](#5-download-models) step, the script will automatically generate a `magic-pdf.json` file in the user directory and configure the default model path.
You can find the `magic-pdf.json` file in your user directory.
> The user directory for Windows is "C:/Users/username".
### 7. First Run
Download a sample file from the repository and test it.
```powershell
(New-Object System.Net.WebClient).DownloadFile('https://github.com/opendatalab/MinerU/raw/master/demo/small_ocr.pdf', 'small_ocr.pdf')
magic-pdf -p small_ocr.pdf
```
Download a sample file from the repository and test it.
```powershell
wget https://github.com/opendatalab/MinerU/raw/master/demo/small_ocr.pdf -O small_ocr.pdf
magic-pdf -p small_ocr.pdf
```
### 8. Test CUDA Acceleration
If your graphics card has at least 8GB of VRAM, follow these steps to test CUDA-accelerated parsing performance.
1. **Overwrite the installation of torch and torchvision** supporting CUDA.
```
pip install --force-reinstall torch==2.3.1 torchvision==0.18.1 --index-url https://download.pytorch.org/whl/cu118
```
> ❗️Ensure the following versions are specified in the command:
>
> ```
> torch==2.3.1 torchvision==0.18.1
> ```
>
> These are the highest versions we support. Installing higher versions without specifying them will cause the program to fail.
2. **Modify the value of `"device-mode"`** in the `magic-pdf.json` configuration file located in your user directory.
```json
{
"device-mode": "cuda"
}
```
3. **Run the following command to test CUDA acceleration**:
```
magic-pdf -p small_ocr.pdf
```
### 9. Enable CUDA Acceleration for OCR
> ❗️This operation requires a graphics card with at least 16GB of VRAM; otherwise, the program may crash or slow down.
1. **Download paddlepaddle-gpu**, which will automatically enable OCR acceleration upon installation.
```
pip install paddlepaddle-gpu==2.6.1
```
2. **Run the following command to test OCR acceleration**:
```
magic-pdf -p small_ocr.pdf
```