Unverified Commit ece7f8d5 authored by Kaiwen Liu's avatar Kaiwen Liu Committed by GitHub
Browse files

Merge pull request #6 from opendatalab/dev

Dev
parents 98362a6e 702b6ac9
...@@ -44,6 +44,18 @@ auto: automatically choose the best method for parsing pdf from ocr and txt. ...@@ -44,6 +44,18 @@ auto: automatically choose the best method for parsing pdf from ocr and txt.
without method specified, auto will be used by default.""", without method specified, auto will be used by default.""",
default='auto', default='auto',
) )
@click.option(
'-l',
'--lang',
'lang',
type=str,
help="""
Input the languages in the pdf (if known) to improve OCR accuracy. Optional.
You should input "Abbreviation" with language form url:
https://paddlepaddle.github.io/PaddleOCR/en/ppocr/blog/multi_languages.html#5-support-languages-and-abbreviations
""",
default=None,
)
@click.option( @click.option(
'-d', '-d',
'--debug', '--debug',
...@@ -68,7 +80,7 @@ without method specified, auto will be used by default.""", ...@@ -68,7 +80,7 @@ without method specified, auto will be used by default.""",
help='The ending page for PDF parsing, beginning from 0.', help='The ending page for PDF parsing, beginning from 0.',
default=None, default=None,
) )
def cli(path, output_dir, method, debug_able, start_page_id, end_page_id): def cli(path, output_dir, method, lang, debug_able, start_page_id, end_page_id):
model_config.__use_inside_model__ = True model_config.__use_inside_model__ = True
model_config.__model_mode__ = 'full' model_config.__model_mode__ = 'full'
os.makedirs(output_dir, exist_ok=True) os.makedirs(output_dir, exist_ok=True)
...@@ -90,6 +102,7 @@ def cli(path, output_dir, method, debug_able, start_page_id, end_page_id): ...@@ -90,6 +102,7 @@ def cli(path, output_dir, method, debug_able, start_page_id, end_page_id):
debug_able, debug_able,
start_page_id=start_page_id, start_page_id=start_page_id,
end_page_id=end_page_id, end_page_id=end_page_id,
lang=lang
) )
except Exception as e: except Exception as e:
......
...@@ -7,7 +7,7 @@ from loguru import logger ...@@ -7,7 +7,7 @@ from loguru import logger
import magic_pdf.model as model_config import magic_pdf.model as model_config
from magic_pdf.libs.draw_bbox import (draw_layout_bbox, draw_span_bbox, from magic_pdf.libs.draw_bbox import (draw_layout_bbox, draw_span_bbox,
drow_model_bbox) draw_model_bbox, draw_line_sort_bbox)
from magic_pdf.libs.MakeContentConfig import DropMode, MakeMode from magic_pdf.libs.MakeContentConfig import DropMode, MakeMode
from magic_pdf.pipe.OCRPipe import OCRPipe from magic_pdf.pipe.OCRPipe import OCRPipe
from magic_pdf.pipe.TXTPipe import TXTPipe from magic_pdf.pipe.TXTPipe import TXTPipe
...@@ -39,16 +39,19 @@ def do_parse( ...@@ -39,16 +39,19 @@ def do_parse(
f_dump_middle_json=True, f_dump_middle_json=True,
f_dump_model_json=True, f_dump_model_json=True,
f_dump_orig_pdf=True, f_dump_orig_pdf=True,
f_dump_content_list=False, f_dump_content_list=True,
f_make_md_mode=MakeMode.MM_MD, f_make_md_mode=MakeMode.MM_MD,
f_draw_model_bbox=False, f_draw_model_bbox=False,
f_draw_line_sort_bbox=False,
start_page_id=0, start_page_id=0,
end_page_id=None, end_page_id=None,
lang=None,
): ):
if debug_able: if debug_able:
logger.warning("debug mode is on") logger.warning('debug mode is on')
f_dump_content_list = True # f_dump_content_list = True
f_draw_model_bbox = True f_draw_model_bbox = True
f_draw_line_sort_bbox = True
orig_model_list = copy.deepcopy(model_list) orig_model_list = copy.deepcopy(model_list)
local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name,
...@@ -61,13 +64,13 @@ def do_parse( ...@@ -61,13 +64,13 @@ def do_parse(
if parse_method == 'auto': if parse_method == 'auto':
jso_useful_key = {'_pdf_type': '', 'model_list': model_list} jso_useful_key = {'_pdf_type': '', 'model_list': model_list}
pipe = UNIPipe(pdf_bytes, jso_useful_key, image_writer, is_debug=True, pipe = UNIPipe(pdf_bytes, jso_useful_key, image_writer, is_debug=True,
start_page_id=start_page_id, end_page_id=end_page_id) start_page_id=start_page_id, end_page_id=end_page_id, lang=lang)
elif parse_method == 'txt': elif parse_method == 'txt':
pipe = TXTPipe(pdf_bytes, model_list, image_writer, is_debug=True, pipe = TXTPipe(pdf_bytes, model_list, image_writer, is_debug=True,
start_page_id=start_page_id, end_page_id=end_page_id) start_page_id=start_page_id, end_page_id=end_page_id, lang=lang)
elif parse_method == 'ocr': elif parse_method == 'ocr':
pipe = OCRPipe(pdf_bytes, model_list, image_writer, is_debug=True, pipe = OCRPipe(pdf_bytes, model_list, image_writer, is_debug=True,
start_page_id=start_page_id, end_page_id=end_page_id) start_page_id=start_page_id, end_page_id=end_page_id, lang=lang)
else: else:
logger.error('unknown parse method') logger.error('unknown parse method')
exit(1) exit(1)
...@@ -89,7 +92,9 @@ def do_parse( ...@@ -89,7 +92,9 @@ def do_parse(
if f_draw_span_bbox: if f_draw_span_bbox:
draw_span_bbox(pdf_info, pdf_bytes, local_md_dir, pdf_file_name) draw_span_bbox(pdf_info, pdf_bytes, local_md_dir, pdf_file_name)
if f_draw_model_bbox: if f_draw_model_bbox:
drow_model_bbox(copy.deepcopy(orig_model_list), pdf_bytes, local_md_dir, pdf_file_name) draw_model_bbox(copy.deepcopy(orig_model_list), pdf_bytes, local_md_dir, pdf_file_name)
if f_draw_line_sort_bbox:
draw_line_sort_bbox(pdf_info, pdf_bytes, local_md_dir, pdf_file_name)
md_content = pipe.pipe_mk_markdown(image_dir, md_content = pipe.pipe_mk_markdown(image_dir,
drop_mode=DropMode.NONE, drop_mode=DropMode.NONE,
......
...@@ -26,7 +26,7 @@ PARSE_TYPE_OCR = "ocr" ...@@ -26,7 +26,7 @@ PARSE_TYPE_OCR = "ocr"
def parse_txt_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWriter, is_debug=False, def parse_txt_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWriter, is_debug=False,
start_page_id=0, end_page_id=None, start_page_id=0, end_page_id=None, lang=None,
*args, **kwargs): *args, **kwargs):
""" """
解析文本类pdf 解析文本类pdf
...@@ -44,11 +44,14 @@ def parse_txt_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit ...@@ -44,11 +44,14 @@ def parse_txt_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit
pdf_info_dict["_version_name"] = __version__ pdf_info_dict["_version_name"] = __version__
if lang is not None:
pdf_info_dict["_lang"] = lang
return pdf_info_dict return pdf_info_dict
def parse_ocr_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWriter, is_debug=False, def parse_ocr_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWriter, is_debug=False,
start_page_id=0, end_page_id=None, start_page_id=0, end_page_id=None, lang=None,
*args, **kwargs): *args, **kwargs):
""" """
解析ocr类pdf 解析ocr类pdf
...@@ -66,12 +69,15 @@ def parse_ocr_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit ...@@ -66,12 +69,15 @@ def parse_ocr_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWrit
pdf_info_dict["_version_name"] = __version__ pdf_info_dict["_version_name"] = __version__
if lang is not None:
pdf_info_dict["_lang"] = lang
return pdf_info_dict return pdf_info_dict
def parse_union_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWriter, is_debug=False, def parse_union_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWriter, is_debug=False,
input_model_is_empty: bool = False, input_model_is_empty: bool = False,
start_page_id=0, end_page_id=None, start_page_id=0, end_page_id=None, lang=None,
*args, **kwargs): *args, **kwargs):
""" """
ocr和文本混合的pdf,全部解析出来 ocr和文本混合的pdf,全部解析出来
...@@ -95,9 +101,11 @@ def parse_union_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWr ...@@ -95,9 +101,11 @@ def parse_union_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWr
if pdf_info_dict is None or pdf_info_dict.get("_need_drop", False): if pdf_info_dict is None or pdf_info_dict.get("_need_drop", False):
logger.warning(f"parse_pdf_by_txt drop or error, switch to parse_pdf_by_ocr") logger.warning(f"parse_pdf_by_txt drop or error, switch to parse_pdf_by_ocr")
if input_model_is_empty: if input_model_is_empty:
pdf_models = doc_analyze(pdf_bytes, ocr=True, pdf_models = doc_analyze(pdf_bytes,
ocr=True,
start_page_id=start_page_id, start_page_id=start_page_id,
end_page_id=end_page_id) end_page_id=end_page_id,
lang=lang)
pdf_info_dict = parse_pdf(parse_pdf_by_ocr) pdf_info_dict = parse_pdf(parse_pdf_by_ocr)
if pdf_info_dict is None: if pdf_info_dict is None:
raise Exception("Both parse_pdf_by_txt and parse_pdf_by_ocr failed.") raise Exception("Both parse_pdf_by_txt and parse_pdf_by_ocr failed.")
...@@ -108,4 +116,7 @@ def parse_union_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWr ...@@ -108,4 +116,7 @@ def parse_union_pdf(pdf_bytes: bytes, pdf_models: list, imageWriter: AbsReaderWr
pdf_info_dict["_version_name"] = __version__ pdf_info_dict["_version_name"] = __version__
if lang is not None:
pdf_info_dict["_lang"] = lang
return pdf_info_dict return pdf_info_dict
...@@ -38,9 +38,22 @@ sudo apt-get install libgl1-mesa-glx ...@@ -38,9 +38,22 @@ sudo apt-get install libgl1-mesa-glx
Reference: https://github.com/opendatalab/MinerU/issues/388 Reference: https://github.com/opendatalab/MinerU/issues/388
### 5. Encountered error `ModuleNotFoundError: No module named 'fairscale'` ### 5. Encountered error `ModuleNotFoundError: No module named 'fairscale'`
You need to uninstall the module and reinstall it: You need to uninstall the module and reinstall it:
```bash ```bash
pip uninstall fairscale pip uninstall fairscale
pip install fairscale pip install fairscale
``` ```
Reference: https://github.com/opendatalab/MinerU/issues/411 Reference: https://github.com/opendatalab/MinerU/issues/411
### 6. On some newer devices like the H100, the text parsed during OCR using CUDA acceleration is garbled.
CUDA 11 has poor compatibility with newer graphics cards, so the CUDA version used by Paddle needs to be upgraded.
```bash
pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/
```
Reference: https://github.com/opendatalab/MinerU/issues/558
# 常见问题解答 # 常见问题解答
### 1.在较新版本的mac上使用命令安装pip install magic-pdf[full] zsh: no matches found: magic-pdf[full] ### 1.在较新版本的mac上使用命令安装pip install magic-pdf\[full\] zsh: no matches found: magic-pdf\[full\]
在 macOS 上,默认的 shell 从 Bash 切换到了 Z shell,而 Z shell 对于某些类型的字符串匹配有特殊的处理逻辑,这可能导致no matches found错误。 在 macOS 上,默认的 shell 从 Bash 切换到了 Z shell,而 Z shell 对于某些类型的字符串匹配有特殊的处理逻辑,这可能导致no matches found错误。
可以通过在命令行禁用globbing特性,再尝试运行安装命令 可以通过在命令行禁用globbing特性,再尝试运行安装命令
```bash ```bash
setopt no_nomatch setopt no_nomatch
pip install magic-pdf[full] pip install magic-pdf[full]
...@@ -17,11 +18,13 @@ pip install magic-pdf[full] ...@@ -17,11 +18,13 @@ pip install magic-pdf[full]
### 3.模型文件应该下载到哪里/models-dir的配置应该怎么填 ### 3.模型文件应该下载到哪里/models-dir的配置应该怎么填
模型文件的路径输入是在"magic-pdf.json"中通过 模型文件的路径输入是在"magic-pdf.json"中通过
```json ```json
{ {
"models-dir": "/tmp/models" "models-dir": "/tmp/models"
} }
``` ```
进行配置的。 进行配置的。
这个路径是绝对路径而不是相对路径,绝对路径的获取可在models目录中通过命令 "pwd" 获取。 这个路径是绝对路径而不是相对路径,绝对路径的获取可在models目录中通过命令 "pwd" 获取。
参考:https://github.com/opendatalab/MinerU/issues/155#issuecomment-2230216874 参考:https://github.com/opendatalab/MinerU/issues/155#issuecomment-2230216874
...@@ -29,15 +32,30 @@ pip install magic-pdf[full] ...@@ -29,15 +32,30 @@ pip install magic-pdf[full]
### 4.在WSL2的Ubuntu22.04中遇到报错`ImportError: libGL.so.1: cannot open shared object file: No such file or directory` ### 4.在WSL2的Ubuntu22.04中遇到报错`ImportError: libGL.so.1: cannot open shared object file: No such file or directory`
WSL2的Ubuntu22.04中缺少`libgl`库,可通过以下命令安装`libgl`库解决: WSL2的Ubuntu22.04中缺少`libgl`库,可通过以下命令安装`libgl`库解决:
```bash ```bash
sudo apt-get install libgl1-mesa-glx sudo apt-get install libgl1-mesa-glx
``` ```
参考:https://github.com/opendatalab/MinerU/issues/388 参考:https://github.com/opendatalab/MinerU/issues/388
### 5.遇到报错 `ModuleNotFoundError : Nomodulenamed 'fairscale'` ### 5.遇到报错 `ModuleNotFoundError : Nomodulenamed 'fairscale'`
需要卸载该模块并重新安装 需要卸载该模块并重新安装
```bash ```bash
pip uninstall fairscale pip uninstall fairscale
pip install fairscale pip install fairscale
``` ```
参考:https://github.com/opendatalab/MinerU/issues/411 参考:https://github.com/opendatalab/MinerU/issues/411
### 6.在部分较新的设备如H100上,使用CUDA加速OCR时解析出的文字乱码。
cuda11对新显卡的兼容性不好,需要升级paddle使用的cuda版本
```bash
pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu123/
```
参考:https://github.com/opendatalab/MinerU/issues/558
# Ubuntu 22.04 LTS
### 1. Check if NVIDIA Drivers Are Installed
```sh
nvidia-smi
```
If you see information similar to the following, it means that the NVIDIA drivers are already installed, and you can skip Step 2.
```plaintext
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 537.34 Driver Version: 537.34 CUDA Version: 12.2 |
|-----------------------------------------+----------------------+----------------------+
| GPU Name TCC/WDDM | Bus-Id Disp.A | Volatile Uncorr. ECC |
| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |
| | | MIG M. |
|=========================================+======================+======================|
| 0 NVIDIA GeForce RTX 3060 Ti WDDM | 00000000:01:00.0 On | N/A |
| 0% 51C P8 12W / 200W | 1489MiB / 8192MiB | 5% Default |
| | | N/A |
+-----------------------------------------+----------------------+----------------------+
```
### 2. Install the Driver
If no driver is installed, use the following command:
```sh
sudo apt-get update
sudo apt-get install nvidia-driver-545
```
Install the proprietary driver and restart your computer after installation.
```sh
reboot
```
### 3. Install Anaconda
If Anaconda is already installed, skip this step.
```sh
wget https://repo.anaconda.com/archive/Anaconda3-2024.06-1-Linux-x86_64.sh
bash Anaconda3-2024.06-1-Linux-x86_64.sh
```
In the final step, enter `yes`, close the terminal, and reopen it.
### 4. Create an Environment Using Conda
Specify Python version 3.10.
```sh
conda create -n MinerU python=3.10
conda activate MinerU
```
### 5. Install Applications
```sh
pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com
```
❗ After installation, make sure to check the version of `magic-pdf` using the following command:
```sh
magic-pdf --version
```
If the version number is less than 0.7.0, please report the issue.
### 6. Download Models
Refer to detailed instructions on [how to download model files](how_to_download_models_en.md).
### 7. Understand the Location of the Configuration File
After completing the [6. Download Models](#6-download-models) step, the script will automatically generate a `magic-pdf.json` file in the user directory and configure the default model path.
You can find the `magic-pdf.json` file in your user directory.
> The user directory for Linux is "/home/username".
### 8. First Run
Download a sample file from the repository and test it.
```sh
wget https://github.com/opendatalab/MinerU/raw/master/demo/small_ocr.pdf
magic-pdf -p small_ocr.pdf
```
### 9. Test CUDA Acceleration
If your graphics card has at least **8GB** of VRAM, follow these steps to test CUDA acceleration:
> ❗ Due to the extremely limited nature of 8GB VRAM for running this application, you need to close all other programs using VRAM to ensure that 8GB of VRAM is available when running this application.
1. Modify the value of `"device-mode"` in the `magic-pdf.json` configuration file located in your home directory.
```json
{
"device-mode": "cuda"
}
```
2. Test CUDA acceleration with the following command:
```sh
magic-pdf -p small_ocr.pdf
```
### 10. Enable CUDA Acceleration for OCR
1. Download `paddlepaddle-gpu`. Installation will automatically enable OCR acceleration.
```sh
python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
```
2. Test OCR acceleration with the following command:
```sh
magic-pdf -p small_ocr.pdf
```
# Ubuntu 22.04 LTS # Ubuntu 22.04 LTS
## 1. 检测是否已安装nvidia驱动 ## 1. 检测是否已安装nvidia驱动
```bash ```bash
nvidia-smi nvidia-smi
``` ```
如果看到类似如下的信息,说明已经安装了nvidia驱动,可以跳过步骤2 如果看到类似如下的信息,说明已经安装了nvidia驱动,可以跳过步骤2
``` ```
+---------------------------------------------------------------------------------------+ +---------------------------------------------------------------------------------------+
| NVIDIA-SMI 537.34 Driver Version: 537.34 CUDA Version: 12.2 | | NVIDIA-SMI 537.34 Driver Version: 537.34 CUDA Version: 12.2 |
...@@ -18,96 +21,110 @@ nvidia-smi ...@@ -18,96 +21,110 @@ nvidia-smi
| | | N/A | | | | N/A |
+-----------------------------------------+----------------------+----------------------+ +-----------------------------------------+----------------------+----------------------+
``` ```
## 2. 安装驱动 ## 2. 安装驱动
如没有驱动,则通过如下命令 如没有驱动,则通过如下命令
```bash ```bash
sudo apt-get update sudo apt-get update
sudo apt-get install nvidia-driver-545 sudo apt-get install nvidia-driver-545
``` ```
安装专有驱动,安装完成后,重启电脑 安装专有驱动,安装完成后,重启电脑
```bash ```bash
reboot reboot
``` ```
## 3. 安装anacoda ## 3. 安装anacoda
如果已安装conda,可以跳过本步骤 如果已安装conda,可以跳过本步骤
```bash ```bash
wget -U NoSuchBrowser/1.0 https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/Anaconda3-2024.06-1-Linux-x86_64.sh wget -U NoSuchBrowser/1.0 https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/Anaconda3-2024.06-1-Linux-x86_64.sh
bash Anaconda3-2024.06-1-Linux-x86_64.sh bash Anaconda3-2024.06-1-Linux-x86_64.sh
``` ```
最后一步输入yes,关闭终端重新打开 最后一步输入yes,关闭终端重新打开
## 4. 使用conda 创建环境 ## 4. 使用conda 创建环境
需指定python版本为3.10 需指定python版本为3.10
```bash ```bash
conda create -n MinerU python=3.10 conda create -n MinerU python=3.10
conda activate MinerU conda activate MinerU
``` ```
## 5. 安装应用 ## 5. 安装应用
```bash ```bash
pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple
``` ```
> ❗️下载完成后,务必通过以下命令确认magic-pdf的版本是否正确 > ❗️下载完成后,务必通过以下命令确认magic-pdf的版本是否正确
> >
> ```bash > ```bash
> magic-pdf --version > magic-pdf --version
>``` > ```
>
> 如果版本号小于0.7.0,请到issue中向我们反馈 > 如果版本号小于0.7.0,请到issue中向我们反馈
## 6. 下载模型 ## 6. 下载模型
详细参考 [如何下载模型文件](how_to_download_models_zh_cn.md) 详细参考 [如何下载模型文件](how_to_download_models_zh_cn.md)
下载后请将models目录移动到空间较大的ssd磁盘目录
> ❗️模型下载后请务必检查模型文件是否下载完整
>
> 请检查目录下的模型文件大小与网页上描述是否一致,如果可以的话,最好通过sha256校验模型是否下载完整
>
## 7. 第一次运行前的配置
在仓库根目录可以获得 [magic-pdf.template.json](../magic-pdf.template.json) 配置模版文件
> ❗️务必执行以下命令将配置文件拷贝到【用户目录】下,否则程序将无法运行
>
> linux用户目录为 "/home/用户名"
```bash
wget https://gitee.com/myhloli/MinerU/raw/master/magic-pdf.template.json
cp magic-pdf.template.json ~/magic-pdf.json
```
在用户目录中找到magic-pdf.json文件并配置"models-dir"为[6. 下载模型](#6-下载模型)中下载的模型权重文件所在目录 ## 7. 了解配置文件存放的位置
> ❗️务必正确配置模型权重文件所在目录的【绝对路径】,否则会因为找不到模型文件而导致程序无法运行
> 完成[6.下载模型](#6-下载模型)步骤后,脚本会自动生成用户目录下的magic-pdf.json文件,并自动配置默认模型路径。
```json 您可在【用户目录】下找到magic-pdf.json文件。
{
"models-dir": "/tmp/models" > linux用户目录为 "/home/用户名"
}
```
## 8. 第一次运行 ## 8. 第一次运行
从仓库中下载样本文件,并测试 从仓库中下载样本文件,并测试
```bash ```bash
wget https://gitee.com/myhloli/MinerU/raw/master/demo/small_ocr.pdf wget https://gitee.com/myhloli/MinerU/raw/master/demo/small_ocr.pdf
magic-pdf -p small_ocr.pdf magic-pdf -p small_ocr.pdf
``` ```
## 9. 测试CUDA加速 ## 9. 测试CUDA加速
如果您的显卡显存大于等于8G,可以进行以下流程,测试CUDA解析加速效果
如果您的显卡显存大于等于 **8GB** ,可以进行以下流程,测试CUDA解析加速效果
> ❗️因8GB显存运行本应用非常极限,需要关闭所有其他正在使用显存的程序以确保本应用运行时有足额8GB显存可用。
**1.修改【用户目录】中配置文件magic-pdf.json中"device-mode"的值** **1.修改【用户目录】中配置文件magic-pdf.json中"device-mode"的值**
```json ```json
{ {
"device-mode":"cuda" "device-mode":"cuda"
} }
``` ```
**2.运行以下命令测试cuda加速效果** **2.运行以下命令测试cuda加速效果**
```bash ```bash
magic-pdf -p small_ocr.pdf magic-pdf -p small_ocr.pdf
``` ```
> 提示:CUDA加速是否生效可以根据log中输出的各个阶段cost耗时来简单判断,通常情况下,`layout detection cost` 和 `mfr time` 应提速10倍以上。 > 提示:CUDA加速是否生效可以根据log中输出的各个阶段cost耗时来简单判断,通常情况下,`layout detection cost` 和 `mfr time` 应提速10倍以上。
## 10. 为ocr开启cuda加速 ## 10. 为ocr开启cuda加速
> ❗️以下操作需显卡显存大于等于16G才可进行,否则会因为显存不足导致程序崩溃或运行速度下降
**1.下载paddlepaddle-gpu, 安装完成后会自动开启ocr加速** **1.下载paddlepaddle-gpu, 安装完成后会自动开启ocr加速**
```bash ```bash
python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/ python -m pip install paddlepaddle-gpu==3.0.0b1 -i https://www.paddlepaddle.org.cn/packages/stable/cu118/
``` ```
**2.运行以下命令测试ocr加速效果** **2.运行以下命令测试ocr加速效果**
```bash ```bash
magic-pdf -p small_ocr.pdf magic-pdf -p small_ocr.pdf
``` ```
> 提示:CUDA加速是否生效可以根据log中输出的各个阶段cost耗时来简单判断,通常情况下,`ocr cost`应提速10倍以上。 > 提示:CUDA加速是否生效可以根据log中输出的各个阶段cost耗时来简单判断,通常情况下,`ocr cost`应提速10倍以上。
# Windows 10/11
### 1. Install CUDA and cuDNN
Required versions: CUDA 11.8 + cuDNN 8.7.0
- CUDA 11.8: https://developer.nvidia.com/cuda-11-8-0-download-archive
- cuDNN v8.7.0 (November 28th, 2022), for CUDA 11.x: https://developer.nvidia.com/rdp/cudnn-archive
### 2. Install Anaconda
If Anaconda is already installed, you can skip this step.
Download link: https://repo.anaconda.com/archive/Anaconda3-2024.06-1-Windows-x86_64.exe
### 3. Create an Environment Using Conda
Python version must be 3.10.
```
conda create -n MinerU python=3.10
conda activate MinerU
```
### 4. Install Applications
```
pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com
```
> ❗️After installation, verify the version of `magic-pdf`:
>
> ```bash
> magic-pdf --version
> ```
>
> If the version number is less than 0.7.0, please report it in the issues section.
### 5. Download Models
Refer to detailed instructions on [how to download model files](how_to_download_models_en.md).
### 6. Understand the Location of the Configuration File
After completing the [5. Download Models](#5-download-models) step, the script will automatically generate a `magic-pdf.json` file in the user directory and configure the default model path.
You can find the `magic-pdf.json` file in your 【user directory】 .
> The user directory for Windows is "C:/Users/username".
### 7. First Run
Download a sample file from the repository and test it.
```powershell
wget https://github.com/opendatalab/MinerU/raw/master/demo/small_ocr.pdf -O small_ocr.pdf
magic-pdf -p small_ocr.pdf
```
### 8. Test CUDA Acceleration
If your graphics card has at least 8GB of VRAM, follow these steps to test CUDA-accelerated parsing performance.
> ❗ Due to the extremely limited nature of 8GB VRAM for running this application, you need to close all other programs using VRAM to ensure that 8GB of VRAM is available when running this application.
1. **Overwrite the installation of torch and torchvision** supporting CUDA.
```
pip install --force-reinstall torch==2.3.1 torchvision==0.18.1 --index-url https://download.pytorch.org/whl/cu118
```
> ❗️Ensure the following versions are specified in the command:
>
> ```
> torch==2.3.1 torchvision==0.18.1
> ```
>
> These are the highest versions we support. Installing higher versions without specifying them will cause the program to fail.
2. **Modify the value of `"device-mode"`** in the `magic-pdf.json` configuration file located in your user directory.
```json
{
"device-mode": "cuda"
}
```
3. **Run the following command to test CUDA acceleration**:
```
magic-pdf -p small_ocr.pdf
```
### 9. Enable CUDA Acceleration for OCR
1. **Download paddlepaddle-gpu**, which will automatically enable OCR acceleration upon installation.
```
pip install paddlepaddle-gpu==2.6.1
```
2. **Run the following command to test OCR acceleration**:
```
magic-pdf -p small_ocr.pdf
```
...@@ -3,103 +3,108 @@ ...@@ -3,103 +3,108 @@
## 1. 安装cuda和cuDNN ## 1. 安装cuda和cuDNN
需要安装的版本 CUDA 11.8 + cuDNN 8.7.0 需要安装的版本 CUDA 11.8 + cuDNN 8.7.0
- CUDA 11.8 https://developer.nvidia.com/cuda-11-8-0-download-archive - CUDA 11.8 https://developer.nvidia.com/cuda-11-8-0-download-archive
- cuDNN v8.7.0 (November 28th, 2022), for CUDA 11.x https://developer.nvidia.com/rdp/cudnn-archive - cuDNN v8.7.0 (November 28th, 2022), for CUDA 11.x https://developer.nvidia.com/rdp/cudnn-archive
## 2. 安装anaconda ## 2. 安装anaconda
如果已安装conda,可以跳过本步骤 如果已安装conda,可以跳过本步骤
下载链接: 下载链接:
https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/Anaconda3-2024.06-1-Windows-x86_64.exe https://mirrors.tuna.tsinghua.edu.cn/anaconda/archive/Anaconda3-2024.06-1-Windows-x86_64.exe
## 3. 使用conda 创建环境 ## 3. 使用conda 创建环境
需指定python版本为3.10 需指定python版本为3.10
```bash ```bash
conda create -n MinerU python=3.10 conda create -n MinerU python=3.10
conda activate MinerU conda activate MinerU
``` ```
## 4. 安装应用 ## 4. 安装应用
```bash ```bash
pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple pip install -U magic-pdf[full] --extra-index-url https://wheels.myhloli.com -i https://pypi.tuna.tsinghua.edu.cn/simple
``` ```
> ❗️下载完成后,务必通过以下命令确认magic-pdf的版本是否正确 > ❗️下载完成后,务必通过以下命令确认magic-pdf的版本是否正确
> >
> ```bash > ```bash
> magic-pdf --version > magic-pdf --version
>``` > ```
>
> 如果版本号小于0.7.0,请到issue中向我们反馈 > 如果版本号小于0.7.0,请到issue中向我们反馈
## 5. 下载模型 ## 5. 下载模型
详细参考 [如何下载模型文件](how_to_download_models_zh_cn.md) 详细参考 [如何下载模型文件](how_to_download_models_zh_cn.md)
下载后请将models目录移动到空间较大的ssd磁盘目录
> ❗️模型下载后请务必检查模型文件是否下载完整
>
> 请检查目录下的模型文件大小与网页上描述是否一致,如果可以的话,最好通过sha256校验模型是否下载完整
## 6. 第一次运行前的配置 ## 6. 了解配置文件存放的位置
在仓库根目录可以获得 [magic-pdf.template.json](../magic-pdf.template.json) 配置模版文件
> ❗️务必执行以下命令将配置文件拷贝到【用户目录】下,否则程序将无法运行
>
> windows用户目录为 "C:\Users\用户名"
```powershell
(New-Object System.Net.WebClient).DownloadFile('https://gitee.com/myhloli/MinerU/raw/master/magic-pdf.template.json', 'magic-pdf.template.json')
cp magic-pdf.template.json ~/magic-pdf.json
```
在用户目录中找到magic-pdf.json文件并配置"models-dir"为[5. 下载模型](#5-下载模型)中下载的模型权重文件所在目录 完成[5.下载模型](#5-下载模型)步骤后,脚本会自动生成用户目录下的magic-pdf.json文件,并自动配置默认模型路径。
> ❗️务必正确配置模型权重文件所在目录的【绝对路径】,否则会因为找不到模型文件而导致程序无法运行 您可在【用户目录】下找到magic-pdf.json文件。
>
> windows系统中此路径应包含盘符,且需把路径中所有的`"\"`替换为`"/"`,否则会因为转义原因导致json文件语法错误。 > windows用户目录为 "C:/Users/用户名"
>
> 例如:模型放在D盘根目录的models目录,则model-dir的值应为"D:/models"
```json
{
"models-dir": "/tmp/models"
}
```
## 7. 第一次运行 ## 7. 第一次运行
从仓库中下载样本文件,并测试 从仓库中下载样本文件,并测试
```powershell ```powershell
(New-Object System.Net.WebClient).DownloadFile('https://gitee.com/myhloli/MinerU/raw/master/demo/small_ocr.pdf', 'small_ocr.pdf') wget https://github.com/opendatalab/MinerU/raw/master/demo/small_ocr.pdf -O small_ocr.pdf
magic-pdf -p small_ocr.pdf magic-pdf -p small_ocr.pdf
``` ```
## 8. 测试CUDA加速 ## 8. 测试CUDA加速
如果您的显卡显存大于等于8G,可以进行以下流程,测试CUDA解析加速效果
如果您的显卡显存大于等于 **8GB** ,可以进行以下流程,测试CUDA解析加速效果
> ❗️因8GB显存运行本应用非常极限,需要关闭所有其他正在使用显存的程序以确保本应用运行时有足额8GB显存可用。
**1.覆盖安装支持cuda的torch和torchvision** **1.覆盖安装支持cuda的torch和torchvision**
```bash ```bash
pip install --force-reinstall torch==2.3.1 torchvision==0.18.1 --index-url https://download.pytorch.org/whl/cu118 pip install --force-reinstall torch==2.3.1 torchvision==0.18.1 --index-url https://download.pytorch.org/whl/cu118
``` ```
> ❗️务必在命令中指定以下版本 > ❗️务必在命令中指定以下版本
>
> ```bash > ```bash
> torch==2.3.1 torchvision==0.18.1 > torch==2.3.1 torchvision==0.18.1
> ``` > ```
>
> 这是我们支持的最高版本,如果不指定版本会自动安装更高版本导致程序无法运行 > 这是我们支持的最高版本,如果不指定版本会自动安装更高版本导致程序无法运行
**2.修改【用户目录】中配置文件magic-pdf.json中"device-mode"的值** **2.修改【用户目录】中配置文件magic-pdf.json中"device-mode"的值**
```json ```json
{ {
"device-mode":"cuda" "device-mode":"cuda"
} }
``` ```
**3.运行以下命令测试cuda加速效果** **3.运行以下命令测试cuda加速效果**
```bash ```bash
magic-pdf -p small_ocr.pdf magic-pdf -p small_ocr.pdf
``` ```
> 提示:CUDA加速是否生效可以根据log中输出的各个阶段cost耗时来简单判断,通常情况下,`layout detection cost` 和 `mfr time` 应提速10倍以上。
> 提示:CUDA加速是否生效可以根据log中输出的各个阶段的耗时来简单判断,通常情况下,`layout detection time` 和 `mfr time` 应提速10倍以上。
## 9. 为ocr开启cuda加速 ## 9. 为ocr开启cuda加速
> ❗️以下操作需显卡显存大于等于16G才可进行,否则会因为显存不足导致程序崩溃或运行速度下降
**1.下载paddlepaddle-gpu, 安装完成后会自动开启ocr加速** **1.下载paddlepaddle-gpu, 安装完成后会自动开启ocr加速**
```bash ```bash
pip install paddlepaddle-gpu==2.6.1 pip install paddlepaddle-gpu==2.6.1
``` ```
**2.运行以下命令测试ocr加速效果** **2.运行以下命令测试ocr加速效果**
```bash ```bash
magic-pdf -p small_ocr.pdf magic-pdf -p small_ocr.pdf
``` ```
> 提示:CUDA加速是否生效可以根据log中输出的各个阶段cost耗时来简单判断,通常情况下,`ocr cost`应提速10倍以上。
> 提示:CUDA加速是否生效可以根据log中输出的各个阶段cost耗时来简单判断,通常情况下,`ocr time`应提速10倍以上。
import json
import os
import requests
from modelscope import snapshot_download
def download_and_modify_json(url, local_filename, modifications):
    """Load a JSON config from ``local_filename`` if it exists, otherwise
    download it from ``url``; apply ``modifications`` and write the result back.

    Args:
        url: Remote URL of the template JSON file.
        local_filename: Path where the (possibly pre-existing) config lives.
        modifications: Mapping of keys to overwrite in the JSON data.

    Raises:
        requests.HTTPError: If the template has to be downloaded and the
            server responds with an error status.
    """
    if os.path.exists(local_filename):
        # Reuse the existing config. Use a context manager so the file
        # handle is closed promptly (the original left it open), and pin
        # the encoding to match the UTF-8 write below.
        with open(local_filename, encoding='utf-8') as f:
            data = json.load(f)
    else:
        # Download the template JSON; a timeout prevents the script from
        # hanging forever on an unreachable host.
        response = requests.get(url, timeout=30)
        response.raise_for_status()  # fail fast on HTTP errors
        data = response.json()

    # Overwrite the requested keys.
    for key, value in modifications.items():
        data[key] = value

    # Persist the modified config, keeping non-ASCII characters readable.
    with open(local_filename, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)
if __name__ == '__main__':
    # Fetch both model snapshots from ModelScope (downloads on first run,
    # returns the cached path on subsequent runs). Order preserved so the
    # PDF-Extract-Kit download happens first.
    model_dir = snapshot_download('opendatalab/PDF-Extract-Kit') + '/models'
    layoutreader_model_dir = snapshot_download('ppaanngggg/layoutreader')
    print(f'model_dir is: {model_dir}')
    print(f'layoutreader_model_dir is: {layoutreader_model_dir}')

    # Merge the downloaded model paths into ~/magic-pdf.json, creating it
    # from the remote template if it does not exist yet.
    json_url = 'https://gitee.com/myhloli/MinerU/raw/master/magic-pdf.template.json'
    config_file = os.path.join(os.path.expanduser('~'), 'magic-pdf.json')
    json_mods = {
        'models-dir': model_dir,
        'layoutreader-model-dir': layoutreader_model_dir,
    }
    download_and_modify_json(json_url, config_file, json_mods)
    print(f'The configuration file has been configured successfully, the path is: {config_file}')
This diff is collapsed.
Model downloads are divided into initial downloads and updates to the model directory. Please refer to the corresponding documentation for instructions on how to proceed.
# Initial download of model files
### 1. Download the Model from Hugging Face
Use a Python Script to Download Model Files from Hugging Face
```bash
pip install huggingface_hub
wget https://github.com/opendatalab/MinerU/raw/master/docs/download_models_hf.py -O download_models_hf.py
python download_models_hf.py
```
The Python script will automatically download the model files and configure the model directory in the configuration file.
The configuration file can be found in the user directory, with the filename `magic-pdf.json`.
# How to update models previously downloaded
## 1. Models downloaded via Git LFS
> Due to feedback from some users that downloading model files using git lfs was incomplete or resulted in corrupted model files, this method is no longer recommended.
If you previously downloaded model files via git lfs, you can navigate to the previous download directory and use the `git pull` command to update the model.
## 2. Models downloaded via Hugging Face or Model Scope
If you previously downloaded models via Hugging Face or Model Scope, you can rerun the Python script used for the initial download. This will automatically update the model directory to the latest version.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment