Unverified Commit 4bb54393 authored by Xiaomeng Zhao's avatar Xiaomeng Zhao Committed by GitHub
Browse files

Merge pull request #1427 from opendatalab/release-1.0.0

Release 1.0.0
parents 04f084ac 1c9f9942
......@@ -4,7 +4,9 @@
:maxdepth: 2
user_guide/install
user_guide/usage
user_guide/quick_start
user_guide/tutorial
user_guide/data
user_guide/inference_result
user_guide/pipe_result
......@@ -87,56 +87,70 @@ Read Examples
.. code:: python
import os
from magic_pdf.data.data_reader_writer import *
from magic_pdf.data.data_reader_writer import MultiBucketS3DataReader
from magic_pdf.data.schemas import S3Config
# file based related
# file based related
file_based_reader1 = FileBasedDataReader('')
## will read file abc
file_based_reader1.read('abc')
## will read file abc
file_based_reader1.read('abc')
file_based_reader2 = FileBasedDataReader('/tmp')
## will read /tmp/abc
file_based_reader2.read('abc')
## will read /var/logs/message.txt
file_based_reader2.read('/var/logs/message.txt')
## will read /tmp/logs/message.txt
file_based_reader2.read('/tmp/logs/message.txt')
# multi bucket s3 releated
multi_bucket_s3_reader1 = MultiBucketS3DataReader("test_bucket1/test_prefix", list[S3Config(
bucket_name=test_bucket1, access_key=ak, secret_key=sk, endpoint_url=endpoint_url
bucket = "bucket" # replace with real bucket
ak = "ak" # replace with real access key
sk = "sk" # replace with real secret key
endpoint_url = "endpoint_url" # replace with real endpoint_url
bucket_2 = "bucket_2" # replace with real bucket
ak_2 = "ak_2" # replace with real access key
sk_2 = "sk_2" # replace with real secret key
endpoint_url_2 = "endpoint_url_2" # replace with real endpoint_url
test_prefix = 'test/unittest'
multi_bucket_s3_reader1 = MultiBucketS3DataReader(f"{bucket}/{test_prefix}", [S3Config(
bucket_name=bucket, access_key=ak, secret_key=sk, endpoint_url=endpoint_url
),
S3Config(
bucket_name=test_bucket_2,
bucket_name=bucket_2,
access_key=ak_2,
secret_key=sk_2,
endpoint_url=endpoint_url_2,
)])
## will read s3://test_bucket1/test_prefix/abc
## will read s3://{bucket}/{test_prefix}/abc
multi_bucket_s3_reader1.read('abc')
## will read s3://test_bucket1/efg
multi_bucket_s3_reader1.read('s3://test_bucket1/efg')
## will read s3://{bucket}/{test_prefix}/efg
multi_bucket_s3_reader1.read(f's3://{bucket}/{test_prefix}/efg')
## will read s3://test_bucket2/abc
multi_bucket_s3_reader1.read('s3://test_bucket2/abc')
## will read s3://{bucket2}/{test_prefix}/abc
multi_bucket_s3_reader1.read(f's3://{bucket_2}/{test_prefix}/abc')
# s3 related
s3_reader1 = S3DataReader(
default_prefix_without_bucket = "test_prefix"
bucket: "test_bucket",
ak: "ak",
sk: "sk",
endpoint_url: "localhost"
test_prefix,
bucket,
ak,
sk,
endpoint_url
)
## will read s3://test_bucket/test_prefix/abc
## will read s3://{bucket}/{test_prefix}/abc
s3_reader1.read('abc')
## will read s3://test_bucket/efg
s3_reader1.read('s3://test_bucket/efg')
## will read s3://{bucket}/efg
s3_reader1.read(f's3://{bucket}/efg')
Write Examples
......@@ -144,65 +158,79 @@ Write Examples
.. code:: python
import os
from magic_pdf.data.data_reader_writer import *
from magic_pdf.data.data_reader_writer import MultiBucketS3DataWriter
from magic_pdf.data.schemas import S3Config
# file based related
file_based_writer1 = FileBasedDataWriter('')
# file based related
file_based_writer1 = FileBasedDataWriter("")
## will write 123 to abc
file_based_writer1.write('abc', '123'.encode())
file_based_writer1.write("abc", "123".encode())
## will write 123 to abc
file_based_writer1.write_string('abc', '123')
file_based_writer1.write_string("abc", "123")
file_based_writer2 = FileBasedDataWriter('/tmp')
file_based_writer2 = FileBasedDataWriter("/tmp")
## will write 123 to /tmp/abc
file_based_writer2.write_string('abc', '123')
file_based_writer2.write_string("abc", "123")
## will write 123 to /var/logs/message.txt
file_based_writer2.write_string('/var/logs/message.txt', '123')
## will write 123 to /tmp/logs/message.txt
file_based_writer2.write_string("/tmp/logs/message.txt", "123")
# multi bucket s3 releated
multi_bucket_s3_writer1 = MultiBucketS3DataWriter("test_bucket1/test_prefix", list[S3Config(
bucket_name=test_bucket1, access_key=ak, secret_key=sk, endpoint_url=endpoint_url
),
S3Config(
bucket_name=test_bucket_2,
access_key=ak_2,
secret_key=sk_2,
endpoint_url=endpoint_url_2,
)])
## will write 123 to s3://test_bucket1/test_prefix/abc
multi_bucket_s3_writer1.write_string('abc', '123')
bucket = "bucket" # replace with real bucket
ak = "ak" # replace with real access key
sk = "sk" # replace with real secret key
endpoint_url = "endpoint_url" # replace with real endpoint_url
bucket_2 = "bucket_2" # replace with real bucket
ak_2 = "ak_2" # replace with real access key
sk_2 = "sk_2" # replace with real secret key
endpoint_url_2 = "endpoint_url_2" # replace with real endpoint_url
test_prefix = "test/unittest"
multi_bucket_s3_writer1 = MultiBucketS3DataWriter(
f"{bucket}/{test_prefix}",
[
S3Config(
bucket_name=bucket, access_key=ak, secret_key=sk, endpoint_url=endpoint_url
),
S3Config(
bucket_name=bucket_2,
access_key=ak_2,
secret_key=sk_2,
endpoint_url=endpoint_url_2,
),
],
)
## will write 123 to s3://test_bucket1/test_prefix/abc
multi_bucket_s3_writer1.write('abc', '123'.encode())
## will write 123 to s3://{bucket}/{test_prefix}/abc
multi_bucket_s3_writer1.write_string("abc", "123")
## will write 123 to s3://test_bucket1/efg
multi_bucket_s3_writer1.write('s3://test_bucket1/efg', '123'.encode())
## will write 123 to s3://{bucket}/{test_prefix}/abc
multi_bucket_s3_writer1.write("abc", "123".encode())
## will write 123 to s3://test_bucket2/abc
multi_bucket_s3_writer1.write('s3://test_bucket2/abc', '123'.encode())
## will write 123 to s3://{bucket}/{test_prefix}/efg
multi_bucket_s3_writer1.write(f"s3://{bucket}/{test_prefix}/efg", "123".encode())
## will write 123 to s3://{bucket_2}/{test_prefix}/abc
multi_bucket_s3_writer1.write(f's3://{bucket_2}/{test_prefix}/abc', '123'.encode())
# s3 related
s3_writer1 = S3DataWriter(
default_prefix_without_bucket = "test_prefix"
bucket: "test_bucket",
ak: "ak",
sk: "sk",
endpoint_url: "localhost"
)
s3_writer1 = S3DataWriter(test_prefix, bucket, ak, sk, endpoint_url)
## will write 123 to s3://{bucket}/{test_prefix}/abc
s3_writer1.write("abc", "123".encode())
## will write 123 to s3://test_bucket/test_prefix/abc
s3_writer1.write('abc', '123'.encode())
## will write 123 to s3://{bucket}/{test_prefix}/abc
s3_writer1.write_string("abc", "123")
## will write 123 to s3://test_bucket/test_prefix/abc
s3_writer1.write_string('abc', '123')
## will write 123 to s3://{bucket}/efg
s3_writer1.write(f"s3://{bucket}/efg", "123".encode())
## will write 123 to s3://test_bucket/efg
s3_writer1.write('s3://test_bucket/efg', '123'.encode())
Check :doc:`../../api/data_reader_writer` for more details
......@@ -18,24 +18,50 @@ Read the contet from jsonl which may located on local machine or remote s3. if y
.. code:: python
from magic_pdf.data.io.read_api import *
from magic_pdf.data.read_api import *
from magic_pdf.data.data_reader_writer import MultiBucketS3DataReader
from magic_pdf.data.schemas import S3Config
# read jsonl from local machine
datasets = read_jsonl("tt.jsonl", None)
# read jsonl from local machine
datasets = read_jsonl("tt.jsonl", None) # replace with real jsonl file
# read jsonl from remote s3
datasets = read_jsonl("s3://bucket_1/tt.jsonl", s3_reader)
bucket = "bucket_1" # replace with real s3 bucket
ak = "access_key_1" # replace with real s3 access key
sk = "secret_key_1" # replace with real s3 secret key
endpoint_url = "endpoint_url_1" # replace with real s3 endpoint url
bucket_2 = "bucket_2" # replace with real s3 bucket
ak_2 = "access_key_2" # replace with real s3 access key
sk_2 = "secret_key_2" # replace with real s3 secret key
endpoint_url_2 = "endpoint_url_2" # replace with real s3 endpoint url
s3configs = [
S3Config(
bucket_name=bucket, access_key=ak, secret_key=sk, endpoint_url=endpoint_url
),
S3Config(
bucket_name=bucket_2,
access_key=ak_2,
secret_key=sk_2,
endpoint_url=endpoint_url_2,
),
]
s3_reader = MultiBucketS3DataReader(bucket, s3configs)
datasets = read_jsonl(f"s3://bucket_1/tt.jsonl", s3_reader) # replace with real s3 jsonl file
read_local_pdfs
^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^
Read pdf from path or directory.
.. code:: python
from magic_pdf.data.io.read_api import *
from magic_pdf.data.read_api import *
# read pdf path
datasets = read_local_pdfs("tt.pdf")
......@@ -51,13 +77,30 @@ Read images from path or directory
.. code:: python
from magic_pdf.data.io.read_api import *
from magic_pdf.data.read_api import *
# read from image path
datasets = read_local_images("tt.png") # replace with real file path
# read files from directory that endswith suffix in suffixes array
datasets = read_local_images("images/", suffixes=[".png", ".jpg"]) # replace with real directory
read_local_office
^^^^^^^^^^^^^^^^^^^^
Read MS-Office files from path or directory
.. code:: python
from magic_pdf.data.read_api import *
# read from image path
datasets = read_local_images("tt.png")
datasets = read_local_office("tt.doc") # replace with real file path
# read files from directory that endswith suffix in suffixes array
datasets = read_local_images("images/", suffixes=["png", "jpg"])
datasets = read_local_office("docs/") # replace with real directory
Check :doc:`../../api/read_api` for more details
\ No newline at end of file
Inference Result
==================
.. admonition:: Tip
:class: tip
Please first navigate to :doc:`tutorial/pipeline` to get an initial understanding of how the pipeline works; this will help in understanding the content of this section.
The **InferenceResult** class is a container for storing model inference results and implements a series of methods related to these results, such as draw_model, dump_model.
Checkout :doc:`../api/model_operators` for more details about **InferenceResult**
Model Inference Result
-----------------------
Structure Definition
^^^^^^^^^^^^^^^^^^^^^^^^
.. code:: python
from pydantic import BaseModel, Field
from enum import IntEnum
class CategoryType(IntEnum):
title = 0 # Title
plain_text = 1 # Text
abandon = 2 # Includes headers, footers, page numbers, and page annotations
figure = 3 # Image
figure_caption = 4 # Image description
table = 5 # Table
table_caption = 6 # Table description
table_footnote = 7 # Table footnote
isolate_formula = 8 # Block formula
formula_caption = 9 # Formula label
embedding = 13 # Inline formula
isolated = 14 # Block formula
text = 15 # OCR recognition result
class PageInfo(BaseModel):
page_no: int = Field(description="Page number, the first page is 0", ge=0)
height: int = Field(description="Page height", gt=0)
width: int = Field(description="Page width", ge=0)
class ObjectInferenceResult(BaseModel):
category_id: CategoryType = Field(description="Category", ge=0)
poly: list[float] = Field(description="Quadrilateral coordinates, representing the coordinates of the top-left, top-right, bottom-right, and bottom-left points respectively")
score: float = Field(description="Confidence of the inference result")
latex: str | None = Field(description="LaTeX parsing result", default=None)
html: str | None = Field(description="HTML parsing result", default=None)
class PageInferenceResults(BaseModel):
layout_dets: list[ObjectInferenceResult] = Field(description="Page recognition results", ge=0)
page_info: PageInfo = Field(description="Page metadata")
Example
^^^^^^^^^^^
.. code:: json
[
{
"layout_dets": [
{
"category_id": 2,
"poly": [
99.1906967163086,
100.3119125366211,
730.3707885742188,
100.3119125366211,
730.3707885742188,
245.81326293945312,
99.1906967163086,
245.81326293945312
],
"score": 0.9999997615814209
}
],
"page_info": {
"page_no": 0,
"height": 2339,
"width": 1654
}
},
{
"layout_dets": [
{
"category_id": 5,
"poly": [
99.13092803955078,
2210.680419921875,
497.3183898925781,
2210.680419921875,
497.3183898925781,
2264.78076171875,
99.13092803955078,
2264.78076171875
],
"score": 0.9999997019767761
}
],
"page_info": {
"page_no": 1,
"height": 2339,
"width": 1654
}
}
]
The format of the poly coordinates is [x0, y0, x1, y1, x2, y2, x3, y3],
representing the coordinates of the top-left, top-right, bottom-right,
and bottom-left points respectively. |Poly Coordinate Diagram|
Inference Result
-------------------------
.. code:: python
from magic_pdf.operators.models import InferenceResult
from magic_pdf.data.dataset import Dataset
dataset : Dataset = some_data_set # not real dataset
# The inference results of all pages, ordered by page number, are stored in a list as the inference results of MinerU
model_inference_result: list[PageInferenceResults] = []
Inference_result = InferenceResult(model_inference_result, dataset)
some_model.pdf
^^^^^^^^^^^^^^^^^^^^
.. figure:: ../_static/image/inference_result.png
.. |Poly Coordinate Diagram| image:: ../_static/image/poly.png
......@@ -8,5 +8,5 @@ Installation
install/install
install//boost_with_cuda
install/download_model_weight_files
install/config
......@@ -9,25 +9,7 @@ appropriate guide based on your system:
- :ref:`ubuntu_22_04_lts_section`
- :ref:`windows_10_or_11_section`
- Quick Deployment with Docker
.. admonition:: Important
:class: tip
Docker requires a GPU with at least 16GB of VRAM, and all acceleration features are enabled by default.
Before running this Docker, you can use the following command to check if your device supports CUDA acceleration on Docker.
.. code-block:: bash
bash docker run --rm --gpus=all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi
.. code:: sh
wget https://github.com/opendatalab/MinerU/raw/master/Dockerfile
docker build -t mineru:latest .
docker run --rm -it --gpus=all mineru:latest /bin/bash
magic-pdf --help
.. _ubuntu_22_04_lts_section:
......
Config
=========
File **magic-pdf.json** is typically located in the **${HOME}** directory under a Linux system or in the **C:\Users\{username}** directory under a Windows system.
.. admonition:: Tip
:class: tip
You can override the default location of config file via the following command:
export MINERU_TOOLS_CONFIG_JSON=new_magic_pdf.json
magic-pdf.json
----------------
.. code:: json
{
"bucket_info":{
"bucket-name-1":["ak", "sk", "endpoint"],
"bucket-name-2":["ak", "sk", "endpoint"]
},
"models-dir":"/tmp/models",
"layoutreader-model-dir":"/tmp/layoutreader",
"device-mode":"cpu",
"layout-config": {
"model": "layoutlmv3"
},
"formula-config": {
"mfd_model": "yolo_v8_mfd",
"mfr_model": "unimernet_small",
"enable": true
},
"table-config": {
"model": "rapid_table",
"enable": false,
"max_time": 400
},
"config_version": "1.0.0"
}
bucket_info
^^^^^^^^^^^^^^
Store the access_key, secret_key and endpoint of AWS S3 Compatible storage config
Example:
.. code:: text
{
"image_bucket":[{access_key}, {secret_key}, {endpoint}],
"video_bucket":[{access_key}, {secret_key}, {endpoint}]
}
models-dir
^^^^^^^^^^^^
Store the models downloaded from **huggingface** or **modelscope**. You do not need to modify this field if you download the model using the scripts shipped with **MinerU**
layoutreader-model-dir
^^^^^^^^^^^^^^^^^^^^^^^
Store the models downloaded from **huggingface** or **modelscope**. You do not need to modify this field if you download the model using the scripts shipped with **MinerU**
device-mode
^^^^^^^^^^^^^^
This field has two options: **cpu** or **cuda**.
**cpu**: inference via cpu
**cuda**: using cuda to accelerate inference
layout-config
^^^^^^^^^^^^^^^
.. code:: json
{
"model": "layoutlmv3"
}
The layout model cannot be disabled at the moment, and we have only one kind of layout model currently.
formula-config
^^^^^^^^^^^^^^^^
.. code:: json
{
"mfd_model": "yolo_v8_mfd",
"mfr_model": "unimernet_small",
"enable": true
}
mfd_model
""""""""""
Specify the formula detection model, options are ['yolo_v8_mfd']
mfr_model
""""""""""
Specify the formula recognition model, options are ['unimernet_small']
Check `UniMERNet <https://github.com/opendatalab/UniMERNet>`_ for more details
enable
""""""""
on-off flag, options are [true, false]. **true** means enable formula inference, **false** means disable formula inference
table-config
^^^^^^^^^^^^^^^^
.. code:: json
{
"model": "rapid_table",
"enable": false,
"max_time": 400
}
model
""""""""
Specify the table inference model, options are ['rapid_table', 'tablemaster', 'struct_eqtable']
max_time
"""""""""
Since table recognition is a time-consuming process, we set a timeout period. If the process exceeds this time, the table recognition will be terminated.
enable
"""""""
on-off flag, options are [true, false]. **true** means enable table inference, **false** means disable table inference
config_version
^^^^^^^^^^^^^^^^
The version of config schema.
.. admonition:: Tip
:class: tip
Check `Config Schema <https://github.com/opendatalab/MinerU/blob/master/magic-pdf.template.json>`_ for the latest details
......@@ -4,6 +4,7 @@ Install
If you encounter any installation issues, please first consult the :doc:`../../additional_notes/faq`.
If the parsing results are not as expected, refer to the :doc:`../../additional_notes/known_issues`.
Also you can try `online demo <https://www.modelscope.cn/studios/OpenDataLab/MinerU>`_ without installation.
.. admonition:: Warning
:class: tip
......@@ -88,7 +89,7 @@ If the parsing results are not as expected, refer to the :doc:`../../additional_
Create an environment
~~~~~~~~~~~~~~~~~~~~~
---------------------------
.. code-block:: shell
......@@ -98,7 +99,7 @@ Create an environment
Download model weight files
~~~~~~~~~~~~~~~~~~~~~~~~~~
------------------------------
.. code-block:: shell
......@@ -107,4 +108,32 @@ Download model weight files
python download_models_hf.py
MinerU is now installed. Check out :doc:`../quick_start` or read :doc:`boost_with_cuda` to accelerate inference
\ No newline at end of file
Install LibreOffice[Optional]
----------------------------------
This section is required for handling the **doc**, **docx**, **ppt**, and **pptx** filetypes. You can **skip** this section if you do not need to process those filetypes.
Linux/Macos Platform
""""""""""""""""""""""
.. code::
apt-get/yum/brew install libreoffice
Windows Platform
""""""""""""""""""""
.. code::
install libreoffice
append "install_dir\LibreOffice\program" to ENVIRONMENT PATH
.. tip::
MinerU is now installed. Check out :doc:`../usage/command_line` to convert your first pdf, **or** read the following sections for more details about installation
Pipe Result
==============
.. admonition:: Tip
:class: tip
Please first navigate to :doc:`tutorial/pipeline` to get an initial understanding of how the pipeline works; this will help in understanding the content of this section.
The **PipeResult** class is a container for storing pipeline processing results and implements a series of methods related to these results, such as draw_layout, draw_span.
Checkout :doc:`../api/pipe_operators` for more details about **PipeResult**
Structure Definitions
-------------------------------
**some_pdf_middle.json**
+----------------+--------------------------------------------------------------+
| Field Name | Description |
| | |
+================+==============================================================+
| pdf_info | list, each element is a dict representing the parsing result |
| | of each PDF page, see the table below for details |
+----------------+--------------------------------------------------------------+
| \_ | ocr \| txt, used to indicate the mode used in this |
| parse_type | intermediate parsing state |
| | |
+----------------+--------------------------------------------------------------+
| \_version_name | string, indicates the version of magic-pdf used in this |
| | parsing |
| | |
+----------------+--------------------------------------------------------------+
**pdf_info**
Field structure description
+-------------------------+------------------------------------------------------------+
| Field | Description |
| Name | |
+=========================+============================================================+
| preproc_blocks | Intermediate result after PDF preprocessing, not yet |
| | segmented |
+-------------------------+------------------------------------------------------------+
| layout_bboxes | Layout segmentation results, containing layout direction |
| | (vertical, horizontal), and bbox, sorted by reading order |
+-------------------------+------------------------------------------------------------+
| page_idx | Page number, starting from 0 |
| | |
+-------------------------+------------------------------------------------------------+
| page_size | Page width and height |
| | |
+-------------------------+------------------------------------------------------------+
| \_layout_tree | Layout tree structure |
| | |
+-------------------------+------------------------------------------------------------+
| images | list, each element is a dict representing an img_block |
+-------------------------+------------------------------------------------------------+
| tables | list, each element is a dict representing a table_block |
+-------------------------+------------------------------------------------------------+
| interline_equation | list, each element is a dict representing an |
| | interline_equation_block |
| | |
+-------------------------+------------------------------------------------------------+
| discarded_blocks | List, block information returned by the model that needs |
| | to be dropped |
| | |
+-------------------------+------------------------------------------------------------+
| para_blocks | Result after segmenting preproc_blocks |
| | |
+-------------------------+------------------------------------------------------------+
In the above table, ``para_blocks`` is an array of dicts, each dict
representing a block structure. A block can support up to one level of
nesting.
**block**
The outer block is referred to as a first-level block, and the fields in
the first-level block include:
+------------------------+-------------------------------------------------------------+
| Field | Description |
| Name | |
+========================+=============================================================+
| type | Block type (table|image) |
+------------------------+-------------------------------------------------------------+
| bbox | Block bounding box coordinates |
+------------------------+-------------------------------------------------------------+
| blocks | list, each element is a dict representing a second-level |
| | block |
+------------------------+-------------------------------------------------------------+
There are only two types of first-level blocks: “table” and “image”. All
other blocks are second-level blocks.
The fields in a second-level block include:
+----------------------+----------------------------------------------------------------+
| Field | Description |
| Name | |
+======================+================================================================+
| | Block type |
| type | |
+----------------------+----------------------------------------------------------------+
| | Block bounding box coordinates |
| bbox | |
+----------------------+----------------------------------------------------------------+
| | list, each element is a dict representing a line, used to |
| lines | describe the composition of a line of information |
+----------------------+----------------------------------------------------------------+
Detailed explanation of second-level block types
================== ======================
type Description
================== ======================
image_body Main body of the image
image_caption Image description text
table_body Main body of the table
table_caption Table description text
table_footnote Table footnote
text Text block
title Title block
interline_equation Block formula
================== ======================
**line**
The field format of a line is as follows:
+---------------------+----------------------------------------------------------------+
| Field | Description |
| Name | |
+=====================+================================================================+
| | Bounding box coordinates of the line |
| bbox | |
+---------------------+----------------------------------------------------------------+
| spans | list, each element is a dict representing a span, used to |
| | describe the composition of the smallest unit |
+---------------------+----------------------------------------------------------------+
**span**
+---------------------+-----------------------------------------------------------+
| Field | Description |
| Name | |
+=====================+===========================================================+
| bbox | Bounding box coordinates of the span |
+---------------------+-----------------------------------------------------------+
| type | Type of the span |
+---------------------+-----------------------------------------------------------+
| content | Text spans use content, chart spans use img_path to store |
| \| | the actual text or screenshot path information |
| img_path | |
+---------------------+-----------------------------------------------------------+
The types of spans are as follows:
================== ==============
type Description
================== ==============
image Image
table Table
text Text
inline_equation Inline formula
interline_equation Block formula
================== ==============
**Summary**
A span is the smallest storage unit for all elements.
The elements stored within para_blocks are block information.
The block structure is as follows:
First-level block (if any) -> Second-level block -> Line -> Span
.. _example-1:
example
^^^^^^^
.. code:: json
{
"pdf_info": [
{
"preproc_blocks": [
{
"type": "text",
"bbox": [
52,
61.956024169921875,
294,
82.99800872802734
],
"lines": [
{
"bbox": [
52,
61.956024169921875,
294,
72.0000228881836
],
"spans": [
{
"bbox": [
54.0,
61.956024169921875,
296.2261657714844,
72.0000228881836
],
"content": "dependent on the service headway and the reliability of the departure ",
"type": "text",
"score": 1.0
}
]
}
]
}
],
"layout_bboxes": [
{
"layout_bbox": [
52,
61,
294,
731
],
"layout_label": "V",
"sub_layout": []
}
],
"page_idx": 0,
"page_size": [
612.0,
792.0
],
"_layout_tree": [],
"images": [],
"tables": [],
"interline_equations": [],
"discarded_blocks": [],
"para_blocks": [
{
"type": "text",
"bbox": [
52,
61.956024169921875,
294,
82.99800872802734
],
"lines": [
{
"bbox": [
52,
61.956024169921875,
294,
72.0000228881836
],
"spans": [
{
"bbox": [
54.0,
61.956024169921875,
296.2261657714844,
72.0000228881836
],
"content": "dependent on the service headway and the reliability of the departure ",
"type": "text",
"score": 1.0
}
]
}
]
}
]
}
],
"_parse_type": "txt",
"_version_name": "0.6.1"
}
Pipeline Result
------------------
.. code:: python
from magic_pdf.pdf_parse_union_core_v2 import pdf_parse_union
from magic_pdf.operators.pipes import PipeResult
from magic_pdf.data.dataset import Dataset
res = pdf_parse_union(*args, **kwargs)
res['_parse_type'] = PARSE_TYPE_OCR
res['_version_name'] = __version__
if 'lang' in kwargs and kwargs['lang'] is not None:
res['lang'] = kwargs['lang']
dataset : Dataset = some_dataset # not real dataset
pipeResult = PipeResult(res, dataset)
some_pdf_layout.pdf
~~~~~~~~~~~~~~~~~~~
Each page layout consists of one or more boxes. The number at the top
left of each box indicates its sequence number. Additionally, in
``layout.pdf``, different content blocks are highlighted with different
background colors.
.. figure:: ../_static/image/layout_example.png
:alt: layout example
layout example
some_pdf_spans.pdf
~~~~~~~~~~~~~~~~~~
All spans on the page are drawn with different colored line frames
according to the span type. This file can be used for quality control,
allowing for quick identification of issues such as missing text or
unrecognized inline formulas.
.. figure:: ../_static/image/spans_example.png
:alt: spans example
spans example
......@@ -2,12 +2,11 @@
Quick Start
==============
Eager to get started? This page gives a good introduction to MinerU. Follow Installation to set up a project and install MinerU first.
Want to learn about the usage methods under different scenarios? This page gives good examples of multiple usage cases that match your needs.
.. toctree::
:maxdepth: 1
quick_start/command_line
quick_start/to_markdown
quick_start/convert_pdf
quick_start/convert_image
quick_start/convert_ms_office
Convert Image
===============
Command Line
^^^^^^^^^^^^^
.. code:: python
# make sure the file have correct suffix
magic-pdf -p a.png -o output -m auto
API
^^^^^^
.. code:: python
import os
from magic_pdf.data.data_reader_writer import FileBasedDataWriter
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
from magic_pdf.data.read_api import read_local_images
# prepare env
local_image_dir, local_md_dir = "output/images", "output"
image_dir = str(os.path.basename(local_image_dir))
os.makedirs(local_image_dir, exist_ok=True)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(
local_md_dir
)
# proc
## Create Dataset Instance
input_file = "some_image.jpg" # replace with real image file
input_file_name = input_file.split(".")[0]
ds = read_local_images(input_file)[0]
# ocr mode
ds.apply(doc_analyze, ocr=True).pipe_ocr_mode(image_writer).dump_md(
md_writer, f"{input_file_name}.md", image_dir
)
Convert Doc
=============
.. admonition:: Warning
:class: tip
When processing MS-Office files, we first use third-party software to convert the MS-Office files to PDF.
For certain MS-Office files, the quality of the converted PDF files may not be very high, which can affect the quality of the final output.
Command Line
^^^^^^^^^^^^^
.. code:: python
# replace with real ms-office file, we support MS-DOC, MS-DOCX, MS-PPT, MS-PPTX now
magic-pdf -p a.doc -o output -m auto
API
^^^^^^^^
.. code:: python
import os
from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedDataReader
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
from magic_pdf.data.read_api import read_local_office
from magic_pdf.config.enums import SupportedPdfParseMethod
# prepare env
local_image_dir, local_md_dir = "output/images", "output"
image_dir = str(os.path.basename(local_image_dir))
os.makedirs(local_image_dir, exist_ok=True)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(
local_md_dir
)
# proc
## Create Dataset Instance
input_file = "some_doc.doc" # replace with real ms-office file, we support MS-DOC, MS-DOCX, MS-PPT, MS-PPTX now
input_file_name = input_file.split(".")[0]
ds = read_local_office(input_file)[0]
## inference
if ds.classify() == SupportedPdfParseMethod.OCR:
ds.apply(doc_analyze, ocr=True).pipe_ocr_mode(image_writer).dump_md(
md_writer, f"{input_file_name}.md", image_dir)
else:
ds.apply(doc_analyze, ocr=False).pipe_txt_mode(image_writer).dump_md(
md_writer, f"{input_file_name}.md", image_dir)
Convert PDF
============
Command Line
^^^^^^^^^^^^^
.. code:: python
# make sure the file have correct suffix
magic-pdf -p a.pdf -o output -m auto
API
^^^^^^
.. code:: python
import os
from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedDataReader
from magic_pdf.data.dataset import PymuDocDataset
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
from magic_pdf.config.enums import SupportedPdfParseMethod
# args
pdf_file_name = "abc.pdf" # replace with the real pdf path
name_without_suff = pdf_file_name.split(".")[0]
# prepare env
local_image_dir, local_md_dir = "output/images", "output"
image_dir = str(os.path.basename(local_image_dir))
os.makedirs(local_image_dir, exist_ok=True)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(
local_md_dir
)
# read bytes
reader1 = FileBasedDataReader("")
pdf_bytes = reader1.read(pdf_file_name) # read the pdf content
# proc
## Create Dataset Instance
ds = PymuDocDataset(pdf_bytes)
## inference
if ds.classify() == SupportedPdfParseMethod.OCR:
ds.apply(doc_analyze, ocr=True).pipe_ocr_mode(image_writer).dump_md(
md_writer, f"{name_without_suff}.md", image_dir
)
else:
ds.apply(doc_analyze, ocr=False).pipe_txt_mode(image_writer).dump_md(
md_writer, f"{name_without_suff}.md", image_dir
)
......@@ -7,6 +7,5 @@ From the beginning to the end, Show how to using mineru via a minimal project
.. toctree::
:maxdepth: 1
tutorial/output_file_description
tutorial/pipeline
......@@ -28,7 +28,6 @@ Minimal Example
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(
local_md_dir
)
image_dir = str(os.path.basename(local_image_dir))
# read bytes
reader1 = FileBasedDataReader("")
......@@ -85,8 +84,6 @@ These stages are linked together through methods like ``apply``, ``doc_analyze``
.. admonition:: Tip
:class: tip
For more examples on how to use ``Dataset``, ``InferenceResult``, and ``PipeResult``, please refer to :doc:`../quick_start/to_markdown`
For more detailed information about ``Dataset``, ``InferenceResult``, and ``PipeResult``, please refer to :doc:`../../api/dataset`, :doc:`../../api/model_operators`, :doc:`../../api/pipe_operators`
......
Usage
========
.. toctree::
:maxdepth: 1
usage/command_line
usage/api
usage/docker
Api Usage
===========
PDF
----
Local File Example
^^^^^^^^^^^^^^^^^^
.. code:: python
import os
from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedDataReader
from magic_pdf.data.dataset import PymuDocDataset
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
from magic_pdf.config.enums import SupportedPdfParseMethod
# args
pdf_file_name = "abc.pdf" # replace with the real pdf path
name_without_suff = pdf_file_name.split(".")[0]
# prepare env
local_image_dir, local_md_dir = "output/images", "output"
image_dir = str(os.path.basename(local_image_dir))
os.makedirs(local_image_dir, exist_ok=True)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(
local_md_dir
)
# read bytes
reader1 = FileBasedDataReader("")
pdf_bytes = reader1.read(pdf_file_name) # read the pdf content
# proc
## Create Dataset Instance
ds = PymuDocDataset(pdf_bytes)
## inference
if ds.classify() == SupportedPdfParseMethod.OCR:
infer_result = ds.apply(doc_analyze, ocr=True)
## pipeline
pipe_result = infer_result.pipe_ocr_mode(image_writer)
else:
infer_result = ds.apply(doc_analyze, ocr=False)
## pipeline
pipe_result = infer_result.pipe_txt_mode(image_writer)
### draw model result on each page
infer_result.draw_model(os.path.join(local_md_dir, f"{name_without_suff}_model.pdf"))
### get model inference result
model_inference_result = infer_result.get_infer_res()
### draw layout result on each page
pipe_result.draw_layout(os.path.join(local_md_dir, f"{name_without_suff}_layout.pdf"))
### draw spans result on each page
pipe_result.draw_span(os.path.join(local_md_dir, f"{name_without_suff}_spans.pdf"))
### get markdown content
md_content = pipe_result.get_markdown(image_dir)
### dump markdown
pipe_result.dump_md(md_writer, f"{name_without_suff}.md", image_dir)
### get content list content
content_list_content = pipe_result.get_content_list(image_dir)
### dump content list
pipe_result.dump_content_list(md_writer, f"{name_without_suff}_content_list.json", image_dir)
### get middle json
middle_json_content = pipe_result.get_middle_json()
### dump middle json
pipe_result.dump_middle_json(md_writer, f'{name_without_suff}_middle.json')
S3 File Example
^^^^^^^^^^^^^^^^
.. code:: python
import os
from magic_pdf.data.data_reader_writer import S3DataReader, S3DataWriter
from magic_pdf.data.dataset import PymuDocDataset
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
from magic_pdf.config.enums import SupportedPdfParseMethod
bucket_name = "{Your S3 Bucket Name}" # replace with real bucket name
ak = "{Your S3 access key}" # replace with real s3 access key
sk = "{Your S3 secret key}" # replace with real s3 secret key
endpoint_url = "{Your S3 endpoint_url}" # replace with real s3 endpoint_url
reader = S3DataReader('unittest/tmp/', bucket_name, ak, sk, endpoint_url) # replace `unittest/tmp` with the real s3 prefix
writer = S3DataWriter('unittest/tmp', bucket_name, ak, sk, endpoint_url)
image_writer = S3DataWriter('unittest/tmp/images', bucket_name, ak, sk, endpoint_url)
md_writer = S3DataWriter('unittest/tmp', bucket_name, ak, sk, endpoint_url)
local_image_dir, local_md_dir = "output/images", "output"
image_dir = str(os.path.basename(local_image_dir))
# args
pdf_file_name = (
f"s3://{bucket_name}/unittest/tmp/bug5-11.pdf" # replace with the real s3 path
)
# prepare env
local_dir = "output"
name_without_suff = os.path.basename(pdf_file_name).split(".")[0]
# read bytes
pdf_bytes = reader.read(pdf_file_name) # read the pdf content
# proc
## Create Dataset Instance
ds = PymuDocDataset(pdf_bytes)
## inference
if ds.classify() == SupportedPdfParseMethod.OCR:
infer_result = ds.apply(doc_analyze, ocr=True)
## pipeline
pipe_result = infer_result.pipe_ocr_mode(image_writer)
else:
infer_result = ds.apply(doc_analyze, ocr=False)
## pipeline
pipe_result = infer_result.pipe_txt_mode(image_writer)
### draw model result on each page
infer_result.draw_model(os.path.join(local_md_dir, f"{name_without_suff}_model.pdf"))
### get model inference result
model_inference_result = infer_result.get_infer_res()
### draw layout result on each page
pipe_result.draw_layout(os.path.join(local_md_dir, f"{name_without_suff}_layout.pdf"))
### draw spans result on each page
pipe_result.draw_span(os.path.join(local_md_dir, f"{name_without_suff}_spans.pdf"))
### dump markdown
pipe_result.dump_md(md_writer, f"{name_without_suff}.md", image_dir)
### dump content list
pipe_result.dump_content_list(md_writer, f"{name_without_suff}_content_list.json", image_dir)
### get markdown content
md_content = pipe_result.get_markdown(image_dir)
### get content list content
content_list_content = pipe_result.get_content_list(image_dir)
### get middle json
middle_json_content = pipe_result.get_middle_json()
### dump middle json
pipe_result.dump_middle_json(md_writer, f'{name_without_suff}_middle.json')
MS-Office
----------
.. code:: python
import os
from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedDataReader
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
from magic_pdf.data.read_api import read_local_office
# prepare env
local_image_dir, local_md_dir = "output/images", "output"
image_dir = str(os.path.basename(local_image_dir))
os.makedirs(local_image_dir, exist_ok=True)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(
local_md_dir
)
# proc
## Create Dataset Instance
input_file = "some_ppt.ppt" # replace with real ms-office file
input_file_name = input_file.split(".")[0]
ds = read_local_office(input_file)[0]
ds.apply(doc_analyze, ocr=True).pipe_txt_mode(image_writer).dump_md(
md_writer, f"{input_file_name}.md", image_dir
)
This code snippet can be used to process **ppt**, **pptx**, **doc**, **docx** files
Image
---------
Single Image File
^^^^^^^^^^^^^^^^^^^
.. code:: python
import os
from magic_pdf.data.data_reader_writer import FileBasedDataWriter
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
from magic_pdf.data.read_api import read_local_images
# prepare env
local_image_dir, local_md_dir = "output/images", "output"
image_dir = str(os.path.basename(local_image_dir))
os.makedirs(local_image_dir, exist_ok=True)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(
local_md_dir
)
# proc
## Create Dataset Instance
input_file = "some_image.jpg" # replace with real image file
input_file_name = input_file.split(".")[0]
ds = read_local_images(input_file)[0]
ds.apply(doc_analyze, ocr=True).pipe_ocr_mode(image_writer).dump_md(
md_writer, f"{input_file_name}.md", image_dir
)
Directory That Contains Images
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. code:: python
import os
from magic_pdf.data.data_reader_writer import FileBasedDataWriter
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
from magic_pdf.data.read_api import read_local_images
# prepare env
local_image_dir, local_md_dir = "output/images", "output"
image_dir = str(os.path.basename(local_image_dir))
os.makedirs(local_image_dir, exist_ok=True)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(
local_md_dir
)
# proc
## Create Dataset Instance
input_directory = "some_image_dir/" # replace with real directory that contains images
dss = read_local_images(input_directory, suffixes=['.png', '.jpg'])
count = 0
for ds in dss:
ds.apply(doc_analyze, ocr=True).pipe_ocr_mode(image_writer).dump_md(
md_writer, f"{count}.md", image_dir
)
count += 1
Check :doc:`../data/data_reader_writer` for more [reader | writer] examples and check :doc:`../../api/pipe_operators` or :doc:`../../api/model_operators` for api details
......@@ -10,7 +10,8 @@ Command Line
Options:
-v, --version display the version and exit
-p, --path PATH local pdf filepath or directory [required]
-p, --path PATH local filepath or directory. support PDF, PPT,
PPTX, DOC, DOCX, PNG, JPG files [required]
-o, --output-dir PATH output local directory [required]
-m, --method [ocr|txt|auto] the method for parsing pdf. ocr: using ocr
technique to extract information from pdf. txt:
......@@ -40,6 +41,20 @@ Command Line
## command line example
magic-pdf -p {some_pdf} -o {some_output_dir} -m auto
.. admonition:: Important
:class: tip
The file name must end with one of the following suffixes:
.pdf
.png
.jpg
.ppt
.pptx
.doc
.docx
``{some_pdf}`` can be a single PDF file or a directory containing
multiple PDFs. The results will be saved in the ``{some_output_dir}``
directory. The output file list is as follows:
......@@ -57,6 +72,6 @@ directory. The output file list is as follows:
.. admonition:: Tip
:class: tip
For more information about the output files, please refer to the :doc:`../tutorial/output_file_description`
For more information about the output files, please refer to the :doc:`../inference_result` or :doc:`../pipe_result`
Docker
=======
.. admonition:: Important
:class: tip
Docker requires a GPU with at least 16GB of VRAM, and all acceleration features are enabled by default.
Before running this Docker, you can use the following command to check if your device supports CUDA acceleration on Docker.
.. code-block:: bash
docker run --rm --gpus=all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi
.. code:: sh
wget https://github.com/opendatalab/MinerU/raw/master/Dockerfile
docker build -t mineru:latest .
docker run --rm -it --gpus=all mineru:latest /bin/bash
magic-pdf --help
......@@ -8,6 +8,7 @@ myst-parser
Pillow==8.4.0
pydantic>=2.7.2,<2.8.0
PyMuPDF>=1.24.9
pdfminer.six==20231228
sphinx
sphinx-argparse>=0.5.2
sphinx-book-theme>=1.1.3
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment