Merge pull request #1427 from opendatalab/release-1.0.0

Release 1.0.0

Merge pull request #1427 from opendatalab/release-1.0.0
Release 1.0.0
4bb54393 · Xiaomeng Zhao · GitHub · 04f084ac · 1c9f9942 · 4bb54393
Unverified Commit 4bb54393 authored Jan 10, 2025 by Xiaomeng Zhao Committed by GitHub Jan 10, 2025
20 changed files
--- a/next_docs/en/user_guide.rst
+++ b/next_docs/en/user_guide.rst
@@ -4,7 +4,9 @@
    :maxdepth: 2

    user_guide/install
+    user_guide/usage
    user_guide/quick_start
    user_guide/tutorial
    user_guide/data
-    
+    user_guide/inference_result
+    user_guide/pipe_result
--- a/next_docs/en/user_guide/data/data_reader_writer.rst
+++ b/next_docs/en/user_guide/data/data_reader_writer.rst
--- a/next_docs/en/user_guide/data/read_api.rst
+++ b/next_docs/en/user_guide/data/read_api.rst
@@ -18,24 +18,50 @@ Read the content from jsonl which may be located on local machine or remote s3. if y

 .. code:: python

-    from magic_pdf.data.io.read_api import *
+    from magic_pdf.data.read_api import *
+    from magic_pdf.data.data_reader_writer import MultiBucketS3DataReader
+    from magic_pdf.data.schemas import S3Config

-    # read jsonl from local machine 
-    datasets = read_jsonl("tt.jsonl", None)
+    # read jsonl from local machine
+    datasets = read_jsonl("tt.jsonl", None)   # replace with real jsonl file

    # read jsonl from remote s3
-    datasets = read_jsonl("s3://bucket_1/tt.jsonl", s3_reader)

+    bucket = "bucket_1"                     # replace with real s3 bucket
+    ak = "access_key_1"                     # replace with real s3 access key
+    sk = "secret_key_1"                     # replace with real s3 secret key
+    endpoint_url = "endpoint_url_1"         # replace with real s3 endpoint url
+
+    bucket_2 = "bucket_2"                   # replace with real s3 bucket
+    ak_2 = "access_key_2"                   # replace with real s3 access key
+    sk_2 = "secret_key_2"                   # replace with real s3 secret key
+    endpoint_url_2 = "endpoint_url_2"       # replace with real s3 endpoint url
+
+    s3configs = [
+        S3Config(
+            bucket_name=bucket, access_key=ak, secret_key=sk, endpoint_url=endpoint_url
+        ),
+        S3Config(
+            bucket_name=bucket_2,
+            access_key=ak_2,
+            secret_key=sk_2,
+            endpoint_url=endpoint_url_2,
+        ),
+    ]
+
+    s3_reader = MultiBucketS3DataReader(bucket, s3configs)
+
+    datasets = read_jsonl(f"s3://{bucket}/tt.jsonl", s3_reader)  # replace with real s3 jsonl file

 read_local_pdfs
-^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^

 Read pdf from path or directory.


 .. code:: python

-    from magic_pdf.data.io.read_api import *
+    from magic_pdf.data.read_api import *

    # read pdf path
    datasets = read_local_pdfs("tt.pdf")
@@ -51,13 +77,30 @@ Read images from path or directory

 .. code:: python 

-    from magic_pdf.data.io.read_api import *
+    from magic_pdf.data.read_api import *
+
+    # read from image path 
+    datasets = read_local_images("tt.png")  # replace with real file path
+
+    # read files from directory that endswith suffix in suffixes array 
+    datasets = read_local_images("images/", suffixes=[".png", ".jpg"])  # replace with real directory 
+
+
+read_local_office
+^^^^^^^^^^^^^^^^^^^^
+Read MS-Office files from path or directory
+
+.. code:: python 
+
+    from magic_pdf.data.read_api import *

    # read from office file path
-    datasets = read_local_images("tt.png")
+    datasets = read_local_office("tt.doc")  # replace with real file path

    # read files from directory that endswith suffix in suffixes array 
-    datasets = read_local_images("images/", suffixes=["png", "jpg"])
+    datasets = read_local_office("docs/")  # replace with real directory 
+
+


 Check :doc:`../../api/read_api` for more details
\ No newline at end of file
--- a/next_docs/en/user_guide/inference_result.rst
+++ b/next_docs/en/user_guide/inference_result.rst
--- a/next_docs/en/user_guide/install.rst
+++ b/next_docs/en/user_guide/install.rst
@@ -8,5 +8,5 @@ Installation
   install/install
   install/boost_with_cuda
   install/download_model_weight_files
-
+   install/config

--- a/next_docs/en/user_guide/install/boost_with_cuda.rst
+++ b/next_docs/en/user_guide/install/boost_with_cuda.rst
@@ -9,25 +9,7 @@ appropriate guide based on your system:

 -  :ref:`ubuntu_22_04_lts_section`
 -  :ref:`windows_10_or_11_section`
-  Quick Deployment with Docker

-.. admonition:: Important
-   :class: tip
-
-   Docker requires a GPU with at least 16GB of VRAM, and all acceleration features are enabled by default.
-
-   Before running this Docker, you can use the following command to check if your device supports CUDA acceleration on Docker. 
-
-   .. code-block:: bash
-
-      bash  docker run --rm --gpus=all nvidia/cuda:12.1.0-base-ubuntu22.04 nvidia-smi
-
-.. code:: sh
-
-   wget https://github.com/opendatalab/MinerU/raw/master/Dockerfile
-   docker build -t mineru:latest .
-   docker run --rm -it --gpus=all mineru:latest /bin/bash
-   magic-pdf --help

 .. _ubuntu_22_04_lts_section:


--- a/next_docs/en/user_guide/install/config.rst
+++ b/next_docs/en/user_guide/install/config.rst
--- a/next_docs/en/user_guide/install/install.rst
+++ b/next_docs/en/user_guide/install/install.rst
--- a/next_docs/en/user_guide/pipe_result.rst
+++ b/next_docs/en/user_guide/pipe_result.rst
--- a/next_docs/en/user_guide/quick_start.rst
+++ b/next_docs/en/user_guide/quick_start.rst
--- a/next_docs/en/user_guide/quick_start/convert_image.rst
+++ b/next_docs/en/user_guide/quick_start/convert_image.rst
--- a/next_docs/en/user_guide/quick_start/convert_ms_office.rst
+++ b/next_docs/en/user_guide/quick_start/convert_ms_office.rst
--- a/next_docs/en/user_guide/quick_start/convert_pdf.rst
+++ b/next_docs/en/user_guide/quick_start/convert_pdf.rst
--- a/next_docs/en/user_guide/tutorial.rst
+++ b/next_docs/en/user_guide/tutorial.rst
@@ -7,6 +7,5 @@ From beginning to end, show how to use MinerU via a minimal project
 .. toctree::
    :maxdepth: 1

-    tutorial/output_file_description
    tutorial/pipeline

--- a/next_docs/en/user_guide/tutorial/pipeline.rst
+++ b/next_docs/en/user_guide/tutorial/pipeline.rst
--- a/next_docs/en/user_guide/usage.rst
+++ b/next_docs/en/user_guide/usage.rst
--- a/next_docs/en/user_guide/usage/api.rst
+++ b/next_docs/en/user_guide/usage/api.rst
--- a/next_docs/en/user_guide/quick_start/command_line.rst
+++ b/next_docs/en/user_guide/quick_start/command_line.rst
--- a/next_docs/en/user_guide/usage/docker.rst
+++ b/next_docs/en/user_guide/usage/docker.rst
--- a/next_docs/requirements.txt
+++ b/next_docs/requirements.txt
@@ -8,6 +8,7 @@ myst-parser
 Pillow==8.4.0
 pydantic>=2.7.2,<2.8.0
 PyMuPDF>=1.24.9
+pdfminer.six==20231228
 sphinx
 sphinx-argparse>=0.5.2
 sphinx-book-theme>=1.1.3