Unverified Commit 6d571e2e authored by Kaiwen Liu's avatar Kaiwen Liu Committed by GitHub
Browse files

Merge pull request #7 from opendatalab/dev

Dev
parents a3358878 37c335ae
from loguru import logger
def ImportPIL(f):
try:
import PIL # noqa: F401
except ImportError:
logger.error('Pillow not installed, please install by pip.')
exit(1)
return f
version: 2
build:
os: ubuntu-22.04
tools:
python: "3.10"
formats:
- epub
python:
install:
- requirements: docs/requirements.txt
sphinx:
configuration: docs/en/conf.py
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
Data Api
------------------
.. toctree::
:maxdepth: 2
api/dataset.rst
api/data_reader_writer.rst
api/read_api.rst
Data Reader Writer
--------------------
.. autoclass:: magic_pdf.data.data_reader_writer.DataReader
:members:
:inherited-members:
.. autoclass:: magic_pdf.data.data_reader_writer.DataWriter
:members:
:inherited-members:
.. autoclass:: magic_pdf.data.data_reader_writer.S3DataReader
:members:
:inherited-members:
.. autoclass:: magic_pdf.data.data_reader_writer.S3DataWriter
:members:
:inherited-members:
.. autoclass:: magic_pdf.data.data_reader_writer.FileBasedDataReader
:members:
:inherited-members:
.. autoclass:: magic_pdf.data.data_reader_writer.FileBasedDataWriter
:members:
:inherited-members:
.. autoclass:: magic_pdf.data.data_reader_writer.S3DataReader
:members:
:inherited-members:
.. autoclass:: magic_pdf.data.data_reader_writer.S3DataWriter
:members:
:inherited-members:
.. autoclass:: magic_pdf.data.data_reader_writer.MultiBucketS3DataReader
:members:
:inherited-members:
.. autoclass:: magic_pdf.data.data_reader_writer.MultiBucketS3DataWriter
:members:
:inherited-members:
Dataset Api
------------------
.. autoclass:: magic_pdf.data.dataset.PageableData
:members:
:inherited-members:
.. autoclass:: magic_pdf.data.dataset.Dataset
:members:
:inherited-members:
.. autoclass:: magic_pdf.data.dataset.ImageDataset
:members:
:inherited-members:
.. autoclass:: magic_pdf.data.dataset.PymuDocDataset
:members:
:inherited-members:
.. autoclass:: magic_pdf.data.dataset.Doc
:members:
:inherited-members:
read_api Api
------------------
.. automodule:: magic_pdf.data.read_api
:members:
:inherited-members:
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import os
import subprocess
import sys
from sphinx.ext import autodoc
def install(package):
subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])
requirements_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'requirements.txt'))
if os.path.exists(requirements_path):
with open(requirements_path) as f:
packages = f.readlines()
for package in packages:
install(package.strip())
sys.path.insert(0, os.path.abspath('../..'))
# -- Project information -----------------------------------------------------
project = 'MinerU'
copyright = '2024, MinerU Contributors'
author = 'OpenDataLab'
# The full version, including alpha/beta/rc tags
version_file = '../../magic_pdf/libs/version.py'
with open(version_file) as f:
exec(compile(f.read(), version_file, 'exec'))
__version__ = locals()['__version__']
# The short X.Y version
version = __version__
# The full version, including alpha/beta/rc tags
release = __version__
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.napoleon',
'sphinx.ext.viewcode',
'sphinx.ext.intersphinx',
'sphinx_copybutton',
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'myst_parser',
'sphinxarg.ext',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# Exclude the prompt "$" when copying code
copybutton_prompt_text = r'\$ '
copybutton_prompt_is_regexp = True
language = 'en'
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_book_theme'
html_logo = '_static/image/logo.png'
html_theme_options = {
'path_to_docs': 'docs/en',
'repository_url': 'https://github.com/opendatalab/MinerU',
'use_repository_button': True,
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']
# Mock out external dependencies here.
autodoc_mock_imports = [
'cpuinfo',
'torch',
'transformers',
'psutil',
'prometheus_client',
'sentencepiece',
'vllm.cuda_utils',
'vllm._C',
'numpy',
'tqdm',
]
class MockedClassDocumenter(autodoc.ClassDocumenter):
"""Remove note about base class when a class is derived from object."""
def add_line(self, line: str, source: str, *lineno: int) -> None:
if line == ' Bases: :py:class:`object`':
return
super().add_line(line, source, *lineno)
autodoc.ClassDocumenter = MockedClassDocumenter
navigation_with_keys = False
.. xtuner documentation master file, created by
sphinx-quickstart on Tue Jan 9 16:33:06 2024.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to the MinerU Documentation
==============================================
.. figure:: ./_static/image/logo.png
:align: center
:alt: mineru
:class: no-scaled-link
.. raw:: html
<p style="text-align:center">
<strong>A one-stop, open-source, high-quality data extraction tool
</strong>
</p>
<p style="text-align:center">
<script async defer src="https://buttons.github.io/buttons.js"></script>
<a class="github-button" href="https://github.com/opendatalab/MinerU" data-show-count="true" data-size="large" aria-label="Star">Star</a>
<a class="github-button" href="https://github.com/opendatalab/MinerU/subscription" data-icon="octicon-eye" data-size="large" aria-label="Watch">Watch</a>
<a class="github-button" href="https://github.com/opendatalab/MinerU/fork" data-icon="octicon-repo-forked" data-size="large" aria-label="Fork">Fork</a>
</p>
API Reference
-------------
If you are looking for information on a specific function, class or
method, this part of the documentation is for you.
.. toctree::
:maxdepth: 2
api
@ECHO OFF
pushd %~dp0
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set SOURCEDIR=.
set BUILDDIR=_build
%SPHINXBUILD% >NUL 2>NUL
if errorlevel 9009 (
echo.
echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
echo.installed, then set the SPHINXBUILD environment variable to point
echo.to the full path of the 'sphinx-build' executable. Alternatively you
echo.may add the Sphinx directory to PATH.
echo.
echo.If you don't have Sphinx installed, grab it from
echo.https://www.sphinx-doc.org/
exit /b 1
)
if "%1" == "" goto help
%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
goto end
:help
%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
:end
popd
boto3>=1.28.43
loguru>=0.6.0
myst-parser
Pillow==8.4.0
pydantic>=2.7.2,<2.8.0
PyMuPDF>=1.24.9
sphinx
sphinx-argparse
sphinx-book-theme
sphinx-copybutton
sphinx_rtd_theme
version: 2
build:
os: ubuntu-22.04
tools:
python: "3.10"
formats:
- epub
python:
install:
- requirements: docs/requirements.txt
sphinx:
configuration: docs/zh_cn/conf.py
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS ?=
SPHINXBUILD ?= sphinx-build
SOURCEDIR = .
BUILDDIR = _build
# Put it first so that "make" without argument is like "make help".
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
.PHONY: help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
# Configuration file for the Sphinx documentation builder.
#
# This file only contains a selection of the most common options. For a full
# list see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
import os
import subprocess
import sys
from sphinx.ext import autodoc
def install(package):
subprocess.check_call([sys.executable, '-m', 'pip', 'install', package])
requirements_path = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', 'requirements.txt'))
if os.path.exists(requirements_path):
with open(requirements_path) as f:
packages = f.readlines()
for package in packages:
install(package.strip())
sys.path.insert(0, os.path.abspath('../..'))
# -- Project information -----------------------------------------------------
project = 'MinerU'
copyright = '2024, OpenDataLab'
author = 'MinerU Contributors'
# The full version, including alpha/beta/rc tags
version_file = '../../magic_pdf/libs/version.py'
with open(version_file) as f:
exec(compile(f.read(), version_file, 'exec'))
__version__ = locals()['__version__']
# The short X.Y version
version = __version__
# The full version, including alpha/beta/rc tags
release = __version__
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.napoleon',
'sphinx.ext.viewcode',
'sphinx.ext.intersphinx',
'sphinx_copybutton',
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'myst_parser',
'sphinxarg.ext',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# Exclude the prompt "$" when copying code
copybutton_prompt_text = r'\$ '
copybutton_prompt_is_regexp = True
language = 'zh_CN'
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_book_theme'
html_logo = '_static/image/logo.png'
html_theme_options = {
'path_to_docs': 'docs/zh_cn',
'repository_url': 'https://github.com/opendatalab/MinerU',
'use_repository_button': True,
}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
# html_static_path = ['_static']
# Mock out external dependencies here.
autodoc_mock_imports = [
'cpuinfo',
'torch',
'transformers',
'psutil',
'prometheus_client',
'sentencepiece',
'vllm.cuda_utils',
'vllm._C',
'numpy',
'tqdm',
]
class MockedClassDocumenter(autodoc.ClassDocumenter):
"""Remove note about base class when a class is derived from object."""
def add_line(self, line: str, source: str, *lineno: int) -> None:
if line == ' Bases: :py:class:`object`':
return
super().add_line(line, source, *lineno)
autodoc.ClassDocumenter = MockedClassDocumenter
navigation_with_keys = False
.. xtuner documentation master file, created by
sphinx-quickstart on Tue Jan 9 16:33:06 2024.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
欢迎来到 MinerU 的中文文档
==============================================
.. figure:: ./_static/image/logo.png
:align: center
:alt: mineru
:class: no-scaled-link
.. raw:: html
<p style="text-align:center">
<strong> 一站式开源高质量数据提取工具
</strong>
</p>
<p style="text-align:center">
<script async defer src="https://buttons.github.io/buttons.js"></script>
<a class="github-button" href="https://github.com/opendatalab/MinerU" data-show-count="true" data-size="large" aria-label="Star">Star</a>
<a class="github-button" href="https://github.com/opendatalab/MinerU/subscription" data-icon="octicon-eye" data-size="large" aria-label="Watch">Watch</a>
<a class="github-button" href="https://github.com/opendatalab/MinerU/fork" data-icon="octicon-repo-forked" data-size="large" aria-label="Fork">Fork</a>
</p>
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment