Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
bcbbee8c
Unverified
Commit
bcbbee8c
authored
Jun 13, 2025
by
Xiaomeng Zhao
Committed by
GitHub
Jun 13, 2025
Browse files
Merge pull request #2622 from myhloli/dev
Dev
parents
3cc3f754
ced5a7b4
Changes
561
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
68 additions
and
86 deletions
+68
-86
mineru/backend/vlm/vlm_analyze.py
mineru/backend/vlm/vlm_analyze.py
+10
-7
mineru/cli/client.py
mineru/cli/client.py
+4
-3
mineru/cli/common.py
mineru/cli/common.py
+1
-3
mineru/model/vlm_sglang_model/model.py
mineru/model/vlm_sglang_model/model.py
+4
-0
mineru/model/vlm_sglang_model/server.py
mineru/model/vlm_sglang_model/server.py
+15
-4
mineru/utils/config_reader.py
mineru/utils/config_reader.py
+9
-3
mineru/utils/draw_bbox.py
mineru/utils/draw_bbox.py
+4
-2
mineru/utils/models_download_utils.py
mineru/utils/models_download_utils.py
+10
-2
mineru/utils/ocr_utils.py
mineru/utils/ocr_utils.py
+11
-1
next_docs/en/.readthedocs.yaml
next_docs/en/.readthedocs.yaml
+0
-16
next_docs/en/Makefile
next_docs/en/Makefile
+0
-20
next_docs/en/_static/image/MinerU-logo-hq.png
next_docs/en/_static/image/MinerU-logo-hq.png
+0
-0
next_docs/en/_static/image/MinerU-logo.png
next_docs/en/_static/image/MinerU-logo.png
+0
-0
next_docs/en/_static/image/ReadTheDocs.svg
next_docs/en/_static/image/ReadTheDocs.svg
+0
-25
next_docs/en/_static/image/datalab_logo.png
next_docs/en/_static/image/datalab_logo.png
+0
-0
next_docs/en/_static/image/flowchart_en.png
next_docs/en/_static/image/flowchart_en.png
+0
-0
next_docs/en/_static/image/flowchart_zh_cn.png
next_docs/en/_static/image/flowchart_zh_cn.png
+0
-0
next_docs/en/_static/image/inference_result.png
next_docs/en/_static/image/inference_result.png
+0
-0
next_docs/en/_static/image/layout_example.png
next_docs/en/_static/image/layout_example.png
+0
-0
next_docs/en/_static/image/logo.png
next_docs/en/_static/image/logo.png
+0
-0
No files found.
mineru/backend/vlm/vlm_analyze.py
View file @
bcbbee8c
...
@@ -9,6 +9,7 @@ from .base_predictor import BasePredictor
...
@@ -9,6 +9,7 @@ from .base_predictor import BasePredictor
from
.predictor
import
get_predictor
from
.predictor
import
get_predictor
from
.token_to_middle_json
import
result_to_middle_json
from
.token_to_middle_json
import
result_to_middle_json
from
...utils.enum_class
import
ModelPath
from
...utils.enum_class
import
ModelPath
from
...utils.models_download_utils
import
auto_download_and_get_model_root_path
class
ModelSingleton
:
class
ModelSingleton
:
...
@@ -28,6 +29,8 @@ class ModelSingleton:
...
@@ -28,6 +29,8 @@ class ModelSingleton:
)
->
BasePredictor
:
)
->
BasePredictor
:
key
=
(
backend
,)
key
=
(
backend
,)
if
key
not
in
self
.
_models
:
if
key
not
in
self
.
_models
:
if
not
model_path
:
model_path
=
auto_download_and_get_model_root_path
(
"/"
,
"vlm"
)
self
.
_models
[
key
]
=
get_predictor
(
self
.
_models
[
key
]
=
get_predictor
(
backend
=
backend
,
backend
=
backend
,
model_path
=
model_path
,
model_path
=
model_path
,
...
@@ -40,8 +43,8 @@ def doc_analyze(
...
@@ -40,8 +43,8 @@ def doc_analyze(
pdf_bytes
,
pdf_bytes
,
image_writer
:
DataWriter
|
None
,
image_writer
:
DataWriter
|
None
,
predictor
:
BasePredictor
|
None
=
None
,
predictor
:
BasePredictor
|
None
=
None
,
backend
=
"
huggingface
"
,
backend
=
"
transformers
"
,
model_path
=
ModelPath
.
vlm_root_hf
,
model_path
:
str
|
None
=
None
,
server_url
:
str
|
None
=
None
,
server_url
:
str
|
None
=
None
,
):
):
if
predictor
is
None
:
if
predictor
is
None
:
...
@@ -53,10 +56,10 @@ def doc_analyze(
...
@@ -53,10 +56,10 @@ def doc_analyze(
# load_images_time = round(time.time() - load_images_start, 2)
# load_images_time = round(time.time() - load_images_start, 2)
# logger.info(f"load images cost: {load_images_time}, speed: {round(len(images_base64_list)/load_images_time, 3)} images/s")
# logger.info(f"load images cost: {load_images_time}, speed: {round(len(images_base64_list)/load_images_time, 3)} images/s")
infer_start
=
time
.
time
()
#
infer_start = time.time()
results
=
predictor
.
batch_predict
(
images
=
images_base64_list
)
results
=
predictor
.
batch_predict
(
images
=
images_base64_list
)
infer_time
=
round
(
time
.
time
()
-
infer_start
,
2
)
#
infer_time = round(time.time() - infer_start, 2)
logger
.
info
(
f
"infer finished, cost:
{
infer_time
}
, speed:
{
round
(
len
(
results
)
/
infer_time
,
3
)
}
page/s"
)
#
logger.info(f"infer finished, cost: {infer_time}, speed: {round(len(results)/infer_time, 3)} page/s")
middle_json
=
result_to_middle_json
(
results
,
images_list
,
pdf_doc
,
image_writer
)
middle_json
=
result_to_middle_json
(
results
,
images_list
,
pdf_doc
,
image_writer
)
return
middle_json
,
results
return
middle_json
,
results
...
@@ -66,8 +69,8 @@ async def aio_doc_analyze(
...
@@ -66,8 +69,8 @@ async def aio_doc_analyze(
pdf_bytes
,
pdf_bytes
,
image_writer
:
DataWriter
|
None
,
image_writer
:
DataWriter
|
None
,
predictor
:
BasePredictor
|
None
=
None
,
predictor
:
BasePredictor
|
None
=
None
,
backend
=
"
huggingface
"
,
backend
=
"
transformers
"
,
model_path
=
ModelPath
.
vlm_root_hf
,
model_path
:
str
|
None
=
None
,
server_url
:
str
|
None
=
None
,
server_url
:
str
|
None
=
None
,
):
):
if
predictor
is
None
:
if
predictor
is
None
:
...
...
mineru/cli/client.py
View file @
bcbbee8c
...
@@ -41,17 +41,18 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
...
@@ -41,17 +41,18 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
auto: Automatically determine the method based on the file type.
auto: Automatically determine the method based on the file type.
txt: Use text extraction method.
txt: Use text extraction method.
ocr: Use OCR method for image-based PDFs.
ocr: Use OCR method for image-based PDFs.
Without method specified, 'auto' will be used by default."""
,
Without method specified, 'auto' will be used by default.
Adapted only for the case where the backend is set to "pipeline"."""
,
default
=
'auto'
,
default
=
'auto'
,
)
)
@
click
.
option
(
@
click
.
option
(
'-b'
,
'-b'
,
'--backend'
,
'--backend'
,
'backend'
,
'backend'
,
type
=
click
.
Choice
([
'pipeline'
,
'vlm-
huggingface
'
,
'vlm-sglang-engine'
,
'vlm-sglang-client'
]),
type
=
click
.
Choice
([
'pipeline'
,
'vlm-
transformers
'
,
'vlm-sglang-engine'
,
'vlm-sglang-client'
]),
help
=
"""the backend for parsing pdf:
help
=
"""the backend for parsing pdf:
pipeline: More general.
pipeline: More general.
vlm-
huggingface
: More general.
vlm-
transformers
: More general.
vlm-sglang-engine: Faster(engine).
vlm-sglang-engine: Faster(engine).
vlm-sglang-client: Faster(client).
vlm-sglang-client: Faster(client).
without method specified, pipeline will be used by default."""
,
without method specified, pipeline will be used by default."""
,
...
...
mineru/cli/common.py
View file @
bcbbee8c
...
@@ -16,7 +16,6 @@ from mineru.backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc
...
@@ -16,7 +16,6 @@ from mineru.backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc
from
mineru.data.data_reader_writer
import
FileBasedDataWriter
from
mineru.data.data_reader_writer
import
FileBasedDataWriter
from
mineru.utils.draw_bbox
import
draw_layout_bbox
,
draw_span_bbox
from
mineru.utils.draw_bbox
import
draw_layout_bbox
,
draw_span_bbox
from
mineru.utils.enum_class
import
MakeMode
from
mineru.utils.enum_class
import
MakeMode
from
mineru.utils.models_download_utils
import
auto_download_and_get_model_root_path
from
mineru.utils.pdf_image_tools
import
images_bytes_to_pdf_bytes
from
mineru.utils.pdf_image_tools
import
images_bytes_to_pdf_bytes
pdf_suffixes
=
[
".pdf"
]
pdf_suffixes
=
[
".pdf"
]
...
@@ -173,8 +172,7 @@ def do_parse(
...
@@ -173,8 +172,7 @@ def do_parse(
pdf_bytes
=
convert_pdf_bytes_to_bytes_by_pypdfium2
(
pdf_bytes
,
start_page_id
,
end_page_id
)
pdf_bytes
=
convert_pdf_bytes_to_bytes_by_pypdfium2
(
pdf_bytes
,
start_page_id
,
end_page_id
)
local_image_dir
,
local_md_dir
=
prepare_env
(
output_dir
,
pdf_file_name
,
parse_method
)
local_image_dir
,
local_md_dir
=
prepare_env
(
output_dir
,
pdf_file_name
,
parse_method
)
image_writer
,
md_writer
=
FileBasedDataWriter
(
local_image_dir
),
FileBasedDataWriter
(
local_md_dir
)
image_writer
,
md_writer
=
FileBasedDataWriter
(
local_image_dir
),
FileBasedDataWriter
(
local_md_dir
)
model_path
=
auto_download_and_get_model_root_path
(
'/'
,
'vlm'
)
middle_json
,
infer_result
=
vlm_doc_analyze
(
pdf_bytes
,
image_writer
=
image_writer
,
backend
=
backend
,
server_url
=
server_url
)
middle_json
,
infer_result
=
vlm_doc_analyze
(
pdf_bytes
,
image_writer
=
image_writer
,
backend
=
backend
,
model_path
=
model_path
,
server_url
=
server_url
)
pdf_info
=
middle_json
[
"pdf_info"
]
pdf_info
=
middle_json
[
"pdf_info"
]
...
...
mineru/model/vlm_sglang_model/model.py
View file @
bcbbee8c
...
@@ -22,6 +22,7 @@ from transformers import (
...
@@ -22,6 +22,7 @@ from transformers import (
from
..vlm_hf_model.configuration_mineru2
import
Mineru2QwenConfig
from
..vlm_hf_model.configuration_mineru2
import
Mineru2QwenConfig
from
..vlm_hf_model.modeling_mineru2
import
build_vision_projector
from
..vlm_hf_model.modeling_mineru2
import
build_vision_projector
from
...utils.models_download_utils
import
auto_download_and_get_model_root_path
def
flatten_nested_list
(
nested_list
):
def
flatten_nested_list
(
nested_list
):
...
@@ -61,6 +62,9 @@ class Mineru2QwenForCausalLM(nn.Module):
...
@@ -61,6 +62,9 @@ class Mineru2QwenForCausalLM(nn.Module):
# load vision tower
# load vision tower
mm_vision_tower
=
self
.
config
.
mm_vision_tower
mm_vision_tower
=
self
.
config
.
mm_vision_tower
model_root_path
=
auto_download_and_get_model_root_path
(
"/"
,
"vlm"
)
mm_vision_tower
=
f
"
{
model_root_path
}
/
{
mm_vision_tower
}
"
if
"clip"
in
mm_vision_tower
:
if
"clip"
in
mm_vision_tower
:
vision_config
=
CLIPVisionConfig
.
from_pretrained
(
mm_vision_tower
)
vision_config
=
CLIPVisionConfig
.
from_pretrained
(
mm_vision_tower
)
self
.
vision_tower
=
CLIPVisionModel
(
vision_config
)
# type: ignore
self
.
vision_tower
=
CLIPVisionModel
(
vision_config
)
# type: ignore
...
...
mineru/model/vlm_sglang_model/server.py
View file @
bcbbee8c
...
@@ -27,16 +27,27 @@ async def custom_generate_request(obj: GenerateReqInput, request: Request):
...
@@ -27,16 +27,27 @@ async def custom_generate_request(obj: GenerateReqInput, request: Request):
def
main
():
def
main
():
server_args
=
prepare_server_args
(
sys
.
argv
[
1
:])
# 检查命令行参数中是否包含--model-path
args
=
sys
.
argv
[
1
:]
has_model_path_arg
=
False
for
i
,
arg
in
enumerate
(
args
):
if
arg
==
"--model-path"
or
arg
.
startswith
(
"--model-path="
):
has_model_path_arg
=
True
break
# 如果没有--model-path参数,在参数列表中添加它
if
not
has_model_path_arg
:
default_path
=
auto_download_and_get_model_root_path
(
"/"
,
"vlm"
)
args
.
extend
([
"--model-path"
,
default_path
])
server_args
=
prepare_server_args
(
args
)
if
server_args
.
chat_template
is
None
:
if
server_args
.
chat_template
is
None
:
server_args
.
chat_template
=
"chatml"
server_args
.
chat_template
=
"chatml"
server_args
.
enable_custom_logit_processor
=
True
server_args
.
enable_custom_logit_processor
=
True
if
server_args
.
model_path
is
None
:
server_args
.
model_path
=
auto_download_and_get_model_root_path
(
"/"
,
"vlm"
)
try
:
try
:
launch_server
(
server_args
)
launch_server
(
server_args
)
finally
:
finally
:
...
...
mineru/utils/config_reader.py
View file @
bcbbee8c
...
@@ -17,7 +17,7 @@ def read_config():
...
@@ -17,7 +17,7 @@ def read_config():
config_file
=
os
.
path
.
join
(
home_dir
,
CONFIG_FILE_NAME
)
config_file
=
os
.
path
.
join
(
home_dir
,
CONFIG_FILE_NAME
)
if
not
os
.
path
.
exists
(
config_file
):
if
not
os
.
path
.
exists
(
config_file
):
logger
.
warning
(
f
'
{
config_file
}
not found, using default configuration'
)
#
logger.warning(f'{config_file} not found, using default configuration')
return
None
return
None
else
:
else
:
with
open
(
config_file
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
with
open
(
config_file
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
...
@@ -106,7 +106,9 @@ def get_formula_config():
...
@@ -106,7 +106,9 @@ def get_formula_config():
def
get_latex_delimiter_config
():
def
get_latex_delimiter_config
():
config
=
read_config
()
config
=
read_config
()
latex_delimiter_config
=
config
.
get
(
'latex-delimiter-config'
)
if
config
is
None
:
return
None
latex_delimiter_config
=
config
.
get
(
'latex-delimiter-config'
,
None
)
if
latex_delimiter_config
is
None
:
if
latex_delimiter_config
is
None
:
# logger.warning(f"'latex-delimiter-config' not found in {CONFIG_FILE_NAME}, use 'None' as default")
# logger.warning(f"'latex-delimiter-config' not found in {CONFIG_FILE_NAME}, use 'None' as default")
return
None
return
None
...
@@ -116,7 +118,9 @@ def get_latex_delimiter_config():
...
@@ -116,7 +118,9 @@ def get_latex_delimiter_config():
def
get_llm_aided_config
():
def
get_llm_aided_config
():
config
=
read_config
()
config
=
read_config
()
llm_aided_config
=
config
.
get
(
'llm-aided-config'
)
if
config
is
None
:
return
None
llm_aided_config
=
config
.
get
(
'llm-aided-config'
,
None
)
if
llm_aided_config
is
None
:
if
llm_aided_config
is
None
:
# logger.warning(f"'llm-aided-config' not found in {CONFIG_FILE_NAME}, use 'None' as default")
# logger.warning(f"'llm-aided-config' not found in {CONFIG_FILE_NAME}, use 'None' as default")
return
None
return
None
...
@@ -126,6 +130,8 @@ def get_llm_aided_config():
...
@@ -126,6 +130,8 @@ def get_llm_aided_config():
def
get_local_models_dir
():
def
get_local_models_dir
():
config
=
read_config
()
config
=
read_config
()
if
config
is
None
:
return
None
models_dir
=
config
.
get
(
'models-dir'
)
models_dir
=
config
.
get
(
'models-dir'
)
if
models_dir
is
None
:
if
models_dir
is
None
:
logger
.
warning
(
f
"'models-dir' not found in
{
CONFIG_FILE_NAME
}
, use None as default"
)
logger
.
warning
(
f
"'models-dir' not found in
{
CONFIG_FILE_NAME
}
, use None as default"
)
...
...
mineru/utils/draw_bbox.py
View file @
bcbbee8c
...
@@ -188,7 +188,8 @@ def draw_layout_bbox(pdf_info, pdf_bytes, out_path, filename):
...
@@ -188,7 +188,8 @@ def draw_layout_bbox(pdf_info, pdf_bytes, out_path, filename):
page
.
merge_page
(
overlay_pdf
.
pages
[
0
])
page
.
merge_page
(
overlay_pdf
.
pages
[
0
])
else
:
else
:
# 记录日志并继续处理下一个页面
# 记录日志并继续处理下一个页面
logger
.
warning
(
f
"layout.pdf: 第
{
i
+
1
}
页未能生成有效的overlay PDF"
)
# logger.warning(f"layout.pdf: 第{i + 1}页未能生成有效的overlay PDF")
pass
output_pdf
.
add_page
(
page
)
output_pdf
.
add_page
(
page
)
...
@@ -302,7 +303,8 @@ def draw_span_bbox(pdf_info, pdf_bytes, out_path, filename):
...
@@ -302,7 +303,8 @@ def draw_span_bbox(pdf_info, pdf_bytes, out_path, filename):
page
.
merge_page
(
overlay_pdf
.
pages
[
0
])
page
.
merge_page
(
overlay_pdf
.
pages
[
0
])
else
:
else
:
# 记录日志并继续处理下一个页面
# 记录日志并继续处理下一个页面
logger
.
warning
(
f
"span.pdf: 第
{
i
+
1
}
页未能生成有效的overlay PDF"
)
# logger.warning(f"span.pdf: 第{i + 1}页未能生成有效的overlay PDF")
pass
output_pdf
.
add_page
(
page
)
output_pdf
.
add_page
(
page
)
...
...
mineru/utils/models_download_utils.py
View file @
bcbbee8c
...
@@ -51,9 +51,17 @@ def auto_download_and_get_model_root_path(relative_path: str, repo_mode='pipelin
...
@@ -51,9 +51,17 @@ def auto_download_and_get_model_root_path(relative_path: str, repo_mode='pipelin
else
:
else
:
raise
ValueError
(
f
"未知的仓库类型:
{
model_source
}
"
)
raise
ValueError
(
f
"未知的仓库类型:
{
model_source
}
"
)
relative_path
=
relative_path
.
strip
(
'/'
)
cache_dir
=
None
cache_dir
=
snapshot_download
(
repo
,
allow_patterns
=
[
relative_path
,
relative_path
+
"/*"
])
if
repo_mode
==
'pipeline'
:
relative_path
=
relative_path
.
strip
(
'/'
)
cache_dir
=
snapshot_download
(
repo
,
allow_patterns
=
[
relative_path
,
relative_path
+
"/*"
])
elif
repo_mode
==
'vlm'
:
# VLM 模式下,直接下载整个模型目录
cache_dir
=
snapshot_download
(
repo
)
if
not
cache_dir
:
raise
FileNotFoundError
(
f
"Failed to download model:
{
relative_path
}
from
{
repo
}
"
)
return
cache_dir
return
cache_dir
...
...
mineru/utils/ocr_utils.py
View file @
bcbbee8c
...
@@ -4,6 +4,11 @@ import cv2
...
@@ -4,6 +4,11 @@ import cv2
import
numpy
as
np
import
numpy
as
np
class
OcrConfidence
:
min_confidence
=
0.68
min_width
=
3
def
merge_spans_to_line
(
spans
,
threshold
=
0.6
):
def
merge_spans_to_line
(
spans
,
threshold
=
0.6
):
if
len
(
spans
)
==
0
:
if
len
(
spans
)
==
0
:
return
[]
return
[]
...
@@ -304,7 +309,7 @@ def get_ocr_result_list(ocr_res, useful_list, ocr_enable, new_image, lang):
...
@@ -304,7 +309,7 @@ def get_ocr_result_list(ocr_res, useful_list, ocr_enable, new_image, lang):
p1
,
p2
,
p3
,
p4
=
box_ocr_res
[
0
]
p1
,
p2
,
p3
,
p4
=
box_ocr_res
[
0
]
text
,
score
=
box_ocr_res
[
1
]
text
,
score
=
box_ocr_res
[
1
]
# logger.info(f"text: {text}, score: {score}")
# logger.info(f"text: {text}, score: {score}")
if
score
<
0.6
:
# 过滤低置信度的结果
if
score
<
OcrConfidence
.
min_confidence
:
# 过滤低置信度的结果
continue
continue
else
:
else
:
p1
,
p2
,
p3
,
p4
=
box_ocr_res
p1
,
p2
,
p3
,
p4
=
box_ocr_res
...
@@ -317,6 +322,11 @@ def get_ocr_result_list(ocr_res, useful_list, ocr_enable, new_image, lang):
...
@@ -317,6 +322,11 @@ def get_ocr_result_list(ocr_res, useful_list, ocr_enable, new_image, lang):
# average_angle_degrees = calculate_angle_degrees(box_ocr_res[0])
# average_angle_degrees = calculate_angle_degrees(box_ocr_res[0])
# if average_angle_degrees > 0.5:
# if average_angle_degrees > 0.5:
poly
=
[
p1
,
p2
,
p3
,
p4
]
poly
=
[
p1
,
p2
,
p3
,
p4
]
if
(
p3
[
0
]
-
p1
[
0
])
<
OcrConfidence
.
min_width
:
# logger.info(f"width too small: {p3[0] - p1[0]}, text: {text}")
continue
if
calculate_is_angle
(
poly
):
if
calculate_is_angle
(
poly
):
# logger.info(f"average_angle_degrees: {average_angle_degrees}, text: {text}")
# logger.info(f"average_angle_degrees: {average_angle_degrees}, text: {text}")
# 与x轴的夹角超过0.5度,对边界做一下矫正
# 与x轴的夹角超过0.5度,对边界做一下矫正
...
...
next_docs/en/.readthedocs.yaml
deleted
100644 → 0
View file @
3cc3f754
version
:
2
build
:
os
:
ubuntu-22.04
tools
:
python
:
"
3.10"
formats
:
-
epub
python
:
install
:
-
requirements
:
next_docs/requirements.txt
sphinx
:
configuration
:
next_docs/en/conf.py
next_docs/en/Makefile
deleted
100644 → 0
View file @
3cc3f754
# Minimal makefile for Sphinx documentation
#
# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS
?=
SPHINXBUILD
?=
sphinx-build
SOURCEDIR
=
.
BUILDDIR
=
_build
# Put it first so that "make" without argument is like "make help".
help
:
@
$(SPHINXBUILD)
-M
help
"
$(SOURCEDIR)
"
"
$(BUILDDIR)
"
$(SPHINXOPTS)
$(O)
.PHONY
:
help Makefile
# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%
:
Makefile
@
$(SPHINXBUILD)
-M
$@
"
$(SOURCEDIR)
"
"
$(BUILDDIR)
"
$(SPHINXOPTS)
$(O)
next_docs/en/_static/image/MinerU-logo-hq.png
deleted
100644 → 0
View file @
3cc3f754
1.35 MB
next_docs/en/_static/image/MinerU-logo.png
deleted
100644 → 0
View file @
3cc3f754
216 KB
next_docs/en/_static/image/ReadTheDocs.svg
deleted
100644 → 0
View file @
3cc3f754
<?xml version="1.0" encoding="UTF-8" standalone="no" ?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg
xmlns=
"http://www.w3.org/2000/svg"
xmlns:xlink=
"http://www.w3.org/1999/xlink"
version=
"1.1"
width=
"224"
height=
"72"
viewBox=
"-29 -3.67 224 72"
xml:space=
"preserve"
>
<desc>
Created with Fabric.js 5.2.4
</desc>
<defs>
</defs>
<rect
x=
"0"
y=
"0"
width=
"100%"
height=
"100%"
fill=
"transparent"
></rect>
<g
transform=
"matrix(1 0 0 1 112 36)"
id=
"7a867f58-a908-4f30-a839-fb725512b521"
>
<rect
style=
"stroke: none; stroke-width: 1; stroke-dasharray: none; stroke-linecap: butt; stroke-dashoffset: 0; stroke-linejoin: miter; stroke-miterlimit: 4; fill: rgb(255,255,255); fill-rule: nonzero; opacity: 1; visibility: hidden;"
vector-effect=
"non-scaling-stroke"
x=
"-112"
y=
"-36"
rx=
"0"
ry=
"0"
width=
"224"
height=
"72"
/>
</g>
<g
transform=
"matrix(Infinity NaN NaN Infinity 0 0)"
id=
"29611287-bf1c-4faf-8eb1-df32f6424829"
>
</g>
<g
transform=
"matrix(0.07 0 0 0.07 382.02 122.8)"
id=
"60cdd44f-027a-437a-92c4-c8d44c60ef9e"
>
<path
style=
"stroke: rgb(0,0,0); stroke-width: 0; stroke-dasharray: none; stroke-linecap: butt; stroke-dashoffset: 0; stroke-linejoin: miter; stroke-miterlimit: 4; fill: rgb(50,50,42); fill-rule: nonzero; opacity: 1;"
vector-effect=
"non-scaling-stroke"
transform=
" translate(-64, -64)"
d=
"M 57.62 61.68 C 55.919999999999995 61.92 54.75 63.46 55 65.11 C 55.1668510745875 66.32380621250819 56.039448735907676 67.32218371690155 57.22 67.65 C 57.22 67.65 64.69 70.11 77.4 71.16000000000001 C 87.61000000000001 72.01 99.2 70.43 99.2 70.43 C 100.9 70.39 102.23 68.98 102.19 67.28 C 102.17037752125772 66.4652516996782 101.82707564255573 65.69186585376654 101.23597809465886 65.13079230830253 C 100.644880546762 64.56971876283853 99.85466451370849 64.26716220997277 99.03999999999999 64.29 C 98.83999999999999 64.29 98.63999999999999 64.33000000000001 98.42999999999999 64.37 C 98.42999999999999 64.37 87.08999999999999 65.78 77.88 65.02000000000001 C 65.72999999999999 64.05000000000001 59.11 61.83000000000001 59.11 61.83000000000001 C 58.63 61.670000000000016 58.1 61.59000000000001 57.62 61.670000000000016 Z M 57.62 46.46 C 55.919999999999995 46.7 54.75 48.24 55 49.89 C 55.1668510745875 51.10380621250818 56.039448735907676 52.10218371690154 57.22 52.43 C 57.22 52.43 64.69 54.89 77.4 55.94 C 87.61000000000001 56.79 99.2 55.21 99.2 55.21 C 100.9 55.17 102.23 53.76 102.19 52.06 C 102.17037752125772 51.245251699678214 101.82707564255573 50.47186585376654 101.23597809465886 49.91079230830253 C 100.644880546762 49.34971876283853 99.85466451370849 49.047162209972754 99.03999999999999 49.07 C 98.83999999999999 49.07 98.63999999999999 49.11 98.42999999999999 49.15 C 98.42999999999999 49.15 87.08999999999999 50.559999999999995 77.88 49.8 C 65.72999999999999 48.83 59.11 46.61 59.11 46.61 C 58.63 46.45 58.1 46.37 57.62 46.45 Z M 57.62 31.240000000000002 C 55.919999999999995 31.48 54.75 33.02 55 34.67 C 55.1668510745875 35.88380621250818 56.039448735907676 36.882183716901544 57.22 37.21 C 57.22 37.21 64.69 39.67 77.4 40.72 C 87.61000000000001 41.57 99.2 39.99 99.2 39.99 C 100.9 39.95 102.23 38.54 102.19 36.84 C 102.17037752125772 36.025251699678215 101.82707564255573 35.25186585376654 101.23597809465886 34.690792308302534 C 100.644880546762 34.12971876283853 99.85466451370849 33.827162209972755 99.03999999999999 33.85 C 98.83999999999999 33.85 98.63999999999999 33.89 98.42999999999999 33.93 C 98.42999999999999 33.93 87.08999999999999 35.339999999999996 77.88 34.58 C 65.72999999999999 33.61 59.11 31.389999999999997 59.11 31.389999999999997 C 58.63 31.229999999999997 58.1 31.189999999999998 57.62 31.229999999999997 Z M 57.62 16.060000000000002 C 55.919999999999995 16.3 54.75 17.840000000000003 55 19.490000000000002 C 55.1668510745875 20.703806212508187 56.039448735907676 21.702183716901544 57.22 22.03 C 57.22 22.03 64.69 24.490000000000002 77.4 25.54 C 87.61000000000001 26.39 99.2 24.81 99.2 24.81 C 100.9 24.77 102.23 23.36 102.19 21.66 C 102.17037752125772 20.84525169967821 101.82707564255573 20.07186585376654 101.23597809465886 19.510792308302534 C 100.644880546762 18.949718762838526 99.8546645137085 18.64716220997276 99.03999999999999 18.67 C 98.83999999999999 18.67 98.63999999999999 18.71 98.42999999999999 18.75 C 98.42999999999999 18.75 87.08999999999999 20.16 77.88 19.4 C 65.72999999999999 18.43 59.11 16.209999999999997 59.11 16.209999999999997 C 58.637850878541954 16.01924514007714 58.12188500879498 15.963839409097599 57.62 16.049999999999997 Z M 36.31 0 C 20.32 0.12 14.39 5.05 14.39 5.05 L 14.39 124.42 C 14.39 124.42 20.2 119.41 38.93 120.18 C 57.66 120.95000000000002 61.5 127.53 84.50999999999999 127.97000000000001 C 107.52 128.41000000000003 113.28999999999999 124.42000000000002 113.28999999999999 124.42000000000002 L 113.60999999999999 2.750000000000014 C 113.60999999999999 2.750000000000014 103.28 5.7 83.09 5.86 C 62.95 6.01 58.11 0.73 39.62 0.12 C 38.49 0.04 37.4 0 36.31 0 Z M 49.67 7.79 C 49.67 7.79 59.36 10.98 77.24000000000001 11.870000000000001 C 92.38000000000001 12.64 107.52000000000001 10.38 107.52000000000001 10.38 L 107.52000000000001 118.53 C 107.52000000000001 118.53 99.85000000000001 122.57000000000001 80.68 121.19 C 65.82000000000001 120.14 49.480000000000004 114.49 49.480000000000004 114.49 L 49.68000000000001 7.799999999999997 Z M 40.35 10.620000000000001 C 42.050000000000004 10.620000000000001 43.46 11.990000000000002 43.46 13.73 C 43.46 15.469999999999999 42.09 16.84 40.35 16.84 C 40.35 16.84 35.34 16.88 32.28 17.16 C 27.150000000000002 17.68 23.64 19.54 23.64 19.54 C 22.150000000000002 20.349999999999998 20.25 19.74 19.48 18.25 C 18.67 16.76 19.28 14.86 20.77 14.09 C 22.259999999999998 13.32 25.33 11.67 31.67 11.06 C 35.34 10.66 40.35 10.620000000000001 40.35 10.620000000000001 Z M 37.36 25.880000000000003 C 39.06 25.840000000000003 40.35 25.880000000000003 40.35 25.880000000000003 C 42.050000000000004 26.080000000000002 43.260000000000005 27.62 43.050000000000004 29.310000000000002 C 42.88374644848126 30.726609090871516 41.76660909087151 31.843746448481262 40.35 32.010000000000005 C 40.35 32.010000000000005 35.34 32.050000000000004 32.28 32.330000000000005 C 27.150000000000002 32.85000000000001 23.64 34.71000000000001 23.64 34.71000000000001 C 22.150000000000002 35.52000000000001 20.25 34.91000000000001 19.48 33.42000000000001 C 18.67 31.93000000000001 19.28 30.03000000000001 20.77 29.26000000000001 C 20.77 29.26000000000001 25.33 26.84000000000001 31.67 26.230000000000008 C 33.53 25.99000000000001 35.67 25.910000000000007 37.36 25.870000000000008 Z M 40.35 41.06 C 42.050000000000004 41.06 43.46 42.43 43.46 44.17 C 43.46 45.910000000000004 42.09 47.28 40.35 47.28 C 40.35 47.28 35.34 47.24 32.28 47.56 C 27.150000000000002 48.080000000000005 23.64 49.940000000000005 23.64 49.940000000000005 C 22.150000000000002 50.75000000000001 20.25 50.14000000000001 19.48 48.650000000000006 C 18.67 47.160000000000004 19.28 45.260000000000005 20.77 44.49000000000001 C 20.77 44.49000000000001 25.33 42.07000000000001 31.67 41.46000000000001 C 35.34 41.02000000000001 40.35 41.06000000000001 40.35 41.06000000000001 Z"
stroke-linecap=
"round"
/>
</g>
<g
transform=
"matrix(0.07 0 0 0.07 396.05 123.14)"
style=
""
id=
"eb0df536-c517-4781-a7c0-3f84cd77c272"
>
<text
xml:space=
"preserve"
font-family=
"Lato"
font-size=
"40"
font-style=
"normal"
font-weight=
"400"
style=
"stroke: none; stroke-width: 1; stroke-dasharray: none; stroke-linecap: butt; stroke-dashoffset: 0; stroke-linejoin: miter; stroke-miterlimit: 4; fill: rgb(0,0,0); fill-rule: nonzero; opacity: 1; white-space: pre;"
><tspan
x=
"-130"
y=
"12.57"
>
Read The Docs
</tspan></text>
</g>
<g
transform=
"matrix(0.28 0 0 0.28 27.88 36)"
id=
"7b9eddb9-1652-4040-9437-2ab90652d624"
>
<path
style=
"stroke: rgb(0,0,0); stroke-width: 0; stroke-dasharray: none; stroke-linecap: butt; stroke-dashoffset: 0; stroke-linejoin: miter; stroke-miterlimit: 4; fill: rgb(50,50,42); fill-rule: nonzero; opacity: 1;"
vector-effect=
"non-scaling-stroke"
transform=
" translate(-64, -64)"
d=
"M 57.62 61.68 C 55.919999999999995 61.92 54.75 63.46 55 65.11 C 55.1668510745875 66.32380621250819 56.039448735907676 67.32218371690155 57.22 67.65 C 57.22 67.65 64.69 70.11 77.4 71.16000000000001 C 87.61000000000001 72.01 99.2 70.43 99.2 70.43 C 100.9 70.39 102.23 68.98 102.19 67.28 C 102.17037752125772 66.4652516996782 101.82707564255573 65.69186585376654 101.23597809465886 65.13079230830253 C 100.644880546762 64.56971876283853 99.85466451370849 64.26716220997277 99.03999999999999 64.29 C 98.83999999999999 64.29 98.63999999999999 64.33000000000001 98.42999999999999 64.37 C 98.42999999999999 64.37 87.08999999999999 65.78 77.88 65.02000000000001 C 65.72999999999999 64.05000000000001 59.11 61.83000000000001 59.11 61.83000000000001 C 58.63 61.670000000000016 58.1 61.59000000000001 57.62 61.670000000000016 Z M 57.62 46.46 C 55.919999999999995 46.7 54.75 48.24 55 49.89 C 55.1668510745875 51.10380621250818 56.039448735907676 52.10218371690154 57.22 52.43 C 57.22 52.43 64.69 54.89 77.4 55.94 C 87.61000000000001 56.79 99.2 55.21 99.2 55.21 C 100.9 55.17 102.23 53.76 102.19 52.06 C 102.17037752125772 51.245251699678214 101.82707564255573 50.47186585376654 101.23597809465886 49.91079230830253 C 100.644880546762 49.34971876283853 99.85466451370849 49.047162209972754 99.03999999999999 49.07 C 98.83999999999999 49.07 98.63999999999999 49.11 98.42999999999999 49.15 C 98.42999999999999 49.15 87.08999999999999 50.559999999999995 77.88 49.8 C 65.72999999999999 48.83 59.11 46.61 59.11 46.61 C 58.63 46.45 58.1 46.37 57.62 46.45 Z M 57.62 31.240000000000002 C 55.919999999999995 31.48 54.75 33.02 55 34.67 C 55.1668510745875 35.88380621250818 56.039448735907676 36.882183716901544 57.22 37.21 C 57.22 37.21 64.69 39.67 77.4 40.72 C 87.61000000000001 41.57 99.2 39.99 99.2 39.99 C 100.9 39.95 102.23 38.54 102.19 36.84 C 102.17037752125772 36.025251699678215 101.82707564255573 35.25186585376654 101.23597809465886 34.690792308302534 C 100.644880546762 34.12971876283853 99.85466451370849 33.827162209972755 99.03999999999999 33.85 C 98.83999999999999 33.85 98.63999999999999 33.89 98.42999999999999 33.93 C 98.42999999999999 33.93 87.08999999999999 35.339999999999996 77.88 34.58 C 65.72999999999999 33.61 59.11 31.389999999999997 59.11 31.389999999999997 C 58.63 31.229999999999997 58.1 31.189999999999998 57.62 31.229999999999997 Z M 57.62 16.060000000000002 C 55.919999999999995 16.3 54.75 17.840000000000003 55 19.490000000000002 C 55.1668510745875 20.703806212508187 56.039448735907676 21.702183716901544 57.22 22.03 C 57.22 22.03 64.69 24.490000000000002 77.4 25.54 C 87.61000000000001 26.39 99.2 24.81 99.2 24.81 C 100.9 24.77 102.23 23.36 102.19 21.66 C 102.17037752125772 20.84525169967821 101.82707564255573 20.07186585376654 101.23597809465886 19.510792308302534 C 100.644880546762 18.949718762838526 99.8546645137085 18.64716220997276 99.03999999999999 18.67 C 98.83999999999999 18.67 98.63999999999999 18.71 98.42999999999999 18.75 C 98.42999999999999 18.75 87.08999999999999 20.16 77.88 19.4 C 65.72999999999999 18.43 59.11 16.209999999999997 59.11 16.209999999999997 C 58.637850878541954 16.01924514007714 58.12188500879498 15.963839409097599 57.62 16.049999999999997 Z M 36.31 0 C 20.32 0.12 14.39 5.05 14.39 5.05 L 14.39 124.42 C 14.39 124.42 20.2 119.41 38.93 120.18 C 57.66 120.95000000000002 61.5 127.53 84.50999999999999 127.97000000000001 C 107.52 128.41000000000003 113.28999999999999 124.42000000000002 113.28999999999999 124.42000000000002 L 113.60999999999999 2.750000000000014 C 113.60999999999999 2.750000000000014 103.28 5.7 83.09 5.86 C 62.95 6.01 58.11 0.73 39.62 0.12 C 38.49 0.04 37.4 0 36.31 0 Z M 49.67 7.79 C 49.67 7.79 59.36 10.98 77.24000000000001 11.870000000000001 C 92.38000000000001 12.64 107.52000000000001 10.38 107.52000000000001 10.38 L 107.52000000000001 118.53 C 107.52000000000001 118.53 99.85000000000001 122.57000000000001 80.68 121.19 C 65.82000000000001 120.14 49.480000000000004 114.49 49.480000000000004 114.49 L 49.68000000000001 7.799999999999997 Z M 40.35 10.620000000000001 C 42.050000000000004 10.620000000000001 43.46 11.990000000000002 43.46 13.73 C 43.46 15.469999999999999 42.09 16.84 40.35 16.84 C 40.35 16.84 35.34 16.88 32.28 17.16 C 27.150000000000002 17.68 23.64 19.54 23.64 19.54 C 22.150000000000002 20.349999999999998 20.25 19.74 19.48 18.25 C 18.67 16.76 19.28 14.86 20.77 14.09 C 22.259999999999998 13.32 25.33 11.67 31.67 11.06 C 35.34 10.66 40.35 10.620000000000001 40.35 10.620000000000001 Z M 37.36 25.880000000000003 C 39.06 25.840000000000003 40.35 25.880000000000003 40.35 25.880000000000003 C 42.050000000000004 26.080000000000002 43.260000000000005 27.62 43.050000000000004 29.310000000000002 C 42.88374644848126 30.726609090871516 41.76660909087151 31.843746448481262 40.35 32.010000000000005 C 40.35 32.010000000000005 35.34 32.050000000000004 32.28 32.330000000000005 C 27.150000000000002 32.85000000000001 23.64 34.71000000000001 23.64 34.71000000000001 C 22.150000000000002 35.52000000000001 20.25 34.91000000000001 19.48 33.42000000000001 C 18.67 31.93000000000001 19.28 30.03000000000001 20.77 29.26000000000001 C 20.77 29.26000000000001 25.33 26.84000000000001 31.67 26.230000000000008 C 33.53 25.99000000000001 35.67 25.910000000000007 37.36 25.870000000000008 Z M 40.35 41.06 C 42.050000000000004 41.06 43.46 42.43 43.46 44.17 C 43.46 45.910000000000004 42.09 47.28 40.35 47.28 C 40.35 47.28 35.34 47.24 32.28 47.56 C 27.150000000000002 48.080000000000005 23.64 49.940000000000005 23.64 49.940000000000005 C 22.150000000000002 50.75000000000001 20.25 50.14000000000001 19.48 48.650000000000006 C 18.67 47.160000000000004 19.28 45.260000000000005 20.77 44.49000000000001 C 20.77 44.49000000000001 25.33 42.07000000000001 31.67 41.46000000000001 C 35.34 41.02000000000001 40.35 41.06000000000001 40.35 41.06000000000001 Z"
stroke-linecap=
"round"
/>
</g>
<g
transform=
"matrix(0.9 0 0 0.9 94 36)"
style=
""
id=
"385bde16-f9fa-4222-bfea-1d5d5efcf730"
>
<text
xml:space=
"preserve"
font-family=
"Lato"
font-size=
"15"
font-style=
"normal"
font-weight=
"100"
style=
"stroke: none; stroke-width: 1; stroke-dasharray: none; stroke-linecap: butt; stroke-dashoffset: 0; stroke-linejoin: miter; stroke-miterlimit: 4; fill: rgb(0,0,0); fill-rule: nonzero; opacity: 1; white-space: pre;"
><tspan
x=
"-48.68"
y=
"4.71"
>
Read The Docs
</tspan></text>
</g>
</svg>
\ No newline at end of file
next_docs/en/_static/image/datalab_logo.png
deleted
100644 → 0
View file @
3cc3f754
96.1 KB
next_docs/en/_static/image/flowchart_en.png
deleted
100644 → 0
View file @
3cc3f754
105 KB
next_docs/en/_static/image/flowchart_zh_cn.png
deleted
100644 → 0
View file @
3cc3f754
106 KB
next_docs/en/_static/image/inference_result.png
deleted
100644 → 0
View file @
3cc3f754
503 KB
next_docs/en/_static/image/layout_example.png
deleted
100644 → 0
View file @
3cc3f754
559 KB
next_docs/en/_static/image/logo.png
deleted
100644 → 0
View file @
3cc3f754
17.7 KB
Prev
1
2
3
4
5
6
…
29
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment