Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
9bb25776
Commit 9bb25776 authored Jun 05, 2025 by myhloli
Browse files
refactor: reorganize imports to align with backend structure and improve clarity
parent
3fe1b78c
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing 9 changed files with 11 additions and 13 deletions
+11
-13
mineru/api/__init__.py
mineru/api/__init__.py
+0
-1
mineru/backend/pipeline/model_json_to_middle_json.py
mineru/backend/pipeline/model_json_to_middle_json.py
+1
-1
mineru/backend/pipeline/pipeline_magic_model.py
mineru/backend/pipeline/pipeline_magic_model.py
+0
-0
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py
+0
-0
mineru/backend/vlm/token_to_middle_json.py
mineru/backend/vlm/token_to_middle_json.py
+1
-2
mineru/backend/vlm/vlm_magic_model.py
mineru/backend/vlm/vlm_magic_model.py
+3
-3
mineru/backend/vlm/vlm_middle_json_mkcontent.py
mineru/backend/vlm/vlm_middle_json_mkcontent.py
+1
-1
mineru/cli/common.py
mineru/cli/common.py
+4
-4
mineru/utils/llm_aided.py
mineru/utils/llm_aided.py
+1
-1
No files found.
mineru/api/__init__.py
deleted
100644 → 0
View file @
3fe1b78c
# Copyright (c) Opendatalab. All rights reserved.
mineru/backend/pipeline/model_json_to_middle_json.py
View file @
9bb25776
...
@@ -12,7 +12,7 @@ from mineru.utils.boxbase import calculate_overlap_area_in_bbox1_area_ratio
...
@@ -12,7 +12,7 @@ from mineru.utils.boxbase import calculate_overlap_area_in_bbox1_area_ratio
from
mineru.utils.cut_image
import
cut_image_and_table
from
mineru.utils.cut_image
import
cut_image_and_table
from
mineru.utils.llm_aided
import
llm_aided_title
from
mineru.utils.llm_aided
import
llm_aided_title
from
mineru.utils.model_utils
import
clean_memory
from
mineru.utils.model_utils
import
clean_memory
from
mineru.
utils
.pipeline_magic_model
import
MagicModel
from
mineru.
backend.pipeline
.pipeline_magic_model
import
MagicModel
from
mineru.utils.span_block_fix
import
fill_spans_in_blocks
,
fix_discarded_block
,
fix_block_spans
from
mineru.utils.span_block_fix
import
fill_spans_in_blocks
,
fix_discarded_block
,
fix_block_spans
from
mineru.utils.span_pre_proc
import
remove_outside_spans
,
remove_overlaps_low_confidence_spans
,
\
from
mineru.utils.span_pre_proc
import
remove_outside_spans
,
remove_overlaps_low_confidence_spans
,
\
remove_overlaps_min_spans
,
txt_spans_extract
remove_overlaps_min_spans
,
txt_spans_extract
...
...
mineru/utils/pipeline_magic_model.py → mineru/backend/pipeline/pipeline_magic_model.py
View file @
9bb25776
File moved
mineru/api/pipeline_middle_json_mkcontent.py → mineru/backend/pipeline/pipeline_middle_json_mkcontent.py
View file @
9bb25776
File moved
mineru/backend/vlm/token_to_middle_json.py
View file @
9bb25776
import
re
import
re
from
mineru.utils.block_pre_proc
import
fix_text_overlap_title_blocks
from
mineru.utils.cut_image
import
cut_image_and_table
from
mineru.utils.cut_image
import
cut_image_and_table
from
mineru.utils.enum_class
import
BlockType
,
ContentType
from
mineru.utils.enum_class
import
BlockType
,
ContentType
from
mineru.utils.hash_utils
import
str_md5
from
mineru.utils.hash_utils
import
str_md5
from
mineru.
utils
.vlm_magic_model
import
fix_two_layer_blocks
,
fix_title_blocks
from
mineru.
backend.vlm
.vlm_magic_model
import
fix_two_layer_blocks
,
fix_title_blocks
from
mineru.version
import
__version__
from
mineru.version
import
__version__
...
...
mineru/utils/vlm_magic_model.py → mineru/backend/vlm/vlm_magic_model.py
View file @
9bb25776
import
re
import
re
from
typing
import
Literal
from
typing
import
Literal
from
.boxbase
import
bbox_distance
,
is_in
from
mineru.utils
.boxbase
import
bbox_distance
,
is_in
from
.enum_class
import
BlockType
from
mineru.utils
.enum_class
import
BlockType
from
..api
.vlm_middle_json_mkcontent
import
merge_para_with_text
from
mineru.backend.vlm
.vlm_middle_json_mkcontent
import
merge_para_with_text
def
__reduct_overlap
(
bboxes
):
def
__reduct_overlap
(
bboxes
):
...
...
mineru/api/vlm_middle_json_mkcontent.py → mineru/backend/vlm/vlm_middle_json_mkcontent.py
View file @
9bb25776
import
re
import
re
from
.
.utils.enum_class
import
MakeMode
,
BlockType
,
ContentType
from
mineru
.utils.enum_class
import
MakeMode
,
BlockType
,
ContentType
def
merge_para_with_text
(
para_block
):
def
merge_para_with_text
(
para_block
):
...
...
mineru/cli/common.py
View file @
9bb25776
...
@@ -8,9 +8,9 @@ from pathlib import Path
...
@@ -8,9 +8,9 @@ from pathlib import Path
import
pypdfium2
as
pdfium
import
pypdfium2
as
pdfium
from
loguru
import
logger
from
loguru
import
logger
from
mineru.
api
.pipeline_middle_json_mkcontent
import
union_make
as
pipeline_union_make
from
mineru.
backend.pipeline
.pipeline_middle_json_mkcontent
import
union_make
as
pipeline_union_make
from
mineru.backend.pipeline.model_json_to_middle_json
import
result_to_middle_json
as
pipeline_result_to_middle_json
from
mineru.backend.pipeline.model_json_to_middle_json
import
result_to_middle_json
as
pipeline_result_to_middle_json
from
mineru.
api
.vlm_middle_json_mkcontent
import
union_make
as
vlm_union_make
from
mineru.
backend.vlm
.vlm_middle_json_mkcontent
import
union_make
as
vlm_union_make
from
mineru.backend.vlm.vlm_analyze
import
doc_analyze
as
vlm_doc_analyze
from
mineru.backend.vlm.vlm_analyze
import
doc_analyze
as
vlm_doc_analyze
from
mineru.backend.pipeline.pipeline_analyze
import
doc_analyze
as
pipeline_doc_analyze
from
mineru.backend.pipeline.pipeline_analyze
import
doc_analyze
as
pipeline_doc_analyze
from
mineru.data.data_reader_writer
import
FileBasedDataWriter
from
mineru.data.data_reader_writer
import
FileBasedDataWriter
...
@@ -215,8 +215,8 @@ def do_parse(
...
@@ -215,8 +215,8 @@ def do_parse(
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
pdf_path
=
"../../demo/pdfs/hello-algo-1.1.0-zh-c-word转换的span有问题.pdf"
#
pdf_path = "../../demo/pdfs/hello-algo-1.1.0-zh-c-word转换的span有问题.pdf"
#
pdf_path = "C:/Users/zhaoxiaomeng/Downloads/
input_img_0.jpg
"
pdf_path
=
"C:/Users/zhaoxiaomeng/Downloads/
数学新星问题征解第一期(2014.03).pdf
"
try
:
try
:
do_parse
(
"./output"
,
[
Path
(
pdf_path
).
stem
],
[
read_fn
(
Path
(
pdf_path
))],[
"ch"
],
end_page_id
=
20
,)
do_parse
(
"./output"
,
[
Path
(
pdf_path
).
stem
],
[
read_fn
(
Path
(
pdf_path
))],[
"ch"
],
end_page_id
=
20
,)
...
...
mineru/utils/llm_aided.py
View file @
9bb25776
...
@@ -3,7 +3,7 @@ from loguru import logger
...
@@ -3,7 +3,7 @@ from loguru import logger
from
openai
import
OpenAI
from
openai
import
OpenAI
import
ast
import
ast
from
mineru.
api
.pipeline_middle_json_mkcontent
import
merge_para_with_text
from
mineru.
backend.pipeline
.pipeline_middle_json_mkcontent
import
merge_para_with_text
def
llm_aided_title
(
page_info_list
,
title_aided_config
):
def
llm_aided_title
(
page_info_list
,
title_aided_config
):
...
...
Write
Preview
Markdown is supported
0%
Try again or attach a new file.
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment