Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
4eaa85fd
Commit
4eaa85fd
authored
Jun 11, 2025
by
myhloli
Browse files
refactor: update make mode constants to improve content list handling
parent
c01b780b
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
7 additions
and
7 deletions
+7
-7
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py
+2
-2
mineru/backend/vlm/vlm_middle_json_mkcontent.py
mineru/backend/vlm/vlm_middle_json_mkcontent.py
+2
-2
mineru/cli/common.py
mineru/cli/common.py
+2
-2
mineru/utils/enum_class.py
mineru/utils/enum_class.py
+1
-1
No files found.
mineru/backend/pipeline/pipeline_middle_json_mkcontent.py
View file @
4eaa85fd
...
...
@@ -260,14 +260,14 @@ def union_make(pdf_info_dict: list,
if
make_mode
in
[
MakeMode
.
MM_MD
,
MakeMode
.
NLP_MD
]:
page_markdown
=
make_blocks_to_markdown
(
paras_of_layout
,
make_mode
,
img_buket_path
)
output_content
.
extend
(
page_markdown
)
elif
make_mode
==
MakeMode
.
STANDARD_FORMA
T
:
elif
make_mode
==
MakeMode
.
CONTENT_LIS
T
:
for
para_block
in
paras_of_layout
:
para_content
=
make_blocks_to_content_list
(
para_block
,
img_buket_path
,
page_idx
)
output_content
.
append
(
para_content
)
if
make_mode
in
[
MakeMode
.
MM_MD
,
MakeMode
.
NLP_MD
]:
return
'
\n\n
'
.
join
(
output_content
)
elif
make_mode
==
MakeMode
.
STANDARD_FORMA
T
:
elif
make_mode
==
MakeMode
.
CONTENT_LIS
T
:
return
output_content
else
:
logger
.
error
(
f
"Unsupported make mode:
{
make_mode
}
"
)
...
...
mineru/backend/vlm/vlm_middle_json_mkcontent.py
View file @
4eaa85fd
...
...
@@ -186,14 +186,14 @@ def union_make(pdf_info_dict: list,
if
make_mode
in
[
MakeMode
.
MM_MD
,
MakeMode
.
NLP_MD
]:
page_markdown
=
mk_blocks_to_markdown
(
paras_of_layout
,
make_mode
,
img_buket_path
)
output_content
.
extend
(
page_markdown
)
elif
make_mode
==
MakeMode
.
STANDARD_FORMA
T
:
elif
make_mode
==
MakeMode
.
CONTENT_LIS
T
:
for
para_block
in
paras_of_layout
:
para_content
=
make_blocks_to_content_list
(
para_block
,
img_buket_path
,
page_idx
)
output_content
.
append
(
para_content
)
if
make_mode
in
[
MakeMode
.
MM_MD
,
MakeMode
.
NLP_MD
]:
return
'
\n\n
'
.
join
(
output_content
)
elif
make_mode
==
MakeMode
.
STANDARD_FORMA
T
:
elif
make_mode
==
MakeMode
.
CONTENT_LIS
T
:
return
output_content
return
None
...
...
mineru/cli/common.py
View file @
4eaa85fd
...
...
@@ -143,7 +143,7 @@ def do_parse(
if
f_dump_content_list
:
image_dir
=
str
(
os
.
path
.
basename
(
local_image_dir
))
content_list
=
pipeline_union_make
(
pdf_info
,
MakeMode
.
STANDARD_FORMA
T
,
image_dir
)
content_list
=
pipeline_union_make
(
pdf_info
,
MakeMode
.
CONTENT_LIS
T
,
image_dir
)
md_writer
.
write_string
(
f
"
{
pdf_file_name
}
_content_list.json"
,
json
.
dumps
(
content_list
,
ensure_ascii
=
False
,
indent
=
4
),
...
...
@@ -200,7 +200,7 @@ def do_parse(
if
f_dump_content_list
:
image_dir
=
str
(
os
.
path
.
basename
(
local_image_dir
))
content_list
=
vlm_union_make
(
pdf_info
,
MakeMode
.
STANDARD_FORMA
T
,
image_dir
)
content_list
=
vlm_union_make
(
pdf_info
,
MakeMode
.
CONTENT_LIS
T
,
image_dir
)
md_writer
.
write_string
(
f
"
{
pdf_file_name
}
_content_list.json"
,
json
.
dumps
(
content_list
,
ensure_ascii
=
False
,
indent
=
4
),
...
...
mineru/utils/enum_class.py
View file @
4eaa85fd
...
...
@@ -42,7 +42,7 @@ class CategoryId:
class
MakeMode
:
MM_MD
=
'mm_markdown'
NLP_MD
=
'nlp_markdown'
STANDARD_FORMAT
=
'standard_forma
t'
CONTENT_LIST
=
'content_lis
t'
class
ModelPath
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment