Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
a29489ef
Commit
a29489ef
authored Jun 09, 2025 by myhloli
Browse files
refactor: update config file name and enhance model path handling
parent
a149a49c
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing 5 changed files with 13 additions and 5 deletions
+13
-5
mineru.template.json
mineru.template.json
+0
-0
mineru/backend/vlm/hf_predictor.py
mineru/backend/vlm/hf_predictor.py
+1
-0
mineru/cli/common.py
mineru/cli/common.py
+6
-3
mineru/model/vlm_hf_model/modeling_mineru2.py
mineru/model/vlm_hf_model/modeling_mineru2.py
+5
-1
mineru/utils/config_reader.py
mineru/utils/config_reader.py
+1
-1
No files found.
magic-pdf.template.json → mineru.template.json
View file @
a29489ef
File moved
mineru/backend/vlm/hf_predictor.py
View file @
a29489ef
...
...
@@ -77,6 +77,7 @@ class HuggingfacePredictor(BasePredictor):
low_cpu_mem_usage
=
True
,
**
kwargs
,
)
setattr
(
self
.
model
.
config
,
"_name_or_path"
,
model_path
)
self
.
model
.
eval
()
vision_tower
=
self
.
model
.
get_model
().
vision_tower
...
...
mineru/cli/common.py
View file @
a29489ef
...
...
@@ -158,6 +158,9 @@ def do_parse(
logger
.
info
(
f
"local output dir is
{
local_md_dir
}
"
)
else
:
if
backend
.
startswith
(
"vlm-"
):
backend
=
backend
[
4
:]
f_draw_span_bbox
=
False
parse_method
=
"vlm"
for
idx
,
pdf_bytes
in
enumerate
(
pdf_bytes_list
):
...
...
@@ -216,10 +219,10 @@ def do_parse(
if
__name__
==
"__main__"
:
#
pdf_path = "../../demo/pdfs/demo2.pdf"
pdf_path
=
"C:/Users/zhaoxiaomeng/Downloads/input_img_0.jpg"
pdf_path
=
"../../demo/pdfs/demo2.pdf"
#
pdf_path = "C:/Users/zhaoxiaomeng/Downloads/input_img_0.jpg"
try
:
do_parse
(
"./output"
,
[
Path
(
pdf_path
).
stem
],
[
read_fn
(
Path
(
pdf_path
))],[
"ch"
],
end_page_id
=
20
,
)
do_parse
(
"./output"
,
[
Path
(
pdf_path
).
stem
],
[
read_fn
(
Path
(
pdf_path
))],[
"ch"
],
end_page_id
=
1
,
backend
=
'vlm-huggingface'
)
except
Exception
as
e
:
logger
.
exception
(
e
)
mineru/model/vlm_hf_model/modeling_mineru2.py
View file @
a29489ef
...
...
@@ -79,7 +79,11 @@ class SiglipVisionTower(nn.Module):
def
build_vision_tower
(
config
:
Mineru2QwenConfig
):
vision_tower
=
getattr
(
config
,
"mm_vision_tower"
,
getattr
(
config
,
"vision_tower"
,
""
))
model_path
=
getattr
(
config
,
"_name_or_path"
,
""
)
if
"siglip"
in
vision_tower
.
lower
():
if
model_path
:
return
SiglipVisionTower
(
f
"
{
model_path
}
/
{
vision_tower
}
"
)
else
:
return
SiglipVisionTower
(
vision_tower
)
raise
ValueError
(
f
"Unknown vision tower:
{
vision_tower
}
"
)
...
...
mineru/utils/config_reader.py
View file @
a29489ef
...
...
@@ -6,7 +6,7 @@ import torch
from
loguru
import
logger
# 定义配置文件名常量
CONFIG_FILE_NAME
=
os
.
getenv
(
'MINERU_TOOLS_CONFIG_JSON'
,
'm
agic-pdf
.json'
)
CONFIG_FILE_NAME
=
os
.
getenv
(
'MINERU_TOOLS_CONFIG_JSON'
,
'm
ineru
.json'
)
def
read_config
():
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment