Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
ec85af39
Commit
ec85af39
authored
Jun 20, 2025
by
myhloli
Browse files
fix: add error handling for block parsing in vlm_magic_model.py
parent
b40c4327
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
26 additions
and
19 deletions
+26
-19
mineru/backend/vlm/vlm_magic_model.py
mineru/backend/vlm/vlm_magic_model.py
+26
-19
No files found.
mineru/backend/vlm/vlm_magic_model.py
View file @
ec85af39
import
re
import
re
from
typing
import
Literal
from
typing
import
Literal
from
loguru
import
logger
from
mineru.utils.boxbase
import
bbox_distance
,
is_in
from
mineru.utils.boxbase
import
bbox_distance
,
is_in
from
mineru.utils.enum_class
import
ContentType
,
BlockType
,
SplitFlag
from
mineru.utils.enum_class
import
ContentType
,
BlockType
,
SplitFlag
from
mineru.backend.vlm.vlm_middle_json_mkcontent
import
merge_para_with_text
from
mineru.backend.vlm.vlm_middle_json_mkcontent
import
merge_para_with_text
...
@@ -22,25 +24,30 @@ class MagicModel:
...
@@ -22,25 +24,30 @@ class MagicModel:
# 解析每个块
# 解析每个块
for
index
,
block_info
in
enumerate
(
block_infos
):
for
index
,
block_info
in
enumerate
(
block_infos
):
block_bbox
=
block_info
[
0
].
strip
()
block_bbox
=
block_info
[
0
].
strip
()
x1
,
y1
,
x2
,
y2
=
map
(
int
,
block_bbox
.
split
())
try
:
x_1
,
y_1
,
x_2
,
y_2
=
(
x1
,
y1
,
x2
,
y2
=
map
(
int
,
block_bbox
.
split
())
int
(
x1
*
width
/
1000
),
x_1
,
y_1
,
x_2
,
y_2
=
(
int
(
y1
*
height
/
1000
),
int
(
x1
*
width
/
1000
),
int
(
x2
*
width
/
1000
),
int
(
y1
*
height
/
1000
),
int
(
y2
*
height
/
1000
),
int
(
x2
*
width
/
1000
),
)
int
(
y2
*
height
/
1000
),
if
x_2
<
x_1
:
)
x_1
,
x_2
=
x_2
,
x_1
if
x_2
<
x_1
:
if
y_2
<
y_1
:
x_1
,
x_2
=
x_2
,
x_1
y_1
,
y_2
=
y_2
,
y_1
if
y_2
<
y_1
:
block_bbox
=
(
x_1
,
y_1
,
x_2
,
y_2
)
y_1
,
y_2
=
y_2
,
y_1
block_type
=
block_info
[
1
].
strip
()
block_bbox
=
(
x_1
,
y_1
,
x_2
,
y_2
)
block_content
=
block_info
[
2
].
strip
()
block_type
=
block_info
[
1
].
strip
()
block_content
=
block_info
[
2
].
strip
()
# print(f"坐标: {block_bbox}")
# print(f"类型: {block_type}")
# print(f"坐标: {block_bbox}")
# print(f"内容: {block_content}")
# print(f"类型: {block_type}")
# print("-" * 50)
# print(f"内容: {block_content}")
# print("-" * 50)
except
Exception
as
e
:
# 如果解析失败,可能是因为格式不正确,跳过这个块
logger
.
warning
(
f
"Invalid block format:
{
block_info
}
, error:
{
e
}
"
)
continue
span_type
=
"unknown"
span_type
=
"unknown"
if
block_type
in
[
if
block_type
in
[
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment