Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
fbc8d21d
"examples/vscode:/vscode.git/clone" did not exist on "4fb0241bfb095cd5284b8c7b7f879991a4c74309"
Commit
fbc8d21d
authored
Jul 14, 2025
by
myhloli
Browse files
refactor: optimize overlap removal logic in remove_overlaps_min_blocks function
parent
941f36f5
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
42 additions
and
30 deletions
+42
-30
mineru/utils/model_utils.py
mineru/utils/model_utils.py
+42
-30
No files found.
mineru/utils/model_utils.py
View file @
fbc8d21d
...
@@ -206,37 +206,49 @@ def filter_nested_tables(table_res_list, overlap_threshold=0.8, area_threshold=0
...
@@ -206,37 +206,49 @@ def filter_nested_tables(table_res_list, overlap_threshold=0.8, area_threshold=0
def
remove_overlaps_min_blocks
(
res_list
):
def
remove_overlaps_min_blocks
(
res_list
):
#
重叠block,小的不能直接删除,需要和大的那个合并成一个更大的。
# 重叠block,小的不能直接删除,需要和大的那个合并成一个更大的。
#
删除重叠blocks中较小的那些
# 删除重叠blocks中较小的那些
need_remove
=
[]
need_remove
=
[]
for
res1
in
res_list
:
for
i
in
range
(
len
(
res_list
)):
for
res2
in
res_list
:
# 如果当前元素已在需要移除列表中,则跳过
if
res1
!=
res2
:
if
res_list
[
i
]
in
need_remove
:
overlap_box
=
get_minbox_if_overlap_by_ratio
(
continue
res1
[
'bbox'
],
res2
[
'bbox'
],
0.8
)
for
j
in
range
(
i
+
1
,
len
(
res_list
)):
if
overlap_box
is
not
None
:
# 如果比较对象已在需要移除列表中,则跳过
res_to_remove
=
next
(
if
res_list
[
j
]
in
need_remove
:
(
res
for
res
in
res_list
if
res
[
'bbox'
]
==
overlap_box
),
continue
None
,
)
overlap_box
=
get_minbox_if_overlap_by_ratio
(
if
(
res_list
[
i
][
'bbox'
],
res_list
[
j
][
'bbox'
],
0.8
res_to_remove
is
not
None
)
and
res_to_remove
not
in
need_remove
):
if
overlap_box
is
not
None
:
large_res
=
res1
if
res1
!=
res_to_remove
else
res2
res_to_remove
=
None
x1
,
y1
,
x2
,
y2
=
large_res
[
'bbox'
]
large_res
=
None
sx1
,
sy1
,
sx2
,
sy2
=
res_to_remove
[
'bbox'
]
x1
=
min
(
x1
,
sx1
)
# 确定哪个是小块(要移除的)
y1
=
min
(
y1
,
sy1
)
if
overlap_box
==
res_list
[
i
][
'bbox'
]:
x2
=
max
(
x2
,
sx2
)
res_to_remove
=
res_list
[
i
]
y2
=
max
(
y2
,
sy2
)
large_res
=
res_list
[
j
]
large_res
[
'bbox'
]
=
[
x1
,
y1
,
x2
,
y2
]
elif
overlap_box
==
res_list
[
j
][
'bbox'
]:
need_remove
.
append
(
res_to_remove
)
res_to_remove
=
res_list
[
j
]
large_res
=
res_list
[
i
]
if
len
(
need_remove
)
>
0
:
for
res
in
need_remove
:
if
res_to_remove
is
not
None
and
res_to_remove
not
in
need_remove
:
res_list
.
remove
(
res
)
# 更新大块的边界为两者的并集
x1
,
y1
,
x2
,
y2
=
large_res
[
'bbox'
]
sx1
,
sy1
,
sx2
,
sy2
=
res_to_remove
[
'bbox'
]
x1
=
min
(
x1
,
sx1
)
y1
=
min
(
y1
,
sy1
)
x2
=
max
(
x2
,
sx2
)
y2
=
max
(
y2
,
sy2
)
large_res
[
'bbox'
]
=
[
x1
,
y1
,
x2
,
y2
]
need_remove
.
append
(
res_to_remove
)
# 从列表中移除标记的元素
for
res
in
need_remove
:
res_list
.
remove
(
res
)
return
res_list
,
need_remove
return
res_list
,
need_remove
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment