Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
df15393c
Commit
df15393c
authored
Jul 03, 2025
by
myhloli
Browse files
refactor: optimize overlap detection logic in block_pre_proc.py for efficiency
parent
cd78980c
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
26 additions
and
25 deletions
+26
-25
mineru/utils/block_pre_proc.py
mineru/utils/block_pre_proc.py
+26
-25
No files found.
mineru/utils/block_pre_proc.py
View file @
df15393c
...
@@ -213,32 +213,33 @@ def remove_overlaps_min_blocks(all_bboxes):
...
@@ -213,32 +213,33 @@ def remove_overlaps_min_blocks(all_bboxes):
# 重叠block,小的不能直接删除,需要和大的那个合并成一个更大的。
# 重叠block,小的不能直接删除,需要和大的那个合并成一个更大的。
# 删除重叠blocks中较小的那些
# 删除重叠blocks中较小的那些
need_remove
=
[]
need_remove
=
[]
for
block1
in
all_bboxes
:
for
i
in
range
(
len
(
all_bboxes
)):
for
block2
in
all_bboxes
:
for
j
in
range
(
i
+
1
,
len
(
all_bboxes
)):
if
block1
!=
block2
:
block1
=
all_bboxes
[
i
]
block1_bbox
=
block1
[:
4
]
block2
=
all_bboxes
[
j
]
block2_bbox
=
block2
[:
4
]
block1_bbox
=
block1
[:
4
]
overlap_box
=
get_minbox_if_overlap_by_ratio
(
block2_bbox
=
block2
[:
4
]
block1_bbox
,
block2_bbox
,
0.8
overlap_box
=
get_minbox_if_overlap_by_ratio
(
block1_bbox
,
block2_bbox
,
0.8
)
if
overlap_box
is
not
None
:
block_to_remove
=
next
(
(
block
for
block
in
all_bboxes
if
block
[:
4
]
==
overlap_box
),
None
,
)
)
if
overlap_box
is
not
None
:
if
(
block_to_remove
=
next
(
block_to_remove
is
not
None
(
block
for
block
in
all_bboxes
if
block
[:
4
]
==
overlap_box
),
and
block_to_remove
not
in
need_remove
None
,
):
)
large_block
=
block1
if
block1
!=
block_to_remove
else
block2
if
(
x1
,
y1
,
x2
,
y2
=
large_block
[:
4
]
block_to_remove
is
not
None
sx1
,
sy1
,
sx2
,
sy2
=
block_to_remove
[:
4
]
and
block_to_remove
not
in
need_remove
x1
=
min
(
x1
,
sx1
)
):
y1
=
min
(
y1
,
sy1
)
large_block
=
block1
if
block1
!=
block_to_remove
else
block2
x2
=
max
(
x2
,
sx2
)
x1
,
y1
,
x2
,
y2
=
large_block
[:
4
]
y2
=
max
(
y2
,
sy2
)
sx1
,
sy1
,
sx2
,
sy2
=
block_to_remove
[:
4
]
large_block
[:
4
]
=
[
x1
,
y1
,
x2
,
y2
]
x1
=
min
(
x1
,
sx1
)
need_remove
.
append
(
block_to_remove
)
y1
=
min
(
y1
,
sy1
)
x2
=
max
(
x2
,
sx2
)
y2
=
max
(
y2
,
sy2
)
large_block
[:
4
]
=
[
x1
,
y1
,
x2
,
y2
]
need_remove
.
append
(
block_to_remove
)
if
len
(
need_remove
)
>
0
:
if
len
(
need_remove
)
>
0
:
for
block
in
need_remove
:
for
block
in
need_remove
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment