Commit 7d8f68cb authored by myhloli
Browse files

refactor: improve overlap handling by removing smaller blocks in...

refactor: improve overlap handling by removing smaller blocks in block_pre_proc.py and pipeline_magic_model.py
parent 6094699c
...@@ -18,7 +18,6 @@ class MagicModel: ...@@ -18,7 +18,6 @@ class MagicModel:
"""处理重叠的image_body和table_body""" """处理重叠的image_body和table_body"""
self.__fix_by_remove_overlap_image_table_body() self.__fix_by_remove_overlap_image_table_body()
def __fix_by_remove_overlap_image_table_body(self): def __fix_by_remove_overlap_image_table_body(self):
need_remove_list = [] need_remove_list = []
layout_dets = self.__page_model_info['layout_dets'] layout_dets = self.__page_model_info['layout_dets']
...@@ -28,6 +27,7 @@ class MagicModel: ...@@ -28,6 +27,7 @@ class MagicModel:
table_blocks = list(filter( table_blocks = list(filter(
lambda x: x['category_id'] == CategoryId.TableBody, layout_dets lambda x: x['category_id'] == CategoryId.TableBody, layout_dets
)) ))
def add_need_remove_block(blocks): def add_need_remove_block(blocks):
for i in range(len(blocks)): for i in range(len(blocks)):
for j in range(i + 1, len(blocks)): for j in range(i + 1, len(blocks)):
...@@ -37,15 +37,19 @@ class MagicModel: ...@@ -37,15 +37,19 @@ class MagicModel:
block1['bbox'], block2['bbox'], 0.8 block1['bbox'], block2['bbox'], 0.8
) )
if overlap_box is not None: if overlap_box is not None:
block_to_remove = next( # 判断哪个区块的面积更小,移除较小的区块
(block for block in blocks if block['bbox'] == overlap_box), area1 = (block1['bbox'][2] - block1['bbox'][0]) * (block1['bbox'][3] - block1['bbox'][1])
None, area2 = (block2['bbox'][2] - block2['bbox'][0]) * (block2['bbox'][3] - block2['bbox'][1])
)
if ( if area1 <= area2:
block_to_remove is not None block_to_remove = block1
and block_to_remove not in need_remove_list large_block = block2
): else:
large_block = block1 if block1 != block_to_remove else block2 block_to_remove = block2
large_block = block1
if block_to_remove not in need_remove_list:
# 扩展大区块的边界框
x1, y1, x2, y2 = large_block['bbox'] x1, y1, x2, y2 = large_block['bbox']
sx1, sy1, sx2, sy2 = block_to_remove['bbox'] sx1, sy1, sx2, sy2 = block_to_remove['bbox']
x1 = min(x1, sx1) x1 = min(x1, sx1)
...@@ -55,11 +59,15 @@ class MagicModel: ...@@ -55,11 +59,15 @@ class MagicModel:
large_block['bbox'] = [x1, y1, x2, y2] large_block['bbox'] = [x1, y1, x2, y2]
need_remove_list.append(block_to_remove) need_remove_list.append(block_to_remove)
# 处理图像-图像重叠
add_need_remove_block(image_blocks) add_need_remove_block(image_blocks)
# 处理表格-表格重叠
add_need_remove_block(table_blocks) add_need_remove_block(table_blocks)
# 从布局中移除标记的区块
for need_remove in need_remove_list: for need_remove in need_remove_list:
layout_dets.remove(need_remove) if need_remove in layout_dets:
layout_dets.remove(need_remove)
def __fix_axis(self): def __fix_axis(self):
......
...@@ -223,15 +223,18 @@ def remove_overlaps_min_blocks(all_bboxes): ...@@ -223,15 +223,18 @@ def remove_overlaps_min_blocks(all_bboxes):
block1_bbox, block2_bbox, 0.8 block1_bbox, block2_bbox, 0.8
) )
if overlap_box is not None: if overlap_box is not None:
block_to_remove = next( # 判断哪个区块的面积更小,移除较小的区块
(block for block in all_bboxes if block[:4] == overlap_box), area1 = (block1[2] - block1[0]) * (block1[3] - block1[1])
None, area2 = (block2[2] - block2[0]) * (block2[3] - block2[1])
)
if ( if area1 <= area2:
block_to_remove is not None block_to_remove = block1
and block_to_remove not in need_remove large_block = block2
): else:
large_block = block1 if block1 != block_to_remove else block2 block_to_remove = block2
large_block = block1
if block_to_remove not in need_remove:
x1, y1, x2, y2 = large_block[:4] x1, y1, x2, y2 = large_block[:4]
sx1, sy1, sx2, sy2 = block_to_remove[:4] sx1, sy1, sx2, sy2 = block_to_remove[:4]
x1 = min(x1, sx1) x1 = min(x1, sx1)
...@@ -241,8 +244,8 @@ def remove_overlaps_min_blocks(all_bboxes): ...@@ -241,8 +244,8 @@ def remove_overlaps_min_blocks(all_bboxes):
large_block[:4] = [x1, y1, x2, y2] large_block[:4] = [x1, y1, x2, y2]
need_remove.append(block_to_remove) need_remove.append(block_to_remove)
if len(need_remove) > 0: for block in need_remove:
for block in need_remove: if block in all_bboxes:
all_bboxes.remove(block) all_bboxes.remove(block)
return all_bboxes return all_bboxes
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment