"examples/vscode:/vscode.git/clone" did not exist on "4fb0241bfb095cd5284b8c7b7f879991a4c74309"
Commit fbc8d21d authored by myhloli's avatar myhloli
Browse files

refactor: optimize overlap removal logic in remove_overlaps_min_blocks function

parent 941f36f5
...@@ -206,37 +206,49 @@ def filter_nested_tables(table_res_list, overlap_threshold=0.8, area_threshold=0 ...@@ -206,37 +206,49 @@ def filter_nested_tables(table_res_list, overlap_threshold=0.8, area_threshold=0
def remove_overlaps_min_blocks(res_list):
    """Merge heavily overlapping blocks and drop the smaller one of each pair.

    For overlapping blocks the smaller one must not simply be deleted: the
    larger block is first grown to the union of both bounding boxes, and only
    then is the smaller block removed.

    Args:
        res_list: List of dicts, each with a ``'bbox'`` entry of the form
            ``[x1, y1, x2, y2]``. The list is modified in place, and the
            ``'bbox'`` of every surviving block that absorbed a smaller one
            is replaced by the merged box.

    Returns:
        A tuple ``(res_list, need_remove)``: the same list object with the
        absorbed blocks filtered out, and the list of removed blocks in the
        order they were absorbed.
    """
    need_remove = []
    # Track removals by position rather than by dict equality: it is O(1) per
    # lookup and cannot confuse two distinct blocks that compare equal.
    removed_idx = set()

    for i, res_i in enumerate(res_list):
        # Skip blocks that were already absorbed by an earlier block.
        if i in removed_idx:
            continue

        for j in range(i + 1, len(res_list)):
            if j in removed_idx:
                continue
            res_j = res_list[j]

            # NOTE(review): the helper is defined elsewhere; presumably it
            # returns the bbox of the smaller block when the overlap ratio
            # exceeds 0.8 and None otherwise -- confirm against its definition.
            overlap_box = get_minbox_if_overlap_by_ratio(
                res_i['bbox'], res_j['bbox'], 0.8
            )
            if overlap_box is None:
                continue

            # Work out which block of the pair is the small one (to remove).
            if overlap_box == res_i['bbox']:
                small_idx, small_res, large_res = i, res_i, res_j
            elif overlap_box == res_j['bbox']:
                small_idx, small_res, large_res = j, res_j, res_i
            else:
                # The returned box matches neither block; nothing to merge.
                continue

            # Grow the large block to the union of both bounding boxes.
            x1, y1, x2, y2 = large_res['bbox']
            sx1, sy1, sx2, sy2 = small_res['bbox']
            large_res['bbox'] = [
                min(x1, sx1),
                min(y1, sy1),
                max(x2, sx2),
                max(y2, sy2),
            ]
            removed_idx.add(small_idx)
            need_remove.append(small_res)

            if small_idx == i:
                # The outer block itself has just been absorbed. Stop here so
                # that no later block gets merged into a block that is about
                # to be deleted (its extent would be silently lost).
                break

    # Drop the absorbed blocks in place so callers holding a reference to
    # res_list observe the filtered result.
    if removed_idx:
        res_list[:] = [
            res for idx, res in enumerate(res_list) if idx not in removed_idx
        ]
    return res_list, need_remove
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment