remove_bbox_overlap.py 1.43 KB
Newer Older
许瑞's avatar
许瑞 committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
from magic_pdf.libs.boxbase import _is_in_or_part_overlap, _is_in


def _remove_overlap_between_bbox(spans):
    res = []
    for v in spans:
        for i in range(len(res)):
            if _is_in(res[i]["bbox"], v["bbox"]):
                continue
            if _is_in_or_part_overlap(res[i]["bbox"], v["bbox"]):
                ix0, iy0, ix1, iy1 = res[i]["bbox"]
                x0, y0, x1, y1 = v["bbox"]

                diff_x = min(x1, ix1) - max(x0, ix0)
                diff_y = min(y1, iy1) - max(y0, iy0)

许瑞's avatar
许瑞 committed
17
                if diff_y > diff_x:
许瑞's avatar
许瑞 committed
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
                    if x1 >= ix1:
                        mid = (x0 + ix1) // 2
                        ix1 = min(mid, ix1)
                        x0 = max(mid + 1, x0)
                    else:
                        mid = (ix0 + x1) // 2
                        ix0 = max(mid + 1, ix0)
                        x1 = min(mid, x1)
                else:
                    if y1 >= iy1:
                        mid = (y0 + iy1) // 2
                        y0 = max(mid + 1, y0)
                        iy1 = min(iy1, mid)
                    else:
                        mid = (iy0 + y1) // 2
                        y1 = min(y1, mid)
                        iy0 = max(mid + 1, iy0)
                res[i]["bbox"] = [ix0, iy0, ix1, iy1]
                v["bbox"] = [x0, y0, x1, y1]

        res.append(v)
    return res


def remove_overlap_between_bbox(spans):
    return _remove_overlap_between_bbox(spans)