"vscode:/vscode.git/clone" did not exist on "cce358718877f08933c5a06a38b555139543d711"
remove_bbox_overlap.py 3.53 KB
Newer Older
许瑞's avatar
许瑞 committed
1
2
3
from magic_pdf.libs.boxbase import _is_in_or_part_overlap, _is_in


4
def _remove_overlap_between_bbox_for_span(spans):
许瑞's avatar
许瑞 committed
5
    res = []
许瑞's avatar
许瑞 committed
6
7
8
9
10
11
12
13
14
15
16
17
18

    keeps = [True] * len(spans)
    for i in range(len(spans)):
        for j in range(len(spans)):
            if i == j:
                continue
            if _is_in(spans[i]["bbox"], spans[j]["bbox"]):
                keeps[i] = False

    for idx, v in enumerate(spans):
        if not keeps[idx]:
            continue

许瑞's avatar
许瑞 committed
19
        for i in range(len(res)):
20
            if _is_in(v["bbox"], res[i]["bbox"]):
许瑞's avatar
许瑞 committed
21
22
23
24
25
26
27
28
                continue
            if _is_in_or_part_overlap(res[i]["bbox"], v["bbox"]):
                ix0, iy0, ix1, iy1 = res[i]["bbox"]
                x0, y0, x1, y1 = v["bbox"]

                diff_x = min(x1, ix1) - max(x0, ix0)
                diff_y = min(y1, iy1) - max(y0, iy0)

许瑞's avatar
许瑞 committed
29
                if diff_y > diff_x:
许瑞's avatar
许瑞 committed
30
31
                    if x1 >= ix1:
                        mid = (x0 + ix1) // 2
32
33
                        ix1 = min(mid - 0.25, ix1)
                        x0 = max(mid + 0.25, x0)
许瑞's avatar
许瑞 committed
34
35
                    else:
                        mid = (ix0 + x1) // 2
36
                        ix0 = max(mid + 0.25, ix0)
37
                        x1 = min(mid - 0.25, x1)
许瑞's avatar
许瑞 committed
38
39
40
                else:
                    if y1 >= iy1:
                        mid = (y0 + iy1) // 2
41
42
                        y0 = max(mid + 0.25, y0)
                        iy1 = min(iy1, mid-0.25)
许瑞's avatar
许瑞 committed
43
44
                    else:
                        mid = (iy0 + y1) // 2
45
46
                        y1 = min(y1, mid-0.25)
                        iy0 = max(mid + 0.25, iy0)
许瑞's avatar
许瑞 committed
47
48
49
50
51
52
53
                res[i]["bbox"] = [ix0, iy0, ix1, iy1]
                v["bbox"] = [x0, y0, x1, y1]

        res.append(v)
    return res


54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
def _remove_overlap_between_bbox_for_block(all_bboxes):
    res = []

    keeps = [True] * len(all_bboxes)
    for i in range(len(all_bboxes)):
        for j in range(len(all_bboxes)):
            if i == j:
                continue
            if _is_in(all_bboxes[i][:4], all_bboxes[j][:4]):
                keeps[i] = False

    for idx, v in enumerate(all_bboxes):
        if not keeps[idx]:
            continue

        for i in range(len(res)):
            if _is_in(v[:4], res[i][:4]):
                continue
            if _is_in_or_part_overlap(res[i][:4], v[:4]):
                ix0, iy0, ix1, iy1 = res[i][:4]
                x0, y0, x1, y1 = v[:4]

                diff_x = min(x1, ix1) - max(x0, ix0)
                diff_y = min(y1, iy1) - max(y0, iy0)

                if diff_y > diff_x:
                    if x1 >= ix1:
                        mid = (x0 + ix1) // 2
                        ix1 = min(mid - 0.25, ix1)
                        x0 = max(mid + 0.25, x0)
                    else:
                        mid = (ix0 + x1) // 2
                        ix0 = max(mid + 0.25, ix0)
                        x1 = min(mid - 0.25, x1)
                else:
                    if y1 >= iy1:
                        mid = (y0 + iy1) // 2
                        y0 = max(mid + 0.25, y0)
                        iy1 = min(iy1, mid-0.25)
                    else:
                        mid = (iy0 + y1) // 2
                        y1 = min(y1, mid-0.25)
                        iy0 = max(mid + 0.25, iy0)
                res[i][:4] = [ix0, iy0, ix1, iy1]
                v[:4] = [x0, y0, x1, y1]

        res.append(v)
    return res


def remove_overlap_between_bbox_for_span(spans):
    """Public wrapper around ``_remove_overlap_between_bbox_for_span``.

    Passes ``spans`` through unchanged and returns whatever the private
    implementation returns.
    """
    cleaned_spans = _remove_overlap_between_bbox_for_span(spans)
    return cleaned_spans


def remove_overlap_between_bbox_for_block(all_bboxes):
    """Public wrapper around ``_remove_overlap_between_bbox_for_block``.

    Passes ``all_bboxes`` through unchanged and returns whatever the private
    implementation returns.
    """
    cleaned_bboxes = _remove_overlap_between_bbox_for_block(all_bboxes)
    return cleaned_bboxes