# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import subprocess

__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))

os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
import cv2
import copy
import numpy as np
import time
import tools.infer.predict_rec as predict_rec
import tools.infer.predict_det as predict_det
import ppstructure.table.predict_structure as predict_strture
from ppocr.utils.utility import get_image_file_list, check_and_read_gif
from ppocr.utils.logging import get_logger
from matcher import distance, compute_iou
from ppstructure.utility import parse_args

logger = get_logger()


def expand(pix, det_box, shape):
    """Grow a box by ``pix`` on every side and clip it to the image bounds.

    Args:
        pix: margin added to each side of the box.
        det_box: box given as ``(x0, y0, x1, y1)``.
        shape: image shape given as ``(height, width, channels)``.

    Returns:
        Tuple ``(x0, y0, x1, y1)`` of the grown box. Lower coordinates that
        fall below 0 become 0; upper coordinates that exceed the image
        width/height become the width/height.
    """

    def _not_below_zero(value):
        return value if value >= 0 else 0

    def _not_above(value, limit):
        return value if value <= limit else limit

    left, top, right, bottom = det_box
    img_h, img_w, _ = shape

    return (
        _not_below_zero(left - pix),
        _not_below_zero(top - pix),
        _not_above(right + pix, img_w),
        _not_above(bottom + pix, img_h),
    )


class TableSystem(object):
    """Table recognizer that turns a table image into an HTML string.

    It runs a table-structure predictor, a text detector and a text
    recognizer on the same image, assigns every detected text box to one
    predicted table cell, and writes the recognized text into the predicted
    HTML structure.
    """

    def __init__(self, args, text_detector=None, text_recognizer=None):
        """Build the three sub-models.

        Args:
            args: parsed command-line options, forwarded to each sub-model.
            text_detector: optional ready-made detector; a
                ``predict_det.TextDetector(args)`` is created when omitted.
            text_recognizer: optional ready-made recognizer; a
                ``predict_rec.TextRecognizer(args)`` is created when omitted.
        """
        self.text_detector = predict_det.TextDetector(args) if text_detector is None else text_detector
        self.text_recognizer = predict_rec.TextRecognizer(args) if text_recognizer is None else text_recognizer
        self.table_structurer = predict_strture.TableStructurer(args)

    def __call__(self, img):
        """Recognize the table in ``img``.

        Args:
            img: image array; it is copied, its ``shape`` is unpacked as
                ``(h, w, c)`` and it is sliced with three indices.

        Returns:
            The predicted table as an HTML string, or ``None`` when the text
            detector reports no result at all (``dt_boxes is None``).
        """
        ori_im = img.copy()
        structure_res, _ = self.table_structurer(copy.deepcopy(img))
        dt_boxes, elapse = self.text_detector(copy.deepcopy(img))
        # This guard has to run before the boxes are sorted/converted:
        # afterwards ``dt_boxes`` is always an ndarray and can never be None.
        if dt_boxes is None:
            return None

        # Collapse every detected polygon to an axis-aligned
        # [x_min, y_min, x_max, y_max] box padded by one pixel per side.
        dt_boxes = np.array([
            [
                box[:, 0].min() - 1,
                box[:, 1].min() - 1,
                box[:, 0].max() + 1,
                box[:, 1].max() + 1,
            ]
            for box in sorted_boxes(dt_boxes)
        ])
        logger.debug("dt_boxes num : {}, elapse : {}".format(
            len(dt_boxes), elapse))

        # Crop each (slightly enlarged) text box out of the untouched image.
        img_crop_list = []
        for det_box in dt_boxes:
            x0, y0, x1, y1 = expand(2, det_box, ori_im.shape)
            img_crop_list.append(ori_im[int(y0):int(y1), int(x0):int(x1), :])

        rec_res, elapse = self.text_recognizer(img_crop_list)
        logger.debug("rec_res num  : {}, elapse : {}".format(
            len(rec_res), elapse))

        pred_html, _ = self.rebuild_table(structure_res, dt_boxes, rec_res)
        return pred_html

    def rebuild_table(self, structure_res, dt_boxes, rec_res):
        """Combine structure, text boxes and recognized text into HTML.

        Args:
            structure_res: pair ``(pred_structures, pred_bboxes)``.
            dt_boxes: detected text boxes.
            rec_res: recognition results, indexed like ``dt_boxes``.

        Returns:
            Tuple ``(html_string, html_pieces)`` as produced by
            ``get_pred_html``.
        """
        pred_structures, pred_bboxes = structure_res
        matched_index = self.match_result(dt_boxes, pred_bboxes)
        return self.get_pred_html(pred_structures, matched_index, rec_res)

    def match_result(self, dt_boxes, pred_bboxes):
        """Assign every text box to its closest predicted cell box.

        Args:
            dt_boxes: detected text boxes.
            pred_bboxes: predicted cell boxes.

        Returns:
            Dict mapping an index into ``pred_bboxes`` to the list of
            indices into ``dt_boxes`` assigned to that cell. Empty when
            there are no text boxes or no cell boxes.
        """
        matched = {}
        for i, gt_box in enumerate(dt_boxes):
            # For every cell: (distance, 1 - IoU) between the text box and
            # the cell box (the original comment calls the distance "L1").
            distances = [
                (distance(gt_box, pred_box), 1. - compute_iou(gt_box, pred_box))
                for pred_box in pred_bboxes
            ]
            if not distances:
                # No candidate cells: nothing to assign this text box to.
                continue
            # Pick the "nearest" cell: smallest 1 - IoU first, distance as
            # tie-breaker; ``min`` keeps the earliest cell on full ties.
            best = min(
                range(len(distances)),
                key=lambda j: (distances[j][1], distances[j][0]))
            matched.setdefault(best, []).append(i)
        return matched

    def get_pred_html(self, pred_structures, matched_index, ocr_contents):
        """Insert recognized text into the predicted HTML token sequence.

        Args:
            pred_structures: iterable of HTML structure tokens.
            matched_index: dict mapping the ordinal of a ``</td>``-bearing
                token to the list of ``ocr_contents`` indices for that cell.
            ocr_contents: recognition results; element ``[k][0]`` is the
                text of the k-th text box.

        Returns:
            Tuple ``(html, pieces)`` where ``html`` is ``''.join(pieces)``.
            Cell text is added to ``pieces`` character by character.
        """
        end_html = []
        td_index = 0
        for tag in pred_structures:
            if '</td>' not in tag:
                end_html.append(tag)
                continue

            if td_index in matched_index:
                text_ids = matched_index[td_index]
                merging = len(text_ids) > 1
                # Look at the recognized *text* (element 0 of the record),
                # not at the record itself, to decide whether the merged
                # cell must be wrapped in a single <b>...</b> pair.
                b_with = merging and '<b>' in ocr_contents[text_ids[0]][0]
                if b_with:
                    end_html.extend('<b>')
                for i, text_id in enumerate(text_ids):
                    content = ocr_contents[text_id][0]
                    if merging:
                        if len(content) == 0:
                            continue
                        if content[0] == ' ':
                            content = content[1:]
                        # Drop per-fragment bold tags; the slicing assumes
                        # the tags sit at the very start / end of the text.
                        if '<b>' in content:
                            content = content[3:]
                        if '</b>' in content:
                            content = content[:-4]
                        if len(content) == 0:
                            continue
                        # Separate fragments with exactly one space.
                        if i != len(text_ids) - 1 and ' ' != content[-1]:
                            content += ' '
                    end_html.extend(content)
                if b_with:
                    end_html.extend('</b>')

            end_html.append(tag)
            td_index += 1
        return ''.join(end_html), end_html


def sorted_boxes(dt_boxes):
    """
    Order text boxes from top to bottom, then left to right.

    Boxes are sorted by the second coordinate of their first point, then by
    its first coordinate. A single pass over neighbouring pairs then swaps
    two boxes whose first points differ by less than 10 in the second
    coordinate when the later one has the smaller first coordinate.

    args:
        dt_boxes(array): detected text boxes; ``dt_boxes.shape[0]`` is the
            number of boxes and each box is indexed as ``box[0][0]`` /
            ``box[0][1]``
    return:
        list of the boxes in reading order
    """
    box_count = dt_boxes.shape[0]
    ordered = sorted(dt_boxes, key=lambda quad: (quad[0][1], quad[0][0]))

    for idx in range(box_count - 1):
        current, following = ordered[idx], ordered[idx + 1]
        on_same_row = abs(following[0][1] - current[0][1]) < 10
        if on_same_row and following[0][0] < current[0][0]:
            ordered[idx], ordered[idx + 1] = following, current
    return ordered

def to_excel(html_table, excel_path):
    """Write an HTML table to an Excel workbook.

    Args:
        html_table: HTML markup handed to ``tablepyxl.document_to_xl``.
        excel_path: destination path handed to ``tablepyxl.document_to_xl``.
    """
    # Imported lazily so the module can be loaded without tablepyxl.
    from tablepyxl import tablepyxl as xl_writer

    xl_writer.document_to_xl(html_table, excel_path)


def main(args):
    """Run table recognition on this process's share of the input images.

    Every image found under ``args.image_dir`` whose position matches
    ``args.process_id`` modulo ``args.total_process_num`` is converted to
    HTML and saved as ``<image name without extension>.xlsx``.

    Args:
        args: parsed command-line options. Reads ``image_dir``,
            ``process_id``, ``total_process_num`` and the output directory
            options ``table_output`` / ``output``.
    """
    image_file_list = get_image_file_list(args.image_dir)
    image_file_list = image_file_list[args.process_id::args.total_process_num]

    # The original created ``args.output`` but wrote the workbooks under
    # ``args.table_output``, so the directory actually written to was never
    # guaranteed to exist. Resolve the write directory once and create it.
    # NOTE(review): confirm which of the two options parse_args() defines.
    excel_dir = getattr(args, 'table_output', None) or args.output
    os.makedirs(excel_dir, exist_ok=True)
    if getattr(args, 'output', None):
        os.makedirs(args.output, exist_ok=True)

    text_sys = TableSystem(args)
    img_num = len(image_file_list)
    for i, image_file in enumerate(image_file_list):
        logger.info("[{}/{}] {}".format(i, img_num, image_file))
        img, flag = check_and_read_gif(image_file)
        if not flag:
            img = cv2.imread(image_file)
        if img is None:
            logger.error("error in loading image:{}".format(image_file))
            continue

        # splitext keeps dots inside the name ("scan.1.png" -> "scan.1"),
        # so differently named images no longer collapse onto one workbook.
        stem = os.path.splitext(os.path.basename(image_file))[0]
        excel_path = os.path.join(excel_dir, stem + '.xlsx')

        starttime = time.time()
        pred_html = text_sys(img)
        if pred_html is None:
            # Nothing to export; do not hand None to the Excel writer.
            logger.error("no table result for image:{}".format(image_file))
            continue

        to_excel(pred_html, excel_path)
        logger.info('excel saved to {}'.format(excel_path))
        logger.info(pred_html)
        elapse = time.time() - starttime
        logger.info("Predict time : {:.3f}s".format(elapse))


if __name__ == "__main__":
    args = parse_args()
    if args.use_mp:
        # Fan out: re-run this script once per worker. Each child gets its
        # own --process_id and --use_mp=False so it does not fan out again.
        p_list = []
        total_process_num = args.total_process_num
        for process_id in range(total_process_num):
            cmd = [sys.executable, "-u"] + sys.argv + [
                "--process_id={}".format(process_id),
                "--use_mp={}".format(False)
            ]
            # Child stderr is merged into this process's stdout.
            p = subprocess.Popen(cmd, stdout=sys.stdout, stderr=sys.stdout)
            p_list.append(p)
        # Block until every worker has exited.
        for p in p_list:
            p.wait()
    else:
        main(args)