test_hubserving.py 5.7 KB
Newer Older
dyning's avatar
dyning committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import sys

# Make this script's directory and its parent importable. The project imports
# below (ppocr, tools, ppstructure) are placed after these path changes, so the
# statement order here must be preserved.
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))

from ppocr.utils.logging import get_logger
# Module-level logger shared by every function in this script.
logger = get_logger()

import cv2
import numpy as np
import time
from PIL import Image
from ppocr.utils.utility import get_image_file_list
from tools.infer.utility import draw_ocr, draw_boxes, str2bool
from ppstructure.utility import draw_structure_result
from ppstructure.predict_system import to_excel

import requests
import json
import base64
dyning's avatar
dyning committed
35

dyning's avatar
dyning committed
36
37
38
39

def cv2_to_base64(image):
    """Return the base64 encoding of ``image`` as a text string.

    Args:
        image: Bytes-like data to encode (``main`` passes the raw bytes read
            from an image file).

    Returns:
        The base64 representation decoded to ``str`` using UTF-8.
    """
    encoded_bytes = base64.b64encode(image)
    return str(encoded_bytes, 'utf8')

dyning's avatar
dyning committed
40
41
42
43
44
45
46

def draw_server_result(image_file, res):
    """Draw the results returned by the server onto the source image.

    The keys of the first entry in ``res`` select what is drawn:

    * no ``'text_region'`` key (recognition-only results): nothing is drawn;
    * ``'text_region'`` but no ``'text'`` key (detection-only results): only
      the boxes are drawn via ``draw_boxes``;
    * otherwise: boxes, texts and confidences are drawn via ``draw_ocr``.

    Args:
        image_file: Path of the image that was sent to the server.
        res: Sequence of per-region result dicts for that image.

    Returns:
        ``None`` when the image cannot be read or when the results carry no
        ``'text_region'``; the unmodified image as an RGB array when ``res``
        is empty; otherwise the value returned by ``draw_boxes`` /
        ``draw_ocr``.
    """
    img = cv2.imread(image_file)
    if img is None:
        # cv2.imread signals an unreadable file by returning None; passing
        # that on to cvtColor would raise an opaque OpenCV error.
        logger.info("error in loading image:{}".format(image_file))
        return None
    image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    if not res:
        return np.array(image)

    keys = res[0].keys()
    if 'text_region' not in keys:  # for ocr_rec, draw function is invalid
        logger.info("draw function is invalid for ocr_rec!")
        return None

    boxes = np.array([item['text_region'] for item in res])
    if 'text' not in keys:  # for ocr_det
        logger.info("draw text boxes only!")
        return draw_boxes(image, boxes)

    # for ocr_system
    logger.info("draw boxes and texts!")
    texts = [item['text'] for item in res]
    scores = np.array([item['confidence'] for item in res])
    return draw_ocr(
        image, boxes, texts, scores, draw_txt=True, drop_score=0.5)


WenmuZhou's avatar
WenmuZhou committed
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def save_structure_res(res, save_folder, image_file):
    """Save structure-analysis regions of one image into its own folder.

    A folder named after the image file is created under ``save_folder``.
    For each region in ``res``:

    * ``'Table'`` regions are exported with ``to_excel`` to ``<bbox>.xlsx``;
    * ``'Figure'`` regions are cropped from the image and written to
      ``<bbox>.jpg``;
    * every other region has each entry of its ``'res'`` list written as one
      JSON line to ``res.txt``.

    Args:
        res: Iterable of region dicts with at least ``'type'`` and ``'res'``
            keys; table and figure regions also need ``'bbox'``.
        save_folder: Root output directory.
        image_file: Path of the image the regions were extracted from.
    """
    img = cv2.imread(image_file)
    excel_save_folder = os.path.join(save_folder, os.path.basename(image_file))
    os.makedirs(excel_save_folder, exist_ok=True)
    # save res
    with open(
            os.path.join(excel_save_folder, 'res.txt'), 'w',
            encoding='utf8') as f:
        for region in res:
            if region['type'] == 'Table':
                excel_path = os.path.join(excel_save_folder,
                                          '{}.xlsx'.format(region['bbox']))
                to_excel(region['res'], excel_path)
            elif region['type'] == 'Figure':
                if img is None:
                    # cv2.imread returned None (unreadable image): there is
                    # nothing to crop, so skip the figure instead of failing
                    # on a None subscript.
                    logger.info("error in loading image:{}".format(
                        image_file))
                    continue
                x1, y1, x2, y2 = region['bbox']
                logger.info(region['bbox'])
                roi_img = img[y1:y2, x1:x2, :]
                img_path = os.path.join(excel_save_folder,
                                        '{}.jpg'.format(region['bbox']))
                cv2.imwrite(img_path, roi_img)
            else:
                for text_result in region['res']:
                    f.write('{}\n'.format(json.dumps(text_result)))


WenmuZhou's avatar
WenmuZhou committed
99
100
def main(args):
    """Send every image under ``args.image_dir`` to the server and log results.

    Each image file is read as raw bytes, base64-encoded and POSTed as JSON to
    ``args.server_url``. The first entry of the response's ``"results"`` list
    is logged and, when ``args.visualize`` is set, saved or drawn depending on
    which service name appears in the URL.

    Args:
        args: Parsed command-line namespace providing ``image_dir``,
            ``server_url``, ``visualize`` and ``output``.
    """
    image_file_list = get_image_file_list(args.image_dir)
    headers = {"Content-type": "application/json"}
    cnt = 0
    total_time = 0
    for image_file in image_file_list:
        # Read inside a context manager so the file handle is always closed.
        try:
            with open(image_file, 'rb') as image_fp:
                img = image_fp.read()
        except OSError:
            img = None
        if not img:
            # Unreadable or empty file: nothing meaningful to send.
            logger.info("error in loading image:{}".format(image_file))
            continue
        img_name = os.path.basename(image_file)

        # Send the HTTP request
        starttime = time.time()
        data = {'images': [cv2_to_base64(img)]}
        r = requests.post(
            url=args.server_url, headers=headers, data=json.dumps(data))
        elapse = time.time() - starttime
        total_time += elapse
        logger.info("Predict time of %s: %.3fs" % (image_file, elapse))
        res = r.json()["results"][0]
        logger.info(res)

        if args.visualize:
            draw_img = None
            if 'structure_table' in args.server_url:
                to_excel(res['html'], './{}.xlsx'.format(img_name))
            elif 'structure_system' in args.server_url:
                save_structure_res(res['regions'], args.output, image_file)
            else:
                draw_img = draw_server_result(image_file, res)
            if draw_img is not None:
                os.makedirs(args.output, exist_ok=True)
                save_path = os.path.join(args.output, img_name)
                # draw_server_result works on an RGB copy of the image;
                # reversing the last axis presumably restores the BGR channel
                # order that cv2.imwrite expects.
                cv2.imwrite(save_path, draw_img[:, :, ::-1])
                logger.info("The visualized image saved in {}".format(
                    save_path))
        cnt += 1
        if cnt % 100 == 0:
            logger.info("{} processed".format(cnt))

    if cnt == 0:
        # Avoid dividing by zero when no image was found or all were skipped.
        logger.info("no image was processed")
        return
    logger.info("avg time cost: {}".format(float(total_time) / cnt))
dyning's avatar
dyning committed
142

littletomatodonkey's avatar
littletomatodonkey committed
143

WenmuZhou's avatar
WenmuZhou committed
144
145
146
147
148
149
def parse_args():
    """Parse the command-line options of the hub-serving test client.

    Options:
        --server_url: URL the requests are posted to (required).
        --image_dir: Location of the input image(s) (required).
        --visualize: Whether to save visualized results (default ``False``).
        --output: Output directory (default ``'./hubserving_result'``).

    Returns:
        The ``argparse.Namespace`` holding the parsed options.
    """
    import argparse
    parser = argparse.ArgumentParser(description="args for hub serving")
    parser.add_argument("--server_url", type=str, required=True)
    parser.add_argument("--image_dir", type=str, required=True)
    parser.add_argument("--visualize", type=str2bool, default=False)
    parser.add_argument("--output", type=str, default='./hubserving_result')
    args = parser.parse_args()
    return args


littletomatodonkey's avatar
littletomatodonkey committed
155
if __name__ == '__main__':
    # Script entry point: parse the command line, then run the client.
    args = parse_args()
    main(args)