paddleocr.py 16 KB
Newer Older
WenmuZhou's avatar
WenmuZhou committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys

__dir__ = os.path.dirname(__file__)
sys.path.append(os.path.join(__dir__, ''))

import cv2
WenmuZhou's avatar
WenmuZhou committed
22
import logging
WenmuZhou's avatar
WenmuZhou committed
23
24
25
26
import numpy as np
from pathlib import Path

from tools.infer import predict_system
WenmuZhou's avatar
WenmuZhou committed
27
from ppocr.utils.logging import get_logger
WenmuZhou's avatar
WenmuZhou committed
28

WenmuZhou's avatar
WenmuZhou committed
29
logger = get_logger()
30
from ppocr.utils.utility import check_and_read_gif, get_image_file_list
31
from ppocr.utils.network import maybe_download, download_with_progressbar, is_link, confirm_model_dir_url
32
33
34
from tools.infer.utility import draw_ocr, str2bool
from ppstructure.utility import init_args, draw_structure_result
from ppstructure.predict_system import OCRSystem, save_structure_res
WenmuZhou's avatar
WenmuZhou committed
35

36
__all__ = ['PaddleOCR','PPStructure','draw_ocr','draw_structure_result','save_structure_res']
WenmuZhou's avatar
WenmuZhou committed
37

WenmuZhou's avatar
WenmuZhou committed
38
# Download locations of the pretrained inference models, grouped by pipeline
# stage. Entries that need a character dictionary also carry the path of that
# dictionary relative to the package directory.
model_urls = {
    # Text detection models, keyed by detection language / task.
    'det': {
        'ch': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
        'en': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar',
        'structure': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar',
    },
    # Text recognition models, keyed by the value accepted for ``lang``
    # (after script-family aliases such as 'fr' -> 'latin' are resolved).
    'rec': {
        'ch': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/ppocr_keys_v1.txt',
        },
        'en': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/en_dict.txt',
        },
        'french': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/french_dict.txt',
        },
        'german': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/german_dict.txt',
        },
        'korean': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/korean_dict.txt',
        },
        'japan': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/japan_dict.txt',
        },
        'chinese_cht': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/chinese_cht_dict.txt',
        },
        'ta': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/ta_dict.txt',
        },
        'te': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/te_dict.txt',
        },
        'ka': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/ka_dict.txt',
        },
        'latin': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/latin_dict.txt',
        },
        'arabic': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/arabic_dict.txt',
        },
        'cyrillic': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/cyrillic_dict.txt',
        },
        'devanagari': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/devanagari_dict.txt',
        },
        # Recognizer used by the PPStructure (table) pipeline.
        'structure': {
            'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tar',
            'dict_path': 'ppocr/utils/dict/table_dict.txt',
        },
    },
    # Text direction (angle) classifier; a single model for every language.
    'cls': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
    # Table structure model used by PPStructure.
    'table': {
        'url': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tar',
        'dict_path': 'ppocr/utils/dict/table_structure_dict.txt',
    },
}

# Detection algorithms accepted for ``det_algorithm``.
SUPPORT_DET_MODEL = ['DB']
# Version component of the local model cache path (BASE_DIR/VERSION/...).
VERSION = '2.1'
# Recognition algorithms accepted for ``rec_algorithm``.
SUPPORT_REC_MODEL = ['CRNN']
# Root of the per-user directory that downloaded models are stored under.
BASE_DIR = os.path.expanduser("~/.paddleocr/")
WenmuZhou's avatar
WenmuZhou committed
133
134


WenmuZhou's avatar
WenmuZhou committed
135
def parse_args(mMain=True):
    """Build the paddleocr argument set and return it as a namespace.

    The parser comes from ``init_args()`` and is extended with the
    package-level options ``--lang``, ``--det``, ``--rec`` and ``--type``.
    The defaults of ``rec_char_dict_path`` and ``table_char_dict_path`` are
    reset to ``None`` so callers can tell "not supplied" apart from an
    explicit path and fall back to the dictionary bundled with the model.

    args:
        mMain: when True, parse the process command line (``sys.argv``);
            when False, ignore the command line and return a namespace
            holding only the default value of every registered option.
    return:
        argparse.Namespace with one attribute per parser action.
    """
    import argparse

    parser = init_args()
    # NOTE(review): argparse registers -h/--help in the parser constructor,
    # so assigning add_help afterwards presumably has no effect - confirm.
    parser.add_help = mMain
    parser.add_argument("--lang", type=str, default='ch')
    parser.add_argument("--det", type=str2bool, default=True)
    parser.add_argument("--rec", type=str2bool, default=True)
    parser.add_argument("--type", type=str, default='ocr')

    # Clear the dictionary-path defaults inherited from init_args().
    for action in parser._actions:
        if action.dest in ('rec_char_dict_path', 'table_char_dict_path'):
            action.default = None

    if mMain:
        return parser.parse_args()

    # Library use: never look at sys.argv, just collect the defaults.
    defaults = {action.dest: action.default for action in parser._actions}
    return argparse.Namespace(**defaults)
WenmuZhou's avatar
WenmuZhou committed
154
155
156


class PaddleOCR(predict_system.TextSystem):
    """OCR engine that resolves, downloads and loads the models it needs.

    Construction picks the detection, recognition and angle-classifier
    models for the requested language, downloads any that are missing, and
    then initialises the parent ``TextSystem`` with the final parameters.
    """

    def __init__(self, **kwargs):
        """
        paddleocr package
        args:
            **kwargs: other params show in paddleocr --help
        raises:
            AssertionError: if ``lang`` (after alias resolution) has no
                recognition model in ``model_urls['rec']``.
            SystemExit: if ``det_algorithm`` / ``rec_algorithm`` is not
                supported.
        """
        params = parse_args(mMain=False)
        vars(params).update(kwargs)
        if not params.show_log:
            logger.setLevel(logging.INFO)
        self.use_angle_cls = params.use_angle_cls

        # Fail before any network access if the algorithms are unsupported.
        # A non-zero status is used so callers can detect the failure.
        if params.det_algorithm not in SUPPORT_DET_MODEL:
            logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
            sys.exit(1)
        if params.rec_algorithm not in SUPPORT_REC_MODEL:
            logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
            sys.exit(1)

        # Languages that share one recognition model per script family.
        script_families = {
            'latin': [
                'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr',
                'ga', 'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv',
                'mi', 'ms', 'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro',
                'rs_latin', 'sk', 'sl', 'sq', 'sv', 'sw', 'tl', 'tr', 'uz',
                'vi'
            ],
            'arabic': ['ar', 'fa', 'ug', 'ur'],
            'cyrillic': [
                'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady',
                'kbd', 'ava', 'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
            ],
            'devanagari': [
                'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck',
                'new', 'gom', 'sa', 'bgc'
            ],
        }
        lang = params.lang
        for family, codes in script_families.items():
            if lang in codes:
                lang = family
                break

        # Raised explicitly (rather than via ``assert``) so the check is
        # still performed when Python runs with -O.
        if lang not in model_urls['rec']:
            raise AssertionError(
                'param lang must in {}, but got {}'.format(
                    model_urls['rec'].keys(), lang))

        # Only Chinese has a dedicated detector; everything else uses 'en'.
        det_lang = "ch" if lang == "ch" else "en"

        # No dictionary supplied: use the one shipped with the package for
        # the selected recognition model.
        if params.rec_char_dict_path is None:
            params.rec_char_dict_path = str(
                Path(__file__).parent / model_urls['rec'][lang]['dict_path'])

        # init model dir
        params.det_model_dir, det_url = confirm_model_dir_url(
            params.det_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'det', det_lang),
            model_urls['det'][det_lang])
        params.rec_model_dir, rec_url = confirm_model_dir_url(
            params.rec_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'rec', lang),
            model_urls['rec'][lang]['url'])
        params.cls_model_dir, cls_url = confirm_model_dir_url(
            params.cls_model_dir,
            os.path.join(BASE_DIR, VERSION, 'ocr', 'cls'),
            model_urls['cls'])
        # download model
        maybe_download(params.det_model_dir, det_url)
        maybe_download(params.rec_model_dir, rec_url)
        maybe_download(params.cls_model_dir, cls_url)

        # Library code should not write to stdout; route through the logger.
        logger.debug(params)
        # init det_model and rec_model
        super().__init__(params)

    @staticmethod
    def _read_image(img):
        """Load the image at path or URL ``img``; return None on failure."""
        # download net image
        if img.startswith('http'):
            download_with_progressbar(img, 'tmp.jpg')
            img = 'tmp.jpg'
        image_file = img
        img, flag = check_and_read_gif(image_file)
        if not flag:
            # Decode from raw bytes instead of handing the path to OpenCV.
            with open(image_file, 'rb') as f:
                np_arr = np.frombuffer(f.read(), dtype=np.uint8)
                img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
        if img is None:
            logger.error("error in loading image:{}".format(image_file))
        return img

    def ocr(self, img, det=True, rec=True, cls=True):
        """
        ocr with paddleocr
        args:
            img: img for ocr, support ndarray, img_path and list of ndarray
            det: use text detection or not, if false, only rec will be exec. default is True
            rec: use text recognition or not, if false, only det will be exec. default is True
            cls: use the angle classifier or not; it is only applied when it
                was enabled at construction time (use_angle_cls). default is True
        return:
            det and rec: list of ``[box, result]`` pairs, ``box`` being the
                detected box converted with ``tolist()``.
            det only: list of boxes, or None when the detector returns None.
            no det: the recognizer output for the image(s), or the
                classifier output when rec is false and the classifier ran.
            None when ``img`` is a path/URL that cannot be loaded.
        raises:
            AssertionError: if ``img`` is not an ndarray, list or str.
            SystemExit: if a list of images is passed while det is enabled.
        """
        # Raised explicitly so the check survives ``python -O``.
        if not isinstance(img, (np.ndarray, list, str)):
            raise AssertionError(
                'img must be an ndarray, list or str, but got {}'.format(
                    type(img)))
        if isinstance(img, list) and det:
            logger.error('When input a list of images, det must be false')
            # sys.exit (not the site-provided ``exit``) with a failure status.
            sys.exit(1)
        if cls and not self.use_angle_cls:
            logger.warning(
                'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process'
            )

        if isinstance(img, str):
            img = self._read_image(img)
            if img is None:
                return None
        # Promote single-channel input to 3-channel BGR.
        if isinstance(img, np.ndarray) and len(img.shape) == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        if det and rec:
            dt_boxes, rec_res = self(img, cls)
            return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
        if det:
            dt_boxes, _ = self.text_detector(img)
            if dt_boxes is None:
                return None
            return [box.tolist() for box in dt_boxes]

        # Detection disabled: classifier/recognizer are called with a list.
        if not isinstance(img, list):
            img = [img]
        if self.use_angle_cls and cls:
            img, cls_res, _ = self.text_classifier(img)
            if not rec:
                return cls_res
        rec_res, _ = self.text_recognizer(img)
        return rec_res
284
285


286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
class PPStructure(OCRSystem):
    """Structure-analysis engine that resolves and downloads its own models.

    Construction selects the 'structure' detection and recognition models
    and the table model, downloads any that are missing, fills in the
    bundled character dictionaries when none were supplied, and then
    initialises the parent ``OCRSystem``.
    """

    def __init__(self, **kwargs):
        """
        args:
            **kwargs: overrides for the defaults returned by
                ``parse_args(mMain=False)``.
        """
        params = parse_args(mMain=False)
        vars(params).update(kwargs)
        if not params.show_log:
            logger.setLevel(logging.INFO)
        # The angle classifier is always disabled for this pipeline.
        params.use_angle_cls = False

        # init model dir
        params.det_model_dir, det_url = confirm_model_dir_url(
            params.det_model_dir,
            os.path.join(BASE_DIR, VERSION, 'structure', 'det'),
            model_urls['det']['structure'])
        params.rec_model_dir, rec_url = confirm_model_dir_url(
            params.rec_model_dir,
            os.path.join(BASE_DIR, VERSION, 'structure', 'rec'),
            model_urls['rec']['structure']['url'])
        params.table_model_dir, table_url = confirm_model_dir_url(
            params.table_model_dir,
            os.path.join(BASE_DIR, VERSION, 'structure', 'table'),
            model_urls['table']['url'])
        # download model
        maybe_download(params.det_model_dir, det_url)
        maybe_download(params.rec_model_dir, rec_url)
        maybe_download(params.table_model_dir, table_url)

        # Fall back to the dictionaries shipped with the package.
        if params.rec_char_dict_path is None:
            params.rec_char_type = 'EN'
            params.rec_char_dict_path = self._bundled_path(
                model_urls['rec']['structure']['dict_path'])
        if params.table_char_dict_path is None:
            params.table_char_dict_path = self._bundled_path(
                model_urls['table']['dict_path'])

        # Library code should not write to stdout; route through the logger.
        logger.debug(params)
        super().__init__(params)

    @staticmethod
    def _bundled_path(relative_path):
        """Resolve ``relative_path`` next to this file, else one level up."""
        here = Path(__file__).parent
        candidate = str(here / relative_path)
        if os.path.exists(candidate):
            return candidate
        # Not found beside this module: use the parent directory instead
        # (returned even if it does not exist, as before).
        return str(here.parent / relative_path)

    def __call__(self, img):
        """Run the parent pipeline on ``img``.

        args:
            img: an ndarray, or a str holding a local path or an http(s) URL.
        return:
            whatever the parent ``__call__`` returns, or None when ``img``
            is a path/URL that cannot be loaded.
        """
        if isinstance(img, str):
            # download net image
            if img.startswith('http'):
                download_with_progressbar(img, 'tmp.jpg')
                img = 'tmp.jpg'
            image_file = img
            img, flag = check_and_read_gif(image_file)
            if not flag:
                # Decode from raw bytes instead of handing the path to OpenCV.
                with open(image_file, 'rb') as f:
                    np_arr = np.frombuffer(f.read(), dtype=np.uint8)
                    img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
            if img is None:
                logger.error("error in loading image:{}".format(image_file))
                return None
        # Promote single-channel input to 3-channel BGR.
        if isinstance(img, np.ndarray) and len(img.shape) == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        return super().__call__(img)


345
def main():
    """Command-line entry point.

    Parses the command line, collects the input images (downloading a
    single image to ``tmp.jpg`` when ``image_dir`` is a link), builds the
    engine selected by ``--type`` ('ocr' or 'structure'), and logs the
    result for every image. In structure mode the result is also written
    with ``save_structure_res`` under ``args.output``.

    raises:
        NotImplementedError: if ``--type`` is neither 'ocr' nor 'structure'.
    """
    # for cmd
    args = parse_args(mMain=True)
    image_dir = args.image_dir
    if is_link(image_dir):
        download_with_progressbar(image_dir, 'tmp.jpg')
        image_file_list = ['tmp.jpg']
    else:
        image_file_list = get_image_file_list(args.image_dir)
    if not image_file_list:
        logger.error('no images find in {}'.format(args.image_dir))
        return

    if args.type == 'ocr':
        engine = PaddleOCR(**(args.__dict__))
    elif args.type == 'structure':
        engine = PPStructure(**(args.__dict__))
    else:
        raise NotImplementedError(
            "--type must be 'ocr' or 'structure', but got {!r}".format(
                args.type))

    for img_path in image_file_list:
        img_name = os.path.basename(img_path).split('.')[0]
        logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
        if args.type == 'ocr':
            result = engine.ocr(
                img_path, det=args.det, rec=args.rec, cls=args.use_angle_cls)
            if result is not None:
                for line in result:
                    logger.info(line)
        elif args.type == 'structure':
            result = engine(img_path)
            # The engine returns None when the image cannot be loaded (it
            # has already logged the error); skip instead of crashing.
            if result is None:
                continue
            for item in result:
                logger.info(item['res'])
            save_structure_res(result, args.output, img_name)