paddleocr.py 16.6 KB
Newer Older
WenmuZhou's avatar
WenmuZhou committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys

# Put this file's directory on sys.path before the ``tools`` / ``ppocr``
# imports below, which are resolved relative to it.
# NOTE(review): a module-level ``__dir__`` is also the PEP 562 hook consulted
# by ``dir(module)``; binding it to a string presumably makes ``dir()`` on this
# module fail. The name is kept unchanged here in case other code reads it.
__dir__ = os.path.dirname(__file__)
sys.path.append(os.path.join(__dir__, ''))

import cv2
import numpy as np
from pathlib import Path
import tarfile
import requests
from tqdm import tqdm

from tools.infer import predict_system
from ppocr.utils.logging import get_logger

# Shared module logger used by the download helpers, PaddleOCR and main().
logger = get_logger()
from ppocr.utils.utility import check_and_read_gif, get_image_file_list

__all__ = ['PaddleOCR']

WenmuZhou's avatar
WenmuZhou committed
36
37
# Download locations of the default inference models.
#   'det' / 'cls': a single archive URL each.
#   'rec': one entry per language key, holding the archive URL and the
#          character dictionary path (relative to this file's directory).
model_urls = {
    'det':
    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
    'rec': {
        'ch': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
        },
        'en': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/en_dict.txt'
        },
        'french': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/french_dict.txt'
        },
        'german': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/german_dict.txt'
        },
        'korean': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/korean_dict.txt'
        },
        'japan': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/japan_dict.txt'
        },
        'chinese_cht': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/chinese_cht_dict.txt'
        },
        'ta': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/ta_dict.txt'
        },
        'te': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/te_dict.txt'
        },
        'ka': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/ka_dict.txt'
        },
        'latin': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/latin_dict.txt'
        },
        'arabic': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/arabic_dict.txt'
        },
        'cyrillic': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/cyrillic_dict.txt'
        },
        'devanagari': {
            'url':
            'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar',
            'dict_path': './ppocr/utils/dict/devanagari_dict.txt'
        }
    },
    'cls':
    'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'
}

# Detection algorithms accepted by PaddleOCR.__init__.
SUPPORT_DET_MODEL = ['DB']
# Formatted into the model cache sub-directory name ("2.0/det", "2.0/rec/<lang>", ...).
VERSION = 2.0
# Recognition algorithms accepted by PaddleOCR.__init__.
SUPPORT_REC_MODEL = ['CRNN']
# Root of the per-user model cache.
BASE_DIR = os.path.expanduser("~/.paddleocr/")
WenmuZhou's avatar
WenmuZhou committed
119
120
121
122
123
124
125
126
127
128
129
130


def download_with_progressbar(url, save_path):
    """Stream ``url`` to ``save_path`` while showing a tqdm progress bar.

    Args:
        url: HTTP(S) address of the resource to fetch.
        save_path: Local file path the response body is written to.

    Raises:
        SystemExit: With a non-zero status when the server does not answer
            200, when it sends no ``content-length`` header, or when the
            number of bytes received differs from ``content-length``.
    """
    block_size = 1024  # 1 Kibibyte
    # The context manager releases the streamed connection even if writing
    # the file or iterating the body raises.
    with requests.get(url, stream=True) as response:
        # Without this check an HTTP error page would be saved as if it were
        # the requested file and could still pass the size comparison below.
        if response.status_code != 200:
            logger.error("Something went wrong while downloading models")
            sys.exit(1)
        total_size_in_bytes = int(response.headers.get('content-length', 0))
        with open(save_path, 'wb') as file, tqdm(
                total=total_size_in_bytes, unit='iB',
                unit_scale=True) as progress_bar:
            for data in response.iter_content(block_size):
                progress_bar.update(len(data))
                file.write(data)
            received = progress_bar.n

    # A missing content-length is treated as a failure because the download
    # cannot be verified.
    if total_size_in_bytes == 0 or received != total_size_in_bytes:
        logger.error("Something went wrong while downloading models")
        sys.exit(1)


136
def maybe_download(model_storage_directory, url):
    """Ensure the inference model files exist in ``model_storage_directory``.

    Nothing is done when both ``inference.pdiparams`` and
    ``inference.pdmodel`` are already present (a cached download or a
    user-supplied model). Otherwise the tar archive at ``url`` is downloaded
    into the directory, the known model files are extracted next to it
    (flattened, without the archive's folder structure) and the archive is
    deleted.

    Args:
        model_storage_directory: Directory that holds (or will hold) the model.
        url: Address of the ``.tar`` archive containing the model files.
    """
    tar_file_name_list = [
        'inference.pdiparams', 'inference.pdiparams.info', 'inference.pdmodel'
    ]
    required = ('inference.pdiparams', 'inference.pdmodel')
    if all(
            os.path.exists(os.path.join(model_storage_directory, name))
            for name in required):
        return

    tmp_path = os.path.join(model_storage_directory, url.split('/')[-1])
    print('download {} to {}'.format(url, tmp_path))
    os.makedirs(model_storage_directory, exist_ok=True)
    try:
        download_with_progressbar(url, tmp_path)
        with tarfile.open(tmp_path, 'r') as tarObj:
            for member in tarObj.getmembers():
                # Compare the exact base name: a substring test would let
                # 'inference.pdiparams' match 'inference.pdiparams.info' and
                # any other member whose path merely contains a known name.
                filename = os.path.basename(member.name)
                if not member.isfile() or filename not in tar_file_name_list:
                    continue
                file = tarObj.extractfile(member)
                with open(
                        os.path.join(model_storage_directory, filename),
                        'wb') as f:
                    f.write(file.read())
    finally:
        # Never leave a partial or already-processed archive behind, including
        # when the download aborts or extraction fails.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
WenmuZhou's avatar
WenmuZhou committed
163
164


WenmuZhou's avatar
WenmuZhou committed
165
def parse_args(mMain=True, add_help=True):
    """Build the PaddleOCR configuration namespace.

    Args:
        mMain: When True, parse the options from the command line
            (``sys.argv``). When False, return a namespace holding the
            default value of every option without touching ``sys.argv``.
        add_help: Forwarded to ``argparse.ArgumentParser``; only used when
            ``mMain`` is True.

    Returns:
        argparse.Namespace with one attribute per option.
    """
    import argparse

    def str2bool(v):
        # argparse passes the raw string; only these spellings mean True.
        return v.lower() in ("true", "t", "1")

    if mMain:
        parser = argparse.ArgumentParser(add_help=add_help)
        # params for prediction engine
        parser.add_argument("--use_gpu", type=str2bool, default=True)
        parser.add_argument("--ir_optim", type=str2bool, default=True)
        parser.add_argument("--use_tensorrt", type=str2bool, default=False)
        parser.add_argument("--gpu_mem", type=int, default=8000)

        # params for text detector
        parser.add_argument("--image_dir", type=str)
        parser.add_argument("--det_algorithm", type=str, default='DB')
        parser.add_argument("--det_model_dir", type=str, default=None)
        parser.add_argument("--det_limit_side_len", type=float, default=960)
        parser.add_argument("--det_limit_type", type=str, default='max')

        # DB params
        parser.add_argument("--det_db_thresh", type=float, default=0.3)
        parser.add_argument("--det_db_box_thresh", type=float, default=0.5)
        parser.add_argument("--det_db_unclip_ratio", type=float, default=1.6)
        # Boolean flags use str2bool: with type=bool any non-empty string,
        # including "False", would be converted to True.
        parser.add_argument("--use_dilation", type=str2bool, default=False)

        # EAST params
        parser.add_argument("--det_east_score_thresh", type=float, default=0.8)
        parser.add_argument("--det_east_cover_thresh", type=float, default=0.1)
        parser.add_argument("--det_east_nms_thresh", type=float, default=0.2)

        # params for text recognizer
        parser.add_argument("--rec_algorithm", type=str, default='CRNN')
        parser.add_argument("--rec_model_dir", type=str, default=None)
        parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
        parser.add_argument("--rec_char_type", type=str, default='ch')
        parser.add_argument("--rec_batch_num", type=int, default=30)
        parser.add_argument("--max_text_length", type=int, default=25)
        parser.add_argument("--rec_char_dict_path", type=str, default=None)
        parser.add_argument("--use_space_char", type=str2bool, default=True)
        parser.add_argument("--drop_score", type=float, default=0.5)

        # params for text classifier
        parser.add_argument("--cls_model_dir", type=str, default=None)
        parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192")
        # NOTE(review): type=list splits a command-line value into single
        # characters; left unchanged to keep the existing CLI contract.
        parser.add_argument("--label_list", type=list, default=['0', '180'])
        parser.add_argument("--cls_batch_num", type=int, default=30)
        parser.add_argument("--cls_thresh", type=float, default=0.9)

        parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
        parser.add_argument(
            "--use_zero_copy_run", type=str2bool, default=False)
        parser.add_argument("--use_pdserving", type=str2bool, default=False)

        parser.add_argument("--lang", type=str, default='ch')
        parser.add_argument("--det", type=str2bool, default=True)
        parser.add_argument("--rec", type=str2bool, default=True)
        parser.add_argument("--use_angle_cls", type=str2bool, default=False)
        return parser.parse_args()
    else:
        # Library use: same defaults as the parser above, except that
        # image_dir is an empty string instead of None.
        return argparse.Namespace(
            use_gpu=True,
            ir_optim=True,
            use_tensorrt=False,
            gpu_mem=8000,
            image_dir='',
            det_algorithm='DB',
            det_model_dir=None,
            det_limit_side_len=960,
            det_limit_type='max',
            det_db_thresh=0.3,
            det_db_box_thresh=0.5,
            det_db_unclip_ratio=1.6,
            use_dilation=False,
            det_east_score_thresh=0.8,
            det_east_cover_thresh=0.1,
            det_east_nms_thresh=0.2,
            rec_algorithm='CRNN',
            rec_model_dir=None,
            rec_image_shape="3, 32, 320",
            rec_char_type='ch',
            rec_batch_num=30,
            max_text_length=25,
            rec_char_dict_path=None,
            use_space_char=True,
            drop_score=0.5,
            cls_model_dir=None,
            cls_image_shape="3, 48, 192",
            label_list=['0', '180'],
            cls_batch_num=30,
            cls_thresh=0.9,
            enable_mkldnn=False,
            use_zero_copy_run=False,
            use_pdserving=False,
            lang='ch',
            det=True,
            rec=True,
            use_angle_cls=False)
WenmuZhou's avatar
WenmuZhou committed
263
264
265


class PaddleOCR(predict_system.TextSystem):
    """OCR pipeline that fetches the default models on first use.

    Extends ``predict_system.TextSystem`` with default-model download,
    language selection and a convenience ``ocr`` entry point.
    """

    def __init__(self, **kwargs):
        """
        paddleocr package
        args:
            **kwargs: other params show in paddleocr --help
        """
        postprocess_params = parse_args(mMain=False, add_help=False)
        vars(postprocess_params).update(kwargs)
        self.use_angle_cls = postprocess_params.use_angle_cls
        lang = postprocess_params.lang

        # Language codes that share one multilingual recognition model.
        latin_lang = [
            'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'en', 'es', 'et', 'fr',
            'ga', 'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi',
            'ms', 'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin',
            'sk', 'sl', 'sq', 'sv', 'sw', 'tl', 'tr', 'uz', 'vi'
        ]
        arabic_lang = ['ar', 'fa', 'ug', 'ur']
        cyrillic_lang = [
            'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd',
            'ava', 'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
        ]
        devanagari_lang = [
            'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new',
            'gom', 'sa', 'bgc'
        ]
        if lang in latin_lang:
            lang = "latin"
        elif lang in arabic_lang:
            lang = "arabic"
        elif lang in cyrillic_lang:
            lang = "cyrillic"
        elif lang in devanagari_lang:
            lang = "devanagari"
        assert lang in model_urls[
            'rec'], 'param lang must in {}, but got {}'.format(
                model_urls['rec'].keys(), lang)

        # Fall back to the dictionary bundled for the chosen language; it is
        # resolved against this file's directory further below.
        use_inner_dict = False
        if postprocess_params.rec_char_dict_path is None:
            use_inner_dict = True
            postprocess_params.rec_char_dict_path = model_urls['rec'][lang][
                'dict_path']

        # init model dir
        if postprocess_params.det_model_dir is None:
            postprocess_params.det_model_dir = os.path.join(
                BASE_DIR, '{}/det'.format(VERSION))
        if postprocess_params.rec_model_dir is None:
            postprocess_params.rec_model_dir = os.path.join(
                BASE_DIR, '{}/rec/{}'.format(VERSION, lang))
        if postprocess_params.cls_model_dir is None:
            postprocess_params.cls_model_dir = os.path.join(
                BASE_DIR, '{}/cls'.format(VERSION))
        print(postprocess_params)

        # Reject unsupported algorithms before downloading anything, and
        # report the failure with a non-zero exit status.
        if postprocess_params.det_algorithm not in SUPPORT_DET_MODEL:
            logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
            sys.exit(1)
        if postprocess_params.rec_algorithm not in SUPPORT_REC_MODEL:
            logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
            sys.exit(1)

        # download model
        maybe_download(postprocess_params.det_model_dir, model_urls['det'])
        maybe_download(postprocess_params.rec_model_dir,
                       model_urls['rec'][lang]['url'])
        maybe_download(postprocess_params.cls_model_dir, model_urls['cls'])

        if use_inner_dict:
            postprocess_params.rec_char_dict_path = str(
                Path(__file__).parent / postprocess_params.rec_char_dict_path)

        # init det_model and rec_model
        super().__init__(postprocess_params)

    def ocr(self, img, det=True, rec=True, cls=False):
        """
        ocr with paddleocr
        args:
            img: img for ocr, support ndarray, img_path and list or ndarray
            det: use text detection or not, if false, only rec will be exec. default is True
            rec: use text recognition or not, if false, only det will be exec. default is True
            cls: use the angle classifier or not; it only takes effect when
                the instance was created with use_angle_cls=True. default is False
        returns:
            det and rec: list of [box, recognition result] pairs
            det only: list of boxes
            otherwise: the recognizer output, or the classifier output when
                rec is false and the classifier is active
            None when the image cannot be loaded or no boxes are detected
        """
        assert isinstance(img, (np.ndarray, list, str))
        if isinstance(img, list) and det:
            logger.error('When input a list of images, det must be false')
            sys.exit(1)

        # Apply ``cls`` for this call only. Clearing the attribute for good
        # would disable an initialised classifier for every later call.
        # NOTE(review): the inherited __call__ presumably reads
        # self.use_angle_cls as well -- confirm in predict_system.TextSystem.
        classifier_ready = self.use_angle_cls
        if not cls:
            self.use_angle_cls = False
        elif not classifier_ready:
            logger.warning(
                'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process'
            )
        try:
            return self._run_ocr(img, det, rec)
        finally:
            self.use_angle_cls = classifier_ready

    def _run_ocr(self, img, det, rec):
        # Load the image if needed and dispatch to the requested stages.
        if isinstance(img, str):
            # download net image
            if img.startswith('http'):
                download_with_progressbar(img, 'tmp.jpg')
                img = 'tmp.jpg'
            image_file = img
            img, flag = check_and_read_gif(image_file)
            if not flag:
                # Decode from raw bytes instead of handing the path to OpenCV.
                with open(image_file, 'rb') as f:
                    np_arr = np.frombuffer(f.read(), dtype=np.uint8)
                    img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
            if img is None:
                logger.error("error in loading image:{}".format(image_file))
                return None
        if isinstance(img, np.ndarray) and len(img.shape) == 2:
            # Promote single-channel input to three channels.
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        if det and rec:
            dt_boxes, rec_res = self(img)
            # Same guard as the detection-only branch below.
            if dt_boxes is None or rec_res is None:
                return None
            return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
        elif det and not rec:
            dt_boxes, elapse = self.text_detector(img)
            if dt_boxes is None:
                return None
            return [box.tolist() for box in dt_boxes]
        else:
            if not isinstance(img, list):
                img = [img]
            if self.use_angle_cls:
                img, cls_res, elapse = self.text_classifier(img)
                if not rec:
                    return cls_res
            rec_res, elapse = self.text_recognizer(img)
            return rec_res
390
391
392


def main():
    """Command-line entry point: run OCR on ``--image_dir`` and log the results.

    ``--image_dir`` may be an http(s) URL, which is downloaded to ``tmp.jpg``
    in the current directory, or a path handed to ``get_image_file_list``.
    """
    # for cmd
    args = parse_args(mMain=True)
    image_dir = args.image_dir
    # --image_dir has no default on the command line; without this guard the
    # startswith() call below fails with AttributeError on None.
    if not image_dir:
        logger.error('--image_dir must be set to an image path, directory or URL')
        return
    if image_dir.startswith('http'):
        download_with_progressbar(image_dir, 'tmp.jpg')
        image_file_list = ['tmp.jpg']
    else:
        image_file_list = get_image_file_list(args.image_dir)
    if not image_file_list:
        logger.error('no images find in {}'.format(args.image_dir))
        return

    ocr_engine = PaddleOCR(**(args.__dict__))
    for img_path in image_file_list:
        logger.info('{}{}{}'.format('*' * 10, img_path, '*' * 10))
        result = ocr_engine.ocr(img_path,
                                det=args.det,
                                rec=args.rec,
                                cls=args.use_angle_cls)
        if result is not None:
            for line in result:
                logger.info(line)