# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys

__dir__ = os.path.dirname(__file__)
sys.path.append(os.path.join(__dir__, ''))

import cv2
import numpy as np
from pathlib import Path

from tools.infer import predict_system
WenmuZhou's avatar
WenmuZhou committed
26
from ppocr.utils.logging import get_logger
WenmuZhou's avatar
WenmuZhou committed
27

WenmuZhou's avatar
WenmuZhou committed
28
logger = get_logger()
29
from ppocr.utils.utility import check_and_read_gif, get_image_file_list
30
from ppocr.utils.network import maybe_download, download_with_progressbar
WenmuZhou's avatar
WenmuZhou committed
31
from tools.infer.utility import draw_ocr, init_args, str2bool
WenmuZhou's avatar
WenmuZhou committed
32
33
34

__all__ = ['PaddleOCR']

WenmuZhou's avatar
WenmuZhou committed
35
# Download URLs of the inference model archives, grouped by pipeline stage:
#   'det' -> {detector language: url}
#   'rec' -> {recognition language: {'url': ..., 'dict_path': ...}}
#   'cls' -> url of the angle classifier
# 'dict_path' values are relative paths to the character dictionaries.
model_urls = {
    'det': {
        'ch': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
        'en': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tar',
    },
    'rec': {
        'ch': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/ppocr_keys_v1.txt',
        ),
        'en': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/en_dict.txt',
        ),
        'french': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/dict/french_dict.txt',
        ),
        'german': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/dict/german_dict.txt',
        ),
        'korean': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/dict/korean_dict.txt',
        ),
        'japan': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/dict/japan_dict.txt',
        ),
        'chinese_cht': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/dict/chinese_cht_dict.txt',
        ),
        'ta': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/dict/ta_dict.txt',
        ),
        'te': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/dict/te_dict.txt',
        ),
        'ka': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/dict/ka_dict.txt',
        ),
        'latin': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/dict/latin_dict.txt',
        ),
        'arabic': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/dict/arabic_dict.txt',
        ),
        'cyrillic': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/dict/cyrillic_dict.txt',
        ),
        'devanagari': dict(
            url='https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tar',
            dict_path='./ppocr/utils/dict/devanagari_dict.txt',
        ),
    },
    'cls': 'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar',
}

# Algorithm names accepted for the detection / recognition stages.
SUPPORT_DET_MODEL = ['DB']
# Version component of the default det/rec model directories.
VERSION = '2.1'
SUPPORT_REC_MODEL = ['CRNN']
# Base directory (under the user's home) for the default model directories.
BASE_DIR = os.path.expanduser("~/.paddleocr/")
WenmuZhou's avatar
WenmuZhou committed
122
123


WenmuZhou's avatar
WenmuZhou committed
124
def parse_args(mMain=True):
    """Build the paddleocr argument set on top of ``init_args()``.

    args:
        mMain: when true, parse the process command line and return the
            result. Otherwise no parsing is done and a namespace holding
            each registered action's default value is returned.
    return:
        an ``argparse.Namespace``
    """
    import argparse

    parser = init_args()
    # Mirror the calling mode onto the parser's ``add_help`` attribute.
    parser.add_help = mMain

    # Options layered on top of the ones provided by init_args().
    extra_options = (
        ("--lang", str, 'ch'),
        ("--det", str2bool, True),
        ("--rec", str2bool, True),
    )
    for flag, converter, fallback in extra_options:
        parser.add_argument(flag, type=converter, default=fallback)

    # Reset the dictionary-path default to None so callers can tell that
    # the user did not supply one.
    for registered in parser._actions:
        if registered.dest == 'rec_char_dict_path':
            registered.default = None

    if mMain:
        return parser.parse_args()

    defaults = {registered.dest: registered.default
                for registered in parser._actions}
    return argparse.Namespace(**defaults)
WenmuZhou's avatar
WenmuZhou committed
142
143
144


class PaddleOCR(predict_system.TextSystem):
    """OCR entry point of the package, built on ``predict_system.TextSystem``.

    Construction resolves the requested language to a recognition model,
    fills in default model directories and the character dictionary, fetches
    the models through ``maybe_download`` and then initialises the base class.
    """

    def __init__(self, **kwargs):
        """
        paddleocr package
        args:
            **kwargs: other params show in paddleocr --help; they override
                the defaults returned by parse_args(mMain=False)
        raises:
            AssertionError: the resolved lang has no entry in
                model_urls['rec']
            SystemExit: det_algorithm / rec_algorithm is not supported
                (exit status 1)
        """
        params = parse_args(mMain=False)
        params.__dict__.update(**kwargs)
        self.use_angle_cls = params.use_angle_cls

        lang = self._resolve_rec_lang(params.lang)
        # Raised explicitly (rather than with ``assert``) so the check is
        # still enforced when Python runs with -O.
        if lang not in model_urls['rec']:
            raise AssertionError('param lang must in {}, but got {}'.format(
                model_urls['rec'].keys(), lang))
        # Only Chinese has its own detector entry; everything else uses 'en'.
        det_lang = "ch" if lang == "ch" else "en"

        # Validate the algorithms before any model is downloaded, and report
        # the failure with a non-zero exit status.
        if params.det_algorithm not in SUPPORT_DET_MODEL:
            logger.error('det_algorithm must in {}'.format(SUPPORT_DET_MODEL))
            sys.exit(1)
        if params.rec_algorithm not in SUPPORT_REC_MODEL:
            logger.error('rec_algorithm must in {}'.format(SUPPORT_REC_MODEL))
            sys.exit(1)

        # No user dictionary: use the bundled one, resolved relative to this
        # file so it does not depend on the current working directory.
        if params.rec_char_dict_path is None:
            params.rec_char_dict_path = str(
                Path(__file__).parent / model_urls['rec'][lang]['dict_path'])

        # init model dir
        if params.det_model_dir is None:
            params.det_model_dir = os.path.join(BASE_DIR, VERSION, 'det',
                                                det_lang)
        if params.rec_model_dir is None:
            params.rec_model_dir = os.path.join(BASE_DIR, VERSION, 'rec', lang)
        if params.cls_model_dir is None:
            # NOTE(review): unlike det/rec this path has no VERSION
            # component - confirm whether that is intentional.
            params.cls_model_dir = os.path.join(BASE_DIR, 'cls')

        # download model
        maybe_download(params.det_model_dir, model_urls['det'][det_lang])
        maybe_download(params.rec_model_dir, model_urls['rec'][lang]['url'])
        maybe_download(params.cls_model_dir, model_urls['cls'])

        print(params)
        # init det_model and rec_model
        super().__init__(params)

    @staticmethod
    def _resolve_rec_lang(lang):
        """Map a language code to its script-family model key, if it has one."""
        families = (
            ("latin", (
                'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr',
                'ga', 'hr', 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv',
                'mi', 'ms', 'mt', 'nl', 'no', 'oc', 'pi', 'pl', 'pt', 'ro',
                'rs_latin', 'sk', 'sl', 'sq', 'sv', 'sw', 'tl', 'tr', 'uz',
                'vi')),
            ("arabic", ('ar', 'fa', 'ug', 'ur')),
            ("cyrillic", (
                'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady',
                'kbd', 'ava', 'dar', 'inh', 'che', 'lbe', 'lez', 'tab')),
            ("devanagari", (
                'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck',
                'new', 'gom', 'sa', 'bgc')),
        )
        for family, codes in families:
            if lang in codes:
                return family
        # Not part of a shared family: the code is used as the key itself.
        return lang

    def ocr(self, img, det=True, rec=True, cls=True):
        """
        ocr with paddleocr
        args:
            img: img for ocr, support ndarray, img_path and list or ndarray
            det: use text detection or not, if false, only rec will be exec. default is True
            rec: use text recognition or not, if false, only det will be exec. default is True
            cls: use the angle classifier or not; it is only applied when
                the instance was created with use_angle_cls enabled.
                default is True
        return:
            det and rec: list of [box as nested list, recognition result]
            det only: list of boxes as nested lists
            otherwise: the recognizer output, or the classifier output when
                rec is false and the classifier ran
            None when the image cannot be loaded or no boxes are returned
        raises:
            AssertionError: img is not an ndarray, list or str
            SystemExit: a list of images was passed together with det
                (exit status 1)
        """
        # Raised explicitly so the type check survives python -O.
        if not isinstance(img, (np.ndarray, list, str)):
            raise AssertionError(
                'img must be a np.ndarray, list or str, but got {}'.format(
                    type(img)))
        if isinstance(img, list) and det:
            logger.error('When input a list of images, det must be false')
            # sys.exit instead of the site-provided ``exit`` builtin, with a
            # non-zero status because this is an error path.
            sys.exit(1)
        if cls and not self.use_angle_cls:
            logger.warning(
                'Since the angle classifier is not initialized, the angle classifier will not be uesd during the forward process'
            )

        if isinstance(img, str):
            # download net image
            if img.startswith('http'):
                download_with_progressbar(img, 'tmp.jpg')
                img = 'tmp.jpg'
            image_file = img
            img, flag = check_and_read_gif(image_file)
            if not flag:
                # Decode from the raw bytes read through Python's own file API.
                with open(image_file, 'rb') as f:
                    np_arr = np.frombuffer(f.read(), dtype=np.uint8)
                    img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
            if img is None:
                logger.error("error in loading image:{}".format(image_file))
                return None

        # Expand single-channel input to three channels.
        if isinstance(img, np.ndarray) and len(img.shape) == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

        if det and rec:
            dt_boxes, rec_res = self(img, cls)
            # NOTE(review): TextSystem.__call__ is not visible from this
            # file; this guard mirrors the det-only branch below in case it
            # can hand back None.
            if dt_boxes is None or rec_res is None:
                return None
            return [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]

        if det:
            dt_boxes, _ = self.text_detector(img)
            if dt_boxes is None:
                return None
            return [box.tolist() for box in dt_boxes]

        # No detection: classifier and recognizer take a list of images.
        if not isinstance(img, list):
            img = [img]
        if self.use_angle_cls and cls:
            img, cls_res, _ = self.text_classifier(img)
            if not rec:
                return cls_res
        rec_res, _ = self.text_recognizer(img)
        return rec_res
271
272
273


def main():
    """Command-line entry point: run OCR on every image named by --image_dir.

    A value starting with 'http' is downloaded to 'tmp.jpg' first; anything
    else is expanded with get_image_file_list. Each result line is logged.
    """
    # for cmd
    args = parse_args(mMain=True)

    source = args.image_dir
    if not source.startswith('http'):
        image_file_list = get_image_file_list(args.image_dir)
    else:
        download_with_progressbar(source, 'tmp.jpg')
        image_file_list = ['tmp.jpg']

    if len(image_file_list) == 0:
        logger.error('no images find in {}'.format(args.image_dir))
        return

    # Every parsed option is forwarded to the engine as a keyword argument.
    ocr_engine = PaddleOCR(**vars(args))
    stars = '*' * 10
    for img_path in image_file_list:
        logger.info('{}{}{}'.format(stars, img_path, stars))
        result = ocr_engine.ocr(
            img_path, det=args.det, rec=args.rec, cls=args.use_angle_cls)
        if result is None:
            continue
        for line in result:
            logger.info(line)