Commit b9098935 authored by Leif

Merge remote-tracking branch 'upstream/dygraph' into dy3

parents 47752ddf 0e32093f
......@@ -13,8 +13,8 @@ The detection and recognition models on the mobile and server sides are as follo
| Model introduction | Model name | Recommended scene | Detection model | Direction Classifier | Recognition model |
| ------------ | --------------- | ----------------|---- | ---------- | -------- |
| Ultra-lightweight Chinese OCR model8.1M | ch_ppocr_mobile_v2.0_xx |Mobile-side/Server-side|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_train.tar) |
| Universal Chinese OCR model(155.1M) | ch_ppocr_server_v2.0_xx |Server-side |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_train.tar) |
| Ultra-lightweight Chinese OCR model (8.1M) | ch_ppocr_mobile_v2.0_xx |Mobile-side/Server-side|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_train.tar)|[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_pre.tar) |
| Universal Chinese OCR model (143M) | ch_ppocr_server_v2.0_xx |Server-side |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_train.tar) |[inference model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar) / [pretrained model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_pre.tar) |
* If `wget` is not installed in the Windows environment, you can copy the model link into a browser to download it, then uncompress the archive and place it in the corresponding directory (a scripted alternative is sketched below).
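As an alternative to `wget`, a minimal Python sketch (standard library only; the URL is one of the detection archives from the table above, and the `./inference/` target directory is just an example, not a required location) can download and unpack an archive:

```
import tarfile
import urllib.request

# One of the inference model archives listed in the table above.
url = "https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar"
tar_path = "ch_ppocr_mobile_v2.0_det_infer.tar"

# Download the archive and extract it; "./inference/" is an arbitrary example directory.
urllib.request.urlretrieve(url, tar_path)
with tarfile.open(tar_path, "r") as tar:
    tar.extractall("./inference/")
```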
......
......@@ -120,6 +120,9 @@ In `word_dict.txt`, there is a single word in each line, which maps characters a
`ppocr/utils/dict/german_dict.txt` is a German dictionary with 131 characters
`ppocr/utils/dict/en_dict.txt` is an English dictionary with 63 characters
You can use them on demand.
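As a rough illustration of the dictionary format described above (one character per line, mapped to its line index), the following sketch loads such a file; `load_char_dict` is a hypothetical helper for illustration, not an API from the repository:

```
def load_char_dict(dict_path):
    """Hypothetical helper: map each character in a dict file to its line index."""
    char_to_idx = {}
    with open(dict_path, "rb") as f:
        for idx, line in enumerate(f):
            char = line.decode("utf-8").strip("\r\n")
            char_to_idx[char] = idx
    return char_to_idx

# Usage example with one of the dictionaries shipped in the repo:
# char_dict = load_char_dict("ppocr/utils/dict/en_dict.txt")
```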
The current multi-language models are still in the demo stage; we will continue to optimize the models and add more languages. **You are very welcome to provide us with dictionaries and fonts in other languages**,
......@@ -149,10 +152,10 @@ First download the pretrain model, you can download the trained model to finetun
```
cd PaddleOCR/
# Download the pre-trained model of MobileNetV3
wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/rec_mv3_none_bilstm_ctc.tar
wget -P ./pretrain_models/ https://paddleocr.bj.bcebos.com/dygraph_v2.0/en/rec_mv3_none_bilstm_ctc_v2.0_train.tar
# Decompress model parameters
cd pretrain_models
tar -xf rec_mv3_none_bilstm_ctc.tar && rm -rf rec_mv3_none_bilstm_ctc.tar
tar -xf rec_mv3_none_bilstm_ctc_v2.0_train.tar && rm -rf rec_mv3_none_bilstm_ctc_v2.0_train.tar
```
Start training:
......@@ -194,7 +197,6 @@ If the evaluation set is large, the test will be time-consuming. It is recommend
| rec_mv3_tps_bilstm_attn.yml | RARE | Mobilenet_v3 large 0.5 | tps | BiLSTM | attention |
| rec_r34_vd_none_bilstm_ctc.yml | CRNN | Resnet34_vd | None | BiLSTM | ctc |
| rec_r34_vd_none_none_ctc.yml | Rosetta | Resnet34_vd | None | None | ctc |
| rec_r34_vd_tps_bilstm_attn.yml | RARE | Resnet34_vd | tps | BiLSTM | attention |
| rec_r34_vd_tps_bilstm_ctc.yml | STARNet | Resnet34_vd | tps | BiLSTM | ctc |
For training Chinese data, it is recommended to use
......
doc/imgs_results/2.jpg (image replaced: 148 KB → 92.2 KB)
doc/imgs_results/det_res_2.jpg (image replaced: 79.5 KB → 77.3 KB)
......@@ -35,44 +35,45 @@ __all__ = ['PaddleOCR']
model_urls = {
'det':
'https://paddleocr.bj.bcebos.com/20-09-22/mobile/det/ch_ppocr_mobile_v1.1_det_infer.tar',
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar',
'rec': {
'ch': {
'url':
'https://paddleocr.bj.bcebos.com/20-09-22/mobile/rec/ch_ppocr_mobile_v1.1_rec_infer.tar',
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar',
'dict_path': './ppocr/utils/ppocr_keys_v1.txt'
},
'en': {
'url':
'https://paddleocr.bj.bcebos.com/20-09-22/mobile/en/en_ppocr_mobile_v1.1_rec_infer.tar',
'dict_path': './ppocr/utils/ic15_dict.txt'
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tar',
'dict_path': './ppocr/utils/dict/en_dict.txt'
},
'french': {
'url':
'https://paddleocr.bj.bcebos.com/20-09-22/mobile/fr/french_ppocr_mobile_v1.1_rec_infer.tar',
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tar',
'dict_path': './ppocr/utils/dict/french_dict.txt'
},
'german': {
'url':
'https://paddleocr.bj.bcebos.com/20-09-22/mobile/ge/german_ppocr_mobile_v1.1_rec_infer.tar',
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tar',
'dict_path': './ppocr/utils/dict/german_dict.txt'
},
'korean': {
'url':
'https://paddleocr.bj.bcebos.com/20-09-22/mobile/kr/korean_ppocr_mobile_v1.1_rec_infer.tar',
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tar',
'dict_path': './ppocr/utils/dict/korean_dict.txt'
},
'japan': {
'url':
'https://paddleocr.bj.bcebos.com/20-09-22/mobile/jp/japan_ppocr_mobile_v1.1_rec_infer.tar',
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tar',
'dict_path': './ppocr/utils/dict/japan_dict.txt'
}
},
'cls':
'https://paddleocr.bj.bcebos.com/20-09-22/cls/ch_ppocr_mobile_v1.1_cls_infer.tar'
'https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tar'
}
SUPPORT_DET_MODEL = ['DB']
VERSION = 2.0
SUPPORT_REC_MODEL = ['CRNN']
BASE_DIR = os.path.expanduser("~/.paddleocr/")
......@@ -94,20 +95,24 @@ def download_with_progressbar(url, save_path):
def maybe_download(model_storage_directory, url):
# using custom model
if not os.path.exists(os.path.join(
model_storage_directory, 'model')) or not os.path.exists(
os.path.join(model_storage_directory, 'params')):
tar_file_name_list = [
'inference.pdiparams', 'inference.pdiparams.info', 'inference.pdmodel'
]
if not os.path.exists(
os.path.join(model_storage_directory, 'inference.pdiparams')
) or not os.path.exists(
os.path.join(model_storage_directory, 'inference.pdmodel')):
tmp_path = os.path.join(model_storage_directory, url.split('/')[-1])
print('download {} to {}'.format(url, tmp_path))
os.makedirs(model_storage_directory, exist_ok=True)
download_with_progressbar(url, tmp_path)
with tarfile.open(tmp_path, 'r') as tarObj:
for member in tarObj.getmembers():
if "model" in member.name:
filename = 'model'
elif "params" in member.name:
filename = 'params'
else:
filename = None
for tar_file_name in tar_file_name_list:
if tar_file_name in member.name:
filename = tar_file_name
if filename is None:
continue
file = tarObj.extractfile(member)
with open(
......@@ -176,7 +181,8 @@ def parse_args(mMain=True, add_help=True):
parser.add_argument("--use_angle_cls", type=str2bool, default=False)
return parser.parse_args()
else:
return argparse.Namespace(use_gpu=True,
return argparse.Namespace(
use_gpu=True,
ir_optim=True,
use_tensorrt=False,
gpu_mem=8000,
......@@ -211,8 +217,7 @@ def parse_args(mMain=True, add_help=True):
lang='ch',
det=True,
rec=True,
use_angle_cls=False
)
use_angle_cls=False)
class PaddleOCR(predict_system.TextSystem):
......@@ -235,12 +240,14 @@ class PaddleOCR(predict_system.TextSystem):
# init model dir
if postprocess_params.det_model_dir is None:
postprocess_params.det_model_dir = os.path.join(BASE_DIR, 'det')
postprocess_params.det_model_dir = os.path.join(
BASE_DIR, '{}/det'.format(VERSION))
if postprocess_params.rec_model_dir is None:
postprocess_params.rec_model_dir = os.path.join(
BASE_DIR, 'rec/{}'.format(lang))
BASE_DIR, '{}/rec/{}'.format(VERSION, lang))
if postprocess_params.cls_model_dir is None:
postprocess_params.cls_model_dir = os.path.join(BASE_DIR, 'cls')
postprocess_params.cls_model_dir = os.path.join(
BASE_DIR, '{}/cls'.format(VERSION))
print(postprocess_params)
# download model
maybe_download(postprocess_params.det_model_dir, model_urls['det'])
......
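For reference, a minimal sketch of how the versioned default model directories in this hunk resolve on disk; it only mirrors `BASE_DIR` and `VERSION` as they appear in the diff, and the comments show the expected paths rather than verified output:

```
import os

BASE_DIR = os.path.expanduser("~/.paddleocr/")
VERSION = 2.0

# Default locations used when no *_model_dir argument is passed.
det_dir = os.path.join(BASE_DIR, '{}/det'.format(VERSION))           # ~/.paddleocr/2.0/det
rec_dir = os.path.join(BASE_DIR, '{}/rec/{}'.format(VERSION, 'ch'))  # ~/.paddleocr/2.0/rec/ch
cls_dir = os.path.join(BASE_DIR, '{}/cls'.format(VERSION))           # ~/.paddleocr/2.0/cls
```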
......@@ -35,12 +35,13 @@ from .text_image_aug import tia_perspective, tia_stretch, tia_distort
class RecAug(object):
def __init__(self, use_tia=True, **kwargsz):
def __init__(self, use_tia=True, aug_prob=0.4, **kwargs):
self.use_tia = use_tia
self.aug_prob = aug_prob
def __call__(self, data):
img = data['image']
img = warp(img, 10, self.use_tia)
img = warp(img, 10, self.use_tia, self.aug_prob)
data['image'] = img
return data
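A minimal usage sketch of the updated `RecAug` signature; the import path is assumed from the repository layout, and the image shape is an arbitrary example:

```
import numpy as np
from ppocr.data.imaug.rec_img_aug import RecAug  # import path assumed from the repo layout

# Apply the TIA-based augmentation to a dummy text crop with the new default probability.
aug = RecAug(use_tia=True, aug_prob=0.4)
sample = {'image': np.random.randint(0, 255, size=(32, 100, 3), dtype=np.uint8)}
sample = aug(sample)  # sample['image'] is now the (possibly) warped crop
```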
......@@ -329,7 +330,7 @@ def get_warpAffine(config):
return rz
def warp(img, ang, use_tia=True):
def warp(img, ang, use_tia=True, prob=0.4):
"""
warp
"""
......@@ -338,8 +339,6 @@ def warp(img, ang, use_tia=True):
config.make(w, h, ang)
new_img = img
prob = 0.4
if config.distort:
img_height, img_width = img.shape[0:2]
if random.random() <= prob and img_height >= 20 and img_width >= 20:
......
......@@ -16,8 +16,8 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import copy
import paddle
__all__ = ['build_optimizer']
......@@ -49,7 +49,13 @@ def build_optimizer(config, epochs, step_each_epoch, parameters):
# step3 build optimizer
optim_name = config.pop('name')
if 'clip_norm' in config:
clip_norm = config.pop('clip_norm')
grad_clip = paddle.nn.ClipGradByNorm(clip_norm=clip_norm)
else:
grad_clip = None
optim = getattr(optimizer, optim_name)(learning_rate=lr,
weight_decay=reg,
grad_clip=grad_clip,
**config)
return optim(parameters), lr
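A sketch of the new `clip_norm` handling: when the optimizer sub-config carries `clip_norm`, a `paddle.nn.ClipGradByNorm` instance is built and forwarded as `grad_clip`; the config values below are illustrative, not taken from any shipped YAML:

```
import paddle

# Illustrative optimizer sub-config; 'clip_norm' is the key handled in build_optimizer.
config = {'beta1': 0.9, 'beta2': 0.999, 'clip_norm': 10.0}

if 'clip_norm' in config:
    grad_clip = paddle.nn.ClipGradByNorm(clip_norm=config.pop('clip_norm'))
else:
    grad_clip = None
# grad_clip is then passed through to paddle's optimizers via the wrapper classes below.
```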
......@@ -30,18 +30,25 @@ class Momentum(object):
regularization (WeightDecayRegularizer, optional) - The strategy of regularization.
"""
def __init__(self, learning_rate, momentum, weight_decay=None, **args):
def __init__(self,
learning_rate,
momentum,
weight_decay=None,
grad_clip=None,
**args):
super(Momentum, self).__init__()
self.learning_rate = learning_rate
self.momentum = momentum
self.weight_decay = weight_decay
self.grad_clip = grad_clip
def __call__(self, parameters):
opt = optim.Momentum(
learning_rate=self.learning_rate,
momentum=self.momentum,
parameters=parameters,
weight_decay=self.weight_decay)
weight_decay=self.weight_decay,
grad_clip=self.grad_clip,
parameters=parameters)
return opt
......@@ -96,10 +103,11 @@ class RMSProp(object):
def __init__(self,
learning_rate,
momentum,
momentum=0.0,
rho=0.95,
epsilon=1e-6,
weight_decay=None,
grad_clip=None,
**args):
super(RMSProp, self).__init__()
self.learning_rate = learning_rate
......@@ -107,6 +115,7 @@ class RMSProp(object):
self.rho = rho
self.epsilon = epsilon
self.weight_decay = weight_decay
self.grad_clip = grad_clip
def __call__(self, parameters):
opt = optim.RMSProp(
......@@ -115,5 +124,6 @@ class RMSProp(object):
rho=self.rho,
epsilon=self.epsilon,
weight_decay=self.weight_decay,
grad_clip=self.grad_clip,
parameters=parameters)
return opt
......@@ -40,7 +40,7 @@ class DBPostProcess(object):
self.max_candidates = max_candidates
self.unclip_ratio = unclip_ratio
self.min_size = 3
self.dilation_kernel = None if not use_dilation else [[1, 1], [1, 1]]
self.dilation_kernel = None if not use_dilation else np.array([[1, 1], [1, 1]])
def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
'''
......
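The dilation kernel changes from a plain Python list to a NumPy array because OpenCV's `dilate` expects an array-like kernel. A minimal sketch of how it is applied; the binary mask below is a placeholder for the thresholded segmentation map inside `DBPostProcess`:

```
import cv2
import numpy as np

dilation_kernel = np.array([[1, 1], [1, 1]], dtype=np.uint8)

# Placeholder binary mask standing in for the thresholded probability map.
mask = (np.random.rand(640, 640) > 0.7).astype(np.uint8)
dilated = cv2.dilate(mask, dilation_kernel)  # slightly expands text regions before box extraction
```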
......@@ -63,6 +63,7 @@ class TextDetector(object):
postprocess_params["box_thresh"] = args.det_db_box_thresh
postprocess_params["max_candidates"] = 1000
postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio
postprocess_params["use_dilation"] = True
else:
logger.info("unknown det_algorithm:{}".format(self.det_algorithm))
sys.exit(0)
......@@ -111,7 +112,7 @@ class TextDetector(object):
box = self.clip_det_res(box, img_height, img_width)
rect_width = int(np.linalg.norm(box[0] - box[1]))
rect_height = int(np.linalg.norm(box[0] - box[3]))
if rect_width <= 10 or rect_height <= 10:
if rect_width <= 3 or rect_height <= 3:
continue
dt_boxes_new.append(box)
dt_boxes = np.array(dt_boxes_new)
......@@ -186,4 +187,4 @@ if __name__ == "__main__":
cv2.imwrite(img_path, src_im)
logger.info("The visualized image saved in {}".format(img_path))
if count > 1:
logger.info("Avg Time:", total_time / (count - 1))
logger.info("Avg Time: {}".format(total_time / (count - 1)))