Commit 9ded14fa authored by weishengyu
Parents: 1f9d6d7f ccfc7544
@@ -99,5 +99,5 @@ For more text detection and recognition tandem reasoning, please refer to the do
 In addition, the tutorial also provides other deployment methods for the Chinese OCR model:
 - [Server-side C++ inference](../../deploy/cpp_infer/readme_en.md)
-- [Service deployment](../../deploy/pdserving/readme_en.md)
-- [End-to-end deployment](../../deploy/lite/readme_en.md)
+- [Service deployment](../../deploy/hubserving)
+- [End-to-end deployment](https://github.com/PaddlePaddle/PaddleOCR/tree/develop/deploy/lite)
doc/joinus.PNG: image replaced (212 KB → 237 KB)
@@ -119,10 +119,10 @@ class DetResizeForTest(object):
         if 'image_shape' in kwargs:
             self.image_shape = kwargs['image_shape']
             self.resize_type = 1
-        if 'limit_side_len' in kwargs:
+        elif 'limit_side_len' in kwargs:
             self.limit_side_len = kwargs['limit_side_len']
             self.limit_type = kwargs.get('limit_type', 'min')
-        if 'resize_long' in kwargs:
+        elif 'resize_long' in kwargs:
             self.resize_type = 2
             self.resize_long = kwargs.get('resize_long', 960)
         else:
...
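Switching the second and third branches from `if` to `elif` makes the three resize modes mutually exclusive: previously, passing both `image_shape` and `resize_long` would first set `resize_type = 1` and then silently overwrite it with 2. A minimal stand-alone sketch of the corrected dispatch, assuming the same kwargs convention as `DetResizeForTest`:

# Hypothetical illustration of the if/elif fix; the mode numbers mirror
# DetResizeForTest's resize_type values.
def pick_resize_mode(**kwargs):
    if 'image_shape' in kwargs:
        return 1  # resize to a fixed target shape
    elif 'limit_side_len' in kwargs:
        return 0  # clamp one side to a length limit
    elif 'resize_long' in kwargs:
        return 2  # scale so the long side equals resize_long
    return 0      # fall back to the default mode

print(pick_resize_mode(image_shape=(736, 736), resize_long=960))  # 1, not 2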
@@ -45,7 +45,6 @@ class BalanceLoss(nn.Layer):
         self.balance_loss = balance_loss
         self.main_loss_type = main_loss_type
         self.negative_ratio = negative_ratio
-        self.main_loss_type = main_loss_type
         self.return_origin = return_origin
         self.eps = eps
...
@@ -19,7 +19,6 @@ from __future__ import print_function
 import paddle
 from paddle import nn
 from .det_basic_loss import DiceLoss
-import paddle.fluid as fluid
 import numpy as np
@@ -27,9 +26,7 @@ class SASTLoss(nn.Layer):
     """
     """

-    def __init__(self,
-                 eps=1e-6,
-                 **kwargs):
+    def __init__(self, eps=1e-6, **kwargs):
         super(SASTLoss, self).__init__()
         self.dice_loss = DiceLoss(eps=eps)
@@ -53,10 +50,12 @@ class SASTLoss(nn.Layer):
         score_loss = 1.0 - 2 * intersection / (union + 1e-5)

         #border loss
-        l_border_split, l_border_norm = paddle.split(l_border, num_or_sections=[4, 1], axis=1)
+        l_border_split, l_border_norm = paddle.split(
+            l_border, num_or_sections=[4, 1], axis=1)
         f_border_split = f_border
         border_ex_shape = l_border_norm.shape * np.array([1, 4, 1, 1])
-        l_border_norm_split = paddle.expand(x=l_border_norm, shape=border_ex_shape)
+        l_border_norm_split = paddle.expand(
+            x=l_border_norm, shape=border_ex_shape)
         l_border_score = paddle.expand(x=l_score, shape=border_ex_shape)
         l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape)
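The border, tvo, and tco hunks in this file are line-length reformats only; the computation is unchanged. The recurring pattern: each label tensor carries its geometry channels plus one per-pixel norm channel, which `paddle.split` separates and `paddle.expand` then broadcasts back across the geometry channels. A toy sketch with invented shapes:

import numpy as np
import paddle

l_border = paddle.rand([2, 5, 8, 8])  # 4 geometry channels + 1 norm channel
geo, norm = paddle.split(l_border, num_or_sections=[4, 1], axis=1)
ex_shape = norm.shape * np.array([1, 4, 1, 1])  # [2, 1, 8, 8] -> [2, 4, 8, 8]
norm_b = paddle.expand(x=norm, shape=ex_shape)  # norm repeated per geo channel
print(geo.shape, norm_b.shape)  # [2, 4, 8, 8] [2, 4, 8, 8]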
@@ -72,7 +71,8 @@ class SASTLoss(nn.Layer):
                       (paddle.sum(l_border_score * l_border_mask) + 1e-5)

         #tvo_loss
-        l_tvo_split, l_tvo_norm = paddle.split(l_tvo, num_or_sections=[8, 1], axis=1)
+        l_tvo_split, l_tvo_norm = paddle.split(
+            l_tvo, num_or_sections=[8, 1], axis=1)
         f_tvo_split = f_tvo
         tvo_ex_shape = l_tvo_norm.shape * np.array([1, 8, 1, 1])
         l_tvo_norm_split = paddle.expand(x=l_tvo_norm, shape=tvo_ex_shape)
@@ -91,7 +91,8 @@ class SASTLoss(nn.Layer):
                    (paddle.sum(l_tvo_score * l_tvo_mask) + 1e-5)

         #tco_loss
-        l_tco_split, l_tco_norm = paddle.split(l_tco, num_or_sections=[2, 1], axis=1)
+        l_tco_split, l_tco_norm = paddle.split(
+            l_tco, num_or_sections=[2, 1], axis=1)
         f_tco_split = f_tco
         tco_ex_shape = l_tco_norm.shape * np.array([1, 2, 1, 1])
         l_tco_norm_split = paddle.expand(x=l_tco_norm, shape=tco_ex_shape)
@@ -109,7 +110,6 @@ class SASTLoss(nn.Layer):
         tco_loss = paddle.sum(tco_out_loss * l_tco_score * l_tco_mask) / \
                    (paddle.sum(l_tco_score * l_tco_mask) + 1e-5)

         # total loss
-
         tvo_lw, tco_lw = 1.5, 1.5
         score_lw, border_lw = 1.0, 1.0
...
@@ -26,6 +26,8 @@ class RecMetric(object):
         all_num = 0
         norm_edit_dis = 0.0
         for (pred, pred_conf), (target, _) in zip(preds, labels):
+            pred = pred.replace(" ", "")
+            target = target.replace(" ", "")
            norm_edit_dis += Levenshtein.distance(pred, target) / max(
                len(pred), len(target))
            if pred == target:
...
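The two added `replace` calls make both the accuracy and the normalized edit distance insensitive to spacing, so a prediction that differs from its label only in blanks now counts as correct. A toy check using the same `Levenshtein` package the metric imports:

import Levenshtein

pred, target = "joint  ctc", "jointctc"
print(Levenshtein.distance(pred, target))                  # 2: the spaces alone
pred = pred.replace(" ", "")
target = target.replace(" ", "")
print(Levenshtein.distance(pred, target), pred == target)  # 0 True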
@@ -16,6 +16,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

+import math
 import paddle
 from paddle import nn, ParamAttr
 from paddle.nn import functional as F
@@ -88,11 +89,14 @@ class LocalizationNetwork(nn.Layer):
             in_channels = num_filters
         self.block_list.append(pool)
         name = "loc_fc1"
+        stdv = 1.0 / math.sqrt(num_filters_list[-1] * 1.0)
         self.fc1 = nn.Linear(
             in_channels,
             fc_dim,
             weight_attr=ParamAttr(
-                learning_rate=loc_lr, name=name + "_w"),
+                learning_rate=loc_lr,
+                name=name + "_w",
+                initializer=nn.initializer.Uniform(-stdv, stdv)),
             bias_attr=ParamAttr(name=name + '.b_0'),
             name=name)
...
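The new `stdv` bounds a fan-in scaled uniform initializer for `fc1` instead of Paddle's default weight init; the bound 1/sqrt(fan_in) is the classic LeCun-style scaling. A minimal stand-alone sketch with invented layer sizes:

import math
import paddle
from paddle import nn, ParamAttr

fan_in, fc_dim = 512, 256  # illustrative; the real code uses num_filters_list[-1]
stdv = 1.0 / math.sqrt(fan_in * 1.0)
fc = nn.Linear(
    fan_in,
    fc_dim,
    weight_attr=ParamAttr(initializer=nn.initializer.Uniform(-stdv, stdv)))
# All weights are drawn from [-stdv, stdv]:
print(float(paddle.max(paddle.abs(fc.weight))) <= stdv)  # True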
@@ -18,6 +18,7 @@ from __future__ import print_function
 from __future__ import unicode_literals

 from paddle.optimizer import lr
+from .lr_scheduler import CyclicalCosineDecay


 class Linear(object):
@@ -46,7 +47,7 @@ class Linear(object):
         self.end_lr = end_lr
         self.power = power
         self.last_epoch = last_epoch
-        self.warmup_epoch = warmup_epoch * step_each_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

     def __call__(self):
         learning_rate = lr.PolynomialDecay(
@@ -87,7 +88,7 @@ class Cosine(object):
         self.learning_rate = learning_rate
         self.T_max = step_each_epoch * epochs
         self.last_epoch = last_epoch
-        self.warmup_epoch = warmup_epoch * step_each_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

     def __call__(self):
         learning_rate = lr.CosineAnnealingDecay(
@@ -129,7 +130,7 @@ class Step(object):
         self.learning_rate = learning_rate
         self.gamma = gamma
         self.last_epoch = last_epoch
-        self.warmup_epoch = warmup_epoch * step_each_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

     def __call__(self):
         learning_rate = lr.StepDecay(
@@ -168,7 +169,7 @@ class Piecewise(object):
         self.boundaries = [step_each_epoch * e for e in decay_epochs]
         self.values = values
         self.last_epoch = last_epoch
-        self.warmup_epoch = warmup_epoch * step_each_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)

     def __call__(self):
         learning_rate = lr.PiecewiseDecay(
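The same `round()` guard is applied to Linear, Cosine, Step, and Piecewise alike: `warmup_epoch` may now be fractional in the config, while `lr.LinearWarmup` needs an integer number of warmup steps. A quick illustration with invented numbers:

step_each_epoch = 125
warmup_epoch = 0.5                            # half an epoch of warmup
print(warmup_epoch * step_each_epoch)         # 62.5 -- a float step count
print(round(warmup_epoch * step_each_epoch))  # 62   -- usable by LinearWarmup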
@@ -183,3 +184,45 @@ class Piecewise(object):
             end_lr=self.values[0],
             last_epoch=self.last_epoch)
         return learning_rate
+
+
+class CyclicalCosine(object):
+    """
+    Cyclical cosine learning rate decay
+    Args:
+        learning_rate(float): initial learning rate
+        step_each_epoch(int): steps each epoch
+        epochs(int): total training epochs
+        cycle(int): period of the cosine learning rate
+        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
+    """
+
+    def __init__(self,
+                 learning_rate,
+                 step_each_epoch,
+                 epochs,
+                 cycle,
+                 warmup_epoch=0,
+                 last_epoch=-1,
+                 **kwargs):
+        super(CyclicalCosine, self).__init__()
+        self.learning_rate = learning_rate
+        self.T_max = step_each_epoch * epochs
+        self.last_epoch = last_epoch
+        self.warmup_epoch = round(warmup_epoch * step_each_epoch)
+        self.cycle = round(cycle * step_each_epoch)
+
+    def __call__(self):
+        learning_rate = CyclicalCosineDecay(
+            learning_rate=self.learning_rate,
+            T_max=self.T_max,
+            cycle=self.cycle,
+            last_epoch=self.last_epoch)
+        if self.warmup_epoch > 0:
+            learning_rate = lr.LinearWarmup(
+                learning_rate=learning_rate,
+                warmup_steps=self.warmup_epoch,
+                start_lr=0.0,
+                end_lr=self.learning_rate,
+                last_epoch=self.last_epoch)
+        return learning_rate
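A hedged usage sketch of the new wrapper (numbers invented; note that `cycle` is given in epochs and converted to steps in `__init__`):

import paddle

scheduler = CyclicalCosine(
    learning_rate=0.001,
    step_each_epoch=100,
    epochs=500,
    cycle=100,          # restart the cosine every 100 epochs
    warmup_epoch=5)()   # calling the wrapper builds the paddle scheduler
optimizer = paddle.optimizer.Adam(
    learning_rate=scheduler,
    parameters=model.parameters())  # `model` is any paddle.nn.Layer (hypothetical)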
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
from paddle.optimizer.lr import LRScheduler


class CyclicalCosineDecay(LRScheduler):
    def __init__(self,
                 learning_rate,
                 T_max,
                 cycle=1,
                 last_epoch=-1,
                 eta_min=0.0,
                 verbose=False):
        """
        Cyclical cosine learning rate decay
        A learning rate which can be referred in https://arxiv.org/pdf/2012.12645.pdf
        Args:
            learning_rate(float): learning rate
            T_max(int): maximum epoch num
            cycle(int): period of the cosine decay
            last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
            eta_min(float): minimum learning rate during training
            verbose(bool): whether to print learning rate for each epoch
        """
        super(CyclicalCosineDecay, self).__init__(learning_rate, last_epoch,
                                                  verbose)
        self.cycle = cycle
        self.eta_min = eta_min

    def get_lr(self):
        if self.last_epoch == 0:
            return self.base_lr
        relative_epoch = self.last_epoch % self.cycle
        lr = self.eta_min + 0.5 * (self.base_lr - self.eta_min) * \
             (1 + math.cos(math.pi * relative_epoch / self.cycle))
        return lr
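To see the restart behaviour, one can drive `last_epoch` by hand (a sketch, assuming the class above; `get_lr` is a pure function of `last_epoch`):

sched = CyclicalCosineDecay(learning_rate=0.1, T_max=20, cycle=10)
for epoch in [0, 5, 9, 10]:
    sched.last_epoch = epoch  # normally advanced via sched.step()
    print(epoch, round(sched.get_lr(), 4))
# 0 0.1 / 5 0.05 / 9 0.0024 / 10 0.1 -- the cosine restarts every `cycle` epochs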
@@ -57,7 +57,7 @@ def get_image_file_list(img_file):
     elif os.path.isdir(img_file):
         for single_file in os.listdir(img_file):
             file_path = os.path.join(img_file, single_file)
-            if imghdr.what(file_path) in img_end:
+            if os.path.isfile(file_path) and imghdr.what(file_path) in img_end:
                 imgs_lists.append(file_path)
     if len(imgs_lists) == 0:
         raise Exception("not found any img file in {}".format(img_file))
...
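The added `os.path.isfile` guard matters because `imghdr.what()` opens its argument: a subdirectory inside `img_file` would previously raise `IsADirectoryError`. A condensed sketch of the guarded listing (the extension set is assumed to match `img_end` defined earlier in the function):

import imghdr
import os

def list_images(img_dir, img_end=('jpg', 'jpeg', 'png', 'bmp', 'gif')):
    # Skip anything that is not a regular file before probing it as an image.
    return [
        os.path.join(img_dir, f) for f in os.listdir(img_dir)
        if os.path.isfile(os.path.join(img_dir, f)) and
        imghdr.what(os.path.join(img_dir, f)) in img_end
    ]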
@@ -18,13 +18,14 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))

+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
 import cv2
 import copy
 import numpy as np
 import math
 import time
 import traceback
-import paddle.fluid as fluid

 import tools.infer.utility as utility
 from ppocr.postprocess import build_post_process
@@ -39,7 +40,6 @@ class TextClassifier(object):
         self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")]
         self.cls_batch_num = args.cls_batch_num
         self.cls_thresh = args.cls_thresh
-        self.use_zero_copy_run = args.use_zero_copy_run
         postprocess_params = {
             'name': 'ClsPostProcess',
             "label_list": args.label_list,
@@ -99,12 +99,8 @@ class TextClassifier(object):
             norm_img_batch = norm_img_batch.copy()
             starttime = time.time()
-            if self.use_zero_copy_run:
-                self.input_tensor.copy_from_cpu(norm_img_batch)
-                self.predictor.zero_copy_run()
-            else:
-                norm_img_batch = fluid.core.PaddleTensor(norm_img_batch)
-                self.predictor.run([norm_img_batch])
+            self.input_tensor.copy_from_cpu(norm_img_batch)
+            self.predictor.run()
             prob_out = self.output_tensors[0].copy_to_cpu()
             cls_result = self.postprocess_op(prob_out)
             elapse += time.time() - starttime
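With the 2.0 inference API there is only the zero-copy path, so the `use_zero_copy_run` branch disappears from the cls, det, and rec predictors alike. The shared pattern, as a hedged sketch (the three handles come from `create_predictor()` in tools/infer/utility.py):

import numpy as np

def run_once(predictor, input_tensor, output_tensors, norm_img_batch):
    # Feed the numpy batch in place, run without feed/fetch ops, fetch numpy back.
    input_tensor.copy_from_cpu(np.ascontiguousarray(norm_img_batch))
    predictor.run()  # replaces both zero_copy_run() and run([PaddleTensor])
    return [t.copy_to_cpu() for t in output_tensors]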
@@ -143,10 +139,11 @@ def main(args):
             "Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
         exit()
     for ino in range(len(img_list)):
-        logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], cls_res[
-            ino]))
+        logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
+                                               cls_res[ino]))
     logger.info("Total predict time for {} images, cost: {:.3f}".format(
         len(img_list), predict_time))


 if __name__ == "__main__":
     main(utility.parse_args())
@@ -18,11 +18,12 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))

+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
 import cv2
 import numpy as np
 import time
 import sys
-import paddle

 import tools.infer.utility as utility
 from ppocr.utils.logging import get_logger
@@ -37,7 +38,6 @@ class TextDetector(object):
     def __init__(self, args):
         self.args = args
         self.det_algorithm = args.det_algorithm
-        self.use_zero_copy_run = args.use_zero_copy_run
         pre_process_list = [{
             'DetResizeForTest': {
                 'limit_side_len': args.det_limit_side_len,
@@ -72,7 +72,9 @@ class TextDetector(object):
             postprocess_params["nms_thresh"] = args.det_east_nms_thresh
         elif self.det_algorithm == "SAST":
             pre_process_list[0] = {
-                'DetResizeForTest': {'resize_long': args.det_limit_side_len}
+                'DetResizeForTest': {
+                    'resize_long': args.det_limit_side_len
+                }
             }
             postprocess_params['name'] = 'SASTPostProcess'
             postprocess_params["score_thresh"] = args.det_sast_score_thresh
@@ -161,12 +163,8 @@ class TextDetector(object):
         img = img.copy()
         starttime = time.time()
-        if self.use_zero_copy_run:
-            self.input_tensor.copy_from_cpu(img)
-            self.predictor.zero_copy_run()
-        else:
-            im = paddle.fluid.core.PaddleTensor(img)
-            self.predictor.run([im])
+        self.input_tensor.copy_from_cpu(img)
+        self.predictor.run()
         outputs = []
         for output_tensor in self.output_tensors:
             output = output_tensor.copy_to_cpu()
...
@@ -18,12 +18,13 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))

+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
 import cv2
 import numpy as np
 import math
 import time
 import traceback
-import paddle.fluid as fluid

 import tools.infer.utility as utility
 from ppocr.postprocess import build_post_process
@@ -39,7 +40,6 @@ class TextRecognizer(object):
         self.character_type = args.rec_char_type
         self.rec_batch_num = args.rec_batch_num
         self.rec_algorithm = args.rec_algorithm
-        self.use_zero_copy_run = args.use_zero_copy_run
         postprocess_params = {
             'name': 'CTCLabelDecode',
             "character_type": args.rec_char_type,
@@ -101,12 +101,8 @@ class TextRecognizer(object):
             norm_img_batch = np.concatenate(norm_img_batch)
             norm_img_batch = norm_img_batch.copy()
             starttime = time.time()
-            if self.use_zero_copy_run:
-                self.input_tensor.copy_from_cpu(norm_img_batch)
-                self.predictor.zero_copy_run()
-            else:
-                norm_img_batch = fluid.core.PaddleTensor(norm_img_batch)
-                self.predictor.run([norm_img_batch])
+            self.input_tensor.copy_from_cpu(norm_img_batch)
+            self.predictor.run()
             outputs = []
             for output_tensor in self.output_tensors:
                 output = output_tensor.copy_to_cpu()
...
@@ -145,8 +141,8 @@ def main(args):
             "Please set --rec_image_shape='3,32,100' and --rec_char_type='en' ")
         exit()
     for ino in range(len(img_list)):
-        logger.info("Predicts of {}:{}".format(valid_image_file_list[ino], rec_res[
-            ino]))
+        logger.info("Predicts of {}:{}".format(valid_image_file_list[ino],
+                                               rec_res[ino]))
     logger.info("Total predict time for {} images, cost: {:.3f}".format(
         len(img_list), predict_time))
...
@@ -18,6 +18,8 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))

+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
 import cv2
 import copy
 import numpy as np
...
@@ -20,8 +20,7 @@ import numpy as np
 import json
 from PIL import Image, ImageDraw, ImageFont
 import math
-from paddle.fluid.core import AnalysisConfig
-from paddle.fluid.core import create_paddle_predictor
+from paddle import inference


 def parse_args():
@@ -34,7 +33,7 @@ def parse_args():
     parser.add_argument("--ir_optim", type=str2bool, default=True)
     parser.add_argument("--use_tensorrt", type=str2bool, default=False)
     parser.add_argument("--use_fp16", type=str2bool, default=False)
-    parser.add_argument("--gpu_mem", type=int, default=8000)
+    parser.add_argument("--gpu_mem", type=int, default=500)

     # params for text detector
     parser.add_argument("--image_dir", type=str)
@@ -63,7 +62,7 @@ def parse_args():
     parser.add_argument("--rec_model_dir", type=str)
     parser.add_argument("--rec_image_shape", type=str, default="3, 32, 320")
     parser.add_argument("--rec_char_type", type=str, default='ch')
-    parser.add_argument("--rec_batch_num", type=int, default=1)
+    parser.add_argument("--rec_batch_num", type=int, default=6)
     parser.add_argument("--max_text_length", type=int, default=25)
     parser.add_argument(
         "--rec_char_dict_path",
@@ -83,8 +82,6 @@ def parse_args():
     parser.add_argument("--cls_thresh", type=float, default=0.9)

     parser.add_argument("--enable_mkldnn", type=str2bool, default=False)
-    parser.add_argument("--use_zero_copy_run", type=str2bool, default=False)
     parser.add_argument("--use_pdserving", type=str2bool, default=False)

     return parser.parse_args()
@@ -110,14 +107,14 @@ def create_predictor(args, mode, logger):
         logger.info("not find params file path {}".format(params_file_path))
         sys.exit(0)

-    config = AnalysisConfig(model_file_path, params_file_path)
+    config = inference.Config(model_file_path, params_file_path)

     if args.use_gpu:
         config.enable_use_gpu(args.gpu_mem, 0)
         if args.use_tensorrt:
             config.enable_tensorrt_engine(
-                precision_mode=AnalysisConfig.Precision.Half
-                if args.use_fp16 else AnalysisConfig.Precision.Float32,
+                precision_mode=inference.PrecisionType.Half
+                if args.use_fp16 else inference.PrecisionType.Float32,
                 max_batch_size=args.max_batch_size)
     else:
         config.disable_gpu()
@@ -126,24 +123,23 @@ def create_predictor(args, mode, logger):
             # cache 10 different shapes for mkldnn to avoid memory leak
             config.set_mkldnn_cache_capacity(10)
             config.enable_mkldnn()
+            args.rec_batch_num = 1

     # config.enable_memory_optim()
     config.disable_glog_info()

-    if args.use_zero_copy_run:
-        config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
-        config.switch_use_feed_fetch_ops(False)
-    else:
-        config.switch_use_feed_fetch_ops(True)
+    config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
+    config.switch_use_feed_fetch_ops(False)

-    predictor = create_paddle_predictor(config)
+    # create predictor
+    predictor = inference.create_predictor(config)
+
     input_names = predictor.get_input_names()
     for name in input_names:
-        input_tensor = predictor.get_input_tensor(name)
+        input_tensor = predictor.get_input_handle(name)
     output_names = predictor.get_output_names()
     output_tensors = []
     for output_name in output_names:
-        output_tensor = predictor.get_output_tensor(output_name)
+        output_tensor = predictor.get_output_handle(output_name)
         output_tensors.append(output_tensor)
     return predictor, input_tensor, output_tensors
...
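End to end, the migration replaces `AnalysisConfig`/`create_paddle_predictor` with the `paddle.inference` module. A minimal sketch of the new API surface (the model/params paths are hypothetical placeholders; assumes paddle >= 2.0rc):

from paddle import inference

config = inference.Config("inference/det/model", "inference/det/params")
config.disable_gpu()
config.disable_glog_info()
config.switch_use_feed_fetch_ops(False)  # required for copy_from_cpu()/run()

predictor = inference.create_predictor(config)  # was create_paddle_predictor
input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
output_handle = predictor.get_output_handle(predictor.get_output_names()[0])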
@@ -25,6 +25,8 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))

+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
 import paddle
 from ppocr.data import create_operators, transform
...
@@ -25,6 +25,8 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))

+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
 import cv2
 import json
 import paddle
...
@@ -25,6 +25,8 @@ __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))

+os.environ["FLAGS_allocator_strategy"] = 'auto_growth'
+
 import paddle
 from ppocr.data import create_operators, transform
...
@@ -131,7 +131,7 @@ def check_gpu(use_gpu):
           "model on CPU"
     try:
-        if use_gpu and not paddle.fluid.is_compiled_with_cuda():
+        if use_gpu and not paddle.is_compiled_with_cuda():
             print(err)
             sys.exit(1)
     except Exception as e:
@@ -179,9 +179,9 @@ def train(config,
     if 'start_epoch' in best_model_dict:
         start_epoch = best_model_dict['start_epoch']
     else:
-        start_epoch = 0
+        start_epoch = 1

-    for epoch in range(start_epoch, epoch_num):
+    for epoch in range(start_epoch, epoch_num + 1):
         if epoch > 0:
             train_dataloader = build_dataloader(config, 'Train', device, logger)
             train_batch_cost = 0.0
...
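Epoch numbering becomes 1-based: both loops still execute `epoch_num` iterations, but logs and checkpoint metadata now report epochs starting from 1. A quick check:

epoch_num = 3
print(list(range(0, epoch_num)))      # old loop: [0, 1, 2]
print(list(range(1, epoch_num + 1)))  # new loop: [1, 2, 3]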