Merge pull request #1 from PaddlePaddle/develop

update-2020-7-17

Merge pull request #1 from PaddlePaddle/develop
update-2020-7-17
26219d5f · shaohua.zhang · GitHub · 0e8a3417 · 311c5997 · 26219d5f
Unverified Commit 26219d5f authored Jul 17, 2020 by shaohua.zhang Committed by GitHub Jul 17, 2020
20 changed files
--- a/doc/tricks/long_text_examples.jpg
+++ b/doc/tricks/long_text_examples.jpg
--- a/ppocr/data/det/dataset_traversal.py
+++ b/ppocr/data/det/dataset_traversal.py
@@ -13,6 +13,7 @@
 #limitations under the License.

 import os
+import sys
 import math
 import random
 import functools
@@ -42,6 +43,10 @@ class TrainReader(object):
            img_num = len(label_infor_list)
            img_id_list = list(range(img_num))
            random.shuffle(img_id_list)
+            if sys.platform == "win32":
+                print("multiprocess is not fully compatible with Windows."
+                      "num_workers will be 1.")
+                self.num_workers = 1
            for img_id in range(process_id, img_num, self.num_workers):
                label_infor = label_infor_list[img_id_list[img_id]]
                outs = self.process(label_infor)
@@ -56,8 +61,6 @@ class TrainReader(object):
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
-            if len(batch_outs) != 0:
-                yield batch_outs

        return batch_iter_reader

@@ -92,8 +95,10 @@ class EvalTestReader(object):
            for img_path in img_list:
                img = cv2.imread(img_path)
                if img is None:
-                    logger.info("load image error:" + img_path)
+                    logger.info("{} does not exist!".format(img_path))
                    continue
+                elif len(list(img.shape)) == 2 or img.shape[2] == 1:
+                    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                outs = process_function(img)
                outs.append(img_path)
                batch_outs.append(outs)

--- a/ppocr/data/det/db_process.py
+++ b/ppocr/data/det/db_process.py
@@ -17,6 +17,8 @@ import cv2
 import numpy as np
 import json
 import sys
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()

 from .data_augment import AugmentData
 from .random_crop_data import RandomCropData
@@ -25,6 +27,10 @@ from .make_border_map import MakeBorderMap


 class DBProcessTrain(object):
+    """
+    DB pre-process for Train mode
+    """
+
    def __init__(self, params):
        self.img_set_dir = params['img_set_dir']
        self.image_shape = params['image_shape']
@@ -96,7 +102,10 @@ class DBProcessTrain(object):
        img_path, gt_label = self.convert_label_infor(label_infor)
        imgvalue = cv2.imread(img_path)
        if imgvalue is None:
+            logger.info("{} does not exist!".format(img_path))
            return None
+        if len(list(imgvalue.shape)) == 2 or imgvalue.shape[2] == 1:
+            imgvalue = cv2.cvtColor(imgvalue, cv2.COLOR_GRAY2BGR)
        data = self.make_data_dict(imgvalue, gt_label)
        data = AugmentData(data)
        data = RandomCropData(data, self.image_shape[1:])
@@ -109,11 +118,15 @@ class DBProcessTrain(object):


 class DBProcessTest(object):
+    """
+    DB pre-process for Test mode
+    """
+
    def __init__(self, params):
        super(DBProcessTest, self).__init__()
        self.resize_type = 0
-        if 'det_image_shape' in params:
-            self.image_shape = params['det_image_shape']
+        if 'test_image_shape' in params:
+            self.image_shape = params['test_image_shape']
            # print(self.image_shape)
            self.resize_type = 1
        if 'max_side_len' in params:
@@ -124,6 +137,10 @@ class DBProcessTest(object):
    def resize_image_type0(self, im):
        """
        resize image to a size multiple of 32 which is required by the network
+        args:
+            img(array): array with shape [h, w, c]
+        return(tuple):
+            img, (ratio_h, ratio_w)
        """
        max_side_len = self.max_side_len
        h, w, _ = im.shape
@@ -177,8 +194,12 @@ class DBProcessTest(object):
        img_std = [0.229, 0.224, 0.225]
        im = im.astype(np.float32, copy=False)
        im = im / 255
-        im -= img_mean
-        im /= img_std
+        im[:, :, 0] -= img_mean[0]
+        im[:, :, 1] -= img_mean[1]
+        im[:, :, 2] -= img_mean[2]
+        im[:, :, 0] /= img_std[0]
+        im[:, :, 1] /= img_std[1]
+        im[:, :, 2] /= img_std[2]
        channel_swap = (2, 0, 1)
        im = im.transpose(channel_swap)
        return im

--- a/ppocr/data/det/east_process.py
+++ b/ppocr/data/det/east_process.py
@@ -455,17 +455,23 @@ class EASTProcessTrain(object):
 class EASTProcessTest(object):
    def __init__(self, params):
        super(EASTProcessTest, self).__init__()
+        self.resize_type = 0
+        if 'test_image_shape' in params:
+            self.image_shape = params['test_image_shape']
+            # print(self.image_shape)
+            self.resize_type = 1
        if 'max_side_len' in params:
            self.max_side_len = params['max_side_len']
        else:
            self.max_side_len = 2400

-    def resize_image(self, im):
+    def resize_image_type0(self, im):
        """
        resize image to a size multiple of 32 which is required by the network
-        :param im: the resized image
-        :param max_side_len: limit of max image size to avoid out of memory in gpu
-        :return: the resized image and the resize ratio
+        args:
+            img(array): array with shape [h, w, c]
+        return(tuple):
+            img, (ratio_h, ratio_w)
        """
        max_side_len = self.max_side_len
        h, w, _ = im.shape
@@ -495,13 +501,30 @@ class EASTProcessTest(object):
            resize_w = 32
        else:
            resize_w = (resize_w // 32 - 1) * 32
-        im = cv2.resize(im, (int(resize_w), int(resize_h)))
+        try:
+            if int(resize_w) <= 0 or int(resize_h) <= 0:
+                return None, (None, None)
+            im = cv2.resize(im, (int(resize_w), int(resize_h)))
+        except Exception:
+            print(im.shape, resize_w, resize_h)  # report the offending sizes
+            raise  # propagate the failure instead of exiting with status 0
        ratio_h = resize_h / float(h)
        ratio_w = resize_w / float(w)
        return im, (ratio_h, ratio_w)

+    def resize_image_type1(self, im):
+        resize_h, resize_w = self.image_shape
+        ori_h, ori_w = im.shape[:2]  # (h, w, c)
+        im = cv2.resize(im, (int(resize_w), int(resize_h)))  # dsize is (width, height)
+        ratio_h = float(resize_h) / ori_h
+        ratio_w = float(resize_w) / ori_w
+        return im, (ratio_h, ratio_w)
+
    def __call__(self, im):
-        im, (ratio_h, ratio_w) = self.resize_image(im)
+        if self.resize_type == 0:
+            im, (ratio_h, ratio_w) = self.resize_image_type0(im)
+        else:
+            im, (ratio_h, ratio_w) = self.resize_image_type1(im)
        img_mean = [0.485, 0.456, 0.406]
        img_std = [0.229, 0.224, 0.225]
        im = im[:, :, ::-1].astype(np.float32)

--- a/ppocr/data/reader_main.py
+++ b/ppocr/data/reader_main.py
@@ -66,6 +66,8 @@ def reader_main(config=None, mode=None):
    reader_function = params['reader_function']
    function = create_module(reader_function)(params)
    if mode == "train":
+        if sys.platform == "win32":
+            return function(0)
        readers = []
        num_workers = params['num_workers']
        for process_id in range(num_workers):

--- a/ppocr/data/rec/dataset_traversal.py
+++ b/ppocr/data/rec/dataset_traversal.py
@@ -13,6 +13,7 @@
 #limitations under the License.

 import os
+import sys
 import math
 import random
 import numpy as np
@@ -40,10 +41,25 @@ class LMDBReader(object):
        self.loss_type = params['loss_type']
        self.max_text_length = params['max_text_length']
        self.mode = params['mode']
+        self.drop_last = False
+        self.use_tps = False
+        if "tps" in params:
+            self.use_tps = True  # presence of the "tps" key turns the flag on
+        self.use_distort = False
+        if "distort" in params:
+            self.use_distort = params['distort'] and params['use_gpu']
+            if not params['use_gpu']:
+                logger.info(
+                    "Distort operation can only support in GPU. Distort will be set to False."
+                )
        if params['mode'] == 'train':
            self.batch_size = params['train_batch_size_per_card']
+            self.drop_last = True
        else:
            self.batch_size = params['test_batch_size_per_card']
+            self.drop_last = False
+            self.use_distort = False
+        self.infer_img = params['infer_img']

    def load_hierarchical_lmdb_dataset(self):
        lmdb_sets = {}
@@ -97,33 +113,52 @@ class LMDBReader(object):
            process_id = 0

        def sample_iter_reader():
-            lmdb_sets = self.load_hierarchical_lmdb_dataset()
-            if process_id == 0:
-                self.print_lmdb_sets_info(lmdb_sets)
-            cur_index_sets = [1 + process_id] * len(lmdb_sets)
-            while True:
-                finish_read_num = 0
-                for dataset_idx in range(len(lmdb_sets)):
-                    cur_index = cur_index_sets[dataset_idx]
-                    if cur_index > lmdb_sets[dataset_idx]['num_samples']:
-                        finish_read_num += 1
-                    else:
-                        sample_info = self.get_lmdb_sample_info(
-                            lmdb_sets[dataset_idx]['txn'], cur_index)
-                        cur_index_sets[dataset_idx] += self.num_workers
-                        if sample_info is None:
-                            continue
-                        img, label = sample_info
-                        outs = process_image(img, self.image_shape, label,
-                                             self.char_ops, self.loss_type,
-                                             self.max_text_length)
-                        if outs is None:
-                            continue
-                        yield outs
-
-                if finish_read_num == len(lmdb_sets):
-                    break
-            self.close_lmdb_dataset(lmdb_sets)
+            if self.mode != 'train' and self.infer_img is not None:
+                image_file_list = get_image_file_list(self.infer_img)
+                for single_img in image_file_list:
+                    img = cv2.imread(single_img)
+                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
+                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+                    norm_img = process_image(
+                        img=img,
+                        image_shape=self.image_shape,
+                        char_ops=self.char_ops,
+                        tps=self.use_tps,
+                        infer_mode=True)
+                    yield norm_img
+            else:
+                lmdb_sets = self.load_hierarchical_lmdb_dataset()
+                if process_id == 0:
+                    self.print_lmdb_sets_info(lmdb_sets)
+                cur_index_sets = [1 + process_id] * len(lmdb_sets)
+                while True:
+                    finish_read_num = 0
+                    for dataset_idx in range(len(lmdb_sets)):
+                        cur_index = cur_index_sets[dataset_idx]
+                        if cur_index > lmdb_sets[dataset_idx]['num_samples']:
+                            finish_read_num += 1
+                        else:
+                            sample_info = self.get_lmdb_sample_info(
+                                lmdb_sets[dataset_idx]['txn'], cur_index)
+                            cur_index_sets[dataset_idx] += self.num_workers
+                            if sample_info is None:
+                                continue
+                            img, label = sample_info
+                            outs = process_image(
+                                img=img,
+                                image_shape=self.image_shape,
+                                label=label,
+                                char_ops=self.char_ops,
+                                loss_type=self.loss_type,
+                                max_text_length=self.max_text_length,
+                                distort=self.use_distort)
+                            if outs is None:
+                                continue
+                            yield outs
+
+                    if finish_read_num == len(lmdb_sets):
+                        break
+                self.close_lmdb_dataset(lmdb_sets)

        def batch_iter_reader():
            batch_outs = []
@@ -132,10 +167,13 @@ class LMDBReader(object):
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
-            if len(batch_outs) != 0:
-                yield batch_outs
+            if not self.drop_last:
+                if len(batch_outs) != 0:
+                    yield batch_outs

-        return batch_iter_reader
+        if self.infer_img is None:
+            return batch_iter_reader
+        return sample_iter_reader


 class SimpleReader(object):
@@ -152,26 +190,42 @@ class SimpleReader(object):
        self.loss_type = params['loss_type']
        self.max_text_length = params['max_text_length']
        self.mode = params['mode']
+        self.infer_img = params['infer_img']
+        self.use_tps = False
+        if "tps" in params:
+            self.use_tps = True
+        self.use_distort = False
+        if "distort" in params:
+            self.use_distort = params['distort'] and params['use_gpu']
+            if not params['use_gpu']:
+                logger.info(
+                    "Distort operation can only support in GPU.Distort will be set to False."
+                )
        if params['mode'] == 'train':
            self.batch_size = params['train_batch_size_per_card']
-        elif params['mode'] == 'eval':
-            self.batch_size = params['test_batch_size_per_card']
+            self.drop_last = True
        else:
-            self.batch_size = 1
-            self.infer_img = params['infer_img']
+            self.batch_size = params['test_batch_size_per_card']
+            self.drop_last = False
+            self.use_distort = False

    def __call__(self, process_id):
        if self.mode != 'train':
            process_id = 0

        def sample_iter_reader():
-            if self.mode == 'test':
+            if self.mode != 'train' and self.infer_img is not None:
                image_file_list = get_image_file_list(self.infer_img)
                for single_img in image_file_list:
                    img = cv2.imread(single_img)
-                    if img.shape[-1]==1 or len(list(img.shape))==2:
+                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-                    norm_img = process_image(img, self.image_shape)
+                    norm_img = process_image(
+                        img=img,
+                        image_shape=self.image_shape,
+                        char_ops=self.char_ops,
+                        tps=self.use_tps,
+                        infer_mode=True)
                    yield norm_img
            else:
                with open(self.label_file_path, "rb") as fin:
@@ -179,20 +233,30 @@ class SimpleReader(object):
                img_num = len(label_infor_list)
                img_id_list = list(range(img_num))
                random.shuffle(img_id_list)
+                if sys.platform == "win32":
+                    print("multiprocess is not fully compatible with Windows."
+                          "num_workers will be 1.")
+                    self.num_workers = 1
                for img_id in range(process_id, img_num, self.num_workers):
                    label_infor = label_infor_list[img_id_list[img_id]]
                    substr = label_infor.decode('utf-8').strip("\n").split("\t")
                    img_path = self.img_set_dir + "/" + substr[0]
                    img = cv2.imread(img_path)
-                    if img.shape[-1]==1 or len(list(img.shape))==2:
-                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    if img is None:
                        logger.info("{} does not exist!".format(img_path))
                        continue
+                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
+                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+
                    label = substr[1]
-                    outs = process_image(img, self.image_shape, label,
-                                         self.char_ops, self.loss_type,
-                                         self.max_text_length)
+                    outs = process_image(
+                        img=img,
+                        image_shape=self.image_shape,
+                        label=label,
+                        char_ops=self.char_ops,
+                        loss_type=self.loss_type,
+                        max_text_length=self.max_text_length,
+                        distort=self.use_distort)
                    if outs is None:
                        continue
                    yield outs
@@ -204,9 +268,10 @@ class SimpleReader(object):
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
-            if len(batch_outs) != 0:
-                yield batch_outs
+            if not self.drop_last:
+                if len(batch_outs) != 0:
+                    yield batch_outs

-        if self.mode != 'test':
+        if self.infer_img is None:
            return batch_iter_reader
        return sample_iter_reader
--- a/ppocr/data/rec/img_tools.py
+++ b/ppocr/data/rec/img_tools.py
@@ -15,6 +15,9 @@
 import math
 import cv2
 import numpy as np
+import random
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()


 def get_bounding_box_rect(pos):
@@ -48,6 +51,32 @@ def resize_norm_img(img, image_shape):
    return padding_im


+def resize_norm_img_chinese(img, image_shape):
+    imgC, imgH, imgW = image_shape
+    # todo: change to 0 and modified image shape
+    max_wh_ratio = 0
+    h, w = img.shape[0], img.shape[1]
+    ratio = w * 1.0 / h
+    max_wh_ratio = max(max_wh_ratio, ratio)
+    imgW = int(32 * max_wh_ratio)  # replaces the width taken from image_shape
+    if math.ceil(imgH * ratio) > imgW:
+        resized_w = imgW
+    else:
+        resized_w = int(math.ceil(imgH * ratio))
+    resized_image = cv2.resize(img, (resized_w, imgH))
+    resized_image = resized_image.astype('float32')
+    if image_shape[0] == 1:
+        resized_image = resized_image / 255
+        resized_image = resized_image[np.newaxis, :]
+    else:
+        resized_image = resized_image.transpose((2, 0, 1)) / 255
+    resized_image -= 0.5  # with the division below: maps [0, 1] to [-1, 1]
+    resized_image /= 0.5
+    padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32)
+    padding_im[:, :, 0:resized_w] = resized_image  # columns past resized_w stay 0
+    return padding_im
+
+
 def get_img_data(value):
    """get_img_data"""
    if not value:
@@ -61,18 +90,280 @@ def get_img_data(value):
    return imgori


+def flag():
+    """
+    Return 1 if random.random() > 0.5000001, otherwise -1.
+    """
+    return 1 if random.random() > 0.5000001 else -1
+
+
+def cvtColor(img):
+    """
+    Scale the HSV value channel by a random factor within +/-0.1%, return BGR.
+    """
+    hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
+    delta = 0.001 * random.random() * flag()
+    hsv[:, :, 2] = hsv[:, :, 2] * (1 + delta)
+    new_img = cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)
+    return new_img
+
+
+def blur(img):
+    """
+    Gaussian-blur img (5x5 kernel, sigma 1) when h and w both exceed 10.
+    """
+    h, w, _ = img.shape
+    if h > 10 and w > 10:
+        return cv2.GaussianBlur(img, (5, 5), 1)
+    else:
+        return img
+
+
+def jitter(img):
+    """
+    Shift img content diagonally by up to ~1% of its shorter side, in place.
+    """
+    w, h, _ = img.shape  # note: w is shape[0] (rows), h is shape[1] (columns)
+    if h > 10 and w > 10:
+        thres = min(w, h)
+        s = int(random.random() * thres * 0.01)
+        src_img = img.copy()
+        for i in range(s):
+            img[i:, i:, :] = src_img[:w - i, :h - i, :]  # overwrites the input
+        return img
+    else:
+        return img
+
+
+def add_gasuss_noise(image, mean=0, var=0.1):
+    """
+    Add Gaussian noise (std var**0.5, scaled by 0.5); return clipped uint8.
+    """
+
+    noise = np.random.normal(mean, var**0.5, image.shape)
+    out = image + 0.5 * noise
+    out = np.clip(out, 0, 255)
+    out = np.uint8(out)
+    return out
+
+
+def get_crop(image):
+    """
+    Trim 1-8 rows (capped at h - 1) from the top or the bottom, at random.
+    """
+    h, w, _ = image.shape
+    top_min = 1
+    top_max = 8
+    top_crop = int(random.randint(top_min, top_max))
+    top_crop = min(top_crop, h - 1)  # always keep at least one row
+    crop_img = image.copy()
+    ratio = random.randint(0, 1)  # 1: cut from the top, 0: cut from the bottom
+    if ratio:
+        crop_img = crop_img[top_crop:h, :, :]
+    else:
+        crop_img = crop_img[0:h - top_crop, :, :]
+    return crop_img
+
+
+class Config:
+    """
+    Randomly drawn warp parameters (angles, shear, border mode) used by warp().
+    """
+
+    def __init__(self, ):
+        self.anglex = random.random() * 30
+        self.angley = random.random() * 15
+        self.anglez = random.random() * 10
+        self.fov = 42
+        self.r = 0
+        self.shearx = random.random() * 0.3
+        self.sheary = random.random() * 0.05
+        self.borderMode = cv2.BORDER_REPLICATE
+
+    def make(self, w, h, ang):
+        """
+        Re-draw the angles for a w x h image (|anglez| < int(ang)); set switches.
+        """
+        self.anglex = random.random() * 5 * flag()
+        self.angley = random.random() * 5 * flag()
+        self.anglez = -1 * random.random() * int(ang) * flag()
+        self.fov = 42
+        self.r = 0
+        self.shearx = 0
+        self.sheary = 0
+        self.borderMode = cv2.BORDER_REPLICATE
+        self.w = w
+        self.h = h
+
+        self.perspective = True
+        self.crop = True
+        self.affine = False
+        self.reverse = True
+        self.noise = True
+        self.jitter = True
+        self.blur = True
+        self.color = True
+
+
+def rad(x):
+    """
+    Convert an angle from degrees to radians.
+    """
+    return x * np.pi / 180
+
+
+def get_warpR(config):
+    """Build the perspective warp for config's angles, fov and w x h size.
+    Returns (3x3 matrix, (-r1, -c1), ratio, dst); identity/1.0 on zero extent.
+    """
+    anglex, angley, anglez, fov, w, h, r = \
+        config.anglex, config.angley, config.anglez, config.fov, config.w, config.h, config.r
+    if w > 69 and w < 112:
+        anglex = anglex * 1.5
+
+    z = np.sqrt(w**2 + h**2) / 2 / np.tan(rad(fov / 2))
+    # Homogeneous coordinate transformation matrix
+    rx = np.array([[1, 0, 0, 0],
+                   [0, np.cos(rad(anglex)), -np.sin(rad(anglex)), 0], [
+                       0,
+                       -np.sin(rad(anglex)),
+                       np.cos(rad(anglex)),
+                       0,
+                   ], [0, 0, 0, 1]], np.float32)
+    ry = np.array([[np.cos(rad(angley)), 0, np.sin(rad(angley)), 0],
+                   [0, 1, 0, 0], [
+                       -np.sin(rad(angley)),
+                       0,
+                       np.cos(rad(angley)),
+                       0,
+                   ], [0, 0, 0, 1]], np.float32)
+    rz = np.array([[np.cos(rad(anglez)), np.sin(rad(anglez)), 0, 0],
+                   [-np.sin(rad(anglez)), np.cos(rad(anglez)), 0, 0],
+                   [0, 0, 1, 0], [0, 0, 0, 1]], np.float32)
+    r = rx.dot(ry).dot(rz)
+    # generate 4 points
+    pcenter = np.array([h / 2, w / 2, 0, 0], np.float32)
+    p1 = np.array([0, 0, 0, 0], np.float32) - pcenter
+    p2 = np.array([w, 0, 0, 0], np.float32) - pcenter
+    p3 = np.array([0, h, 0, 0], np.float32) - pcenter
+    p4 = np.array([w, h, 0, 0], np.float32) - pcenter
+    dst1 = r.dot(p1)
+    dst2 = r.dot(p2)
+    dst3 = r.dot(p3)
+    dst4 = r.dot(p4)
+    list_dst = np.array([dst1, dst2, dst3, dst4])
+    org = np.array([[0, 0], [w, 0], [0, h], [w, h]], np.float32)
+    dst = np.zeros((4, 2), np.float32)
+    # Project onto the image plane
+    dst[:, 0] = list_dst[:, 0] * z / (z - list_dst[:, 2]) + pcenter[0]
+    dst[:, 1] = list_dst[:, 1] * z / (z - list_dst[:, 2]) + pcenter[1]
+
+    warpR = cv2.getPerspectiveTransform(org, dst)
+
+    dst1, dst2, dst3, dst4 = dst
+    r1 = int(min(dst1[1], dst2[1]))
+    r2 = int(max(dst3[1], dst4[1]))
+    c1 = int(min(dst1[0], dst3[0]))
+    c2 = int(max(dst2[0], dst4[0]))
+
+    try:
+        ratio = min(1.0 * h / (r2 - r1), 1.0 * w / (c2 - c1))
+
+        dx = -c1
+        dy = -r1
+        T1 = np.float32([[1., 0, dx], [0, 1., dy], [0, 0, 1.0 / ratio]])
+        ret = T1.dot(warpR)
+    except ZeroDivisionError:
+        ratio = 1.0
+        T1 = np.float32([[1., 0, 0], [0, 1., 0], [0, 0, 1.]])
+        ret = T1
+    return ret, (-r1, -c1), ratio, dst
+
+
+def get_warpAffine(config):
+    """
+    Build the 2x3 affine matrix for config.anglez degrees (no translation).
+    """
+    anglez = config.anglez
+    rz = np.array([[np.cos(rad(anglez)), np.sin(rad(anglez)), 0],
+                   [-np.sin(rad(anglez)), np.cos(rad(anglez)), 0]], np.float32)
+    return rz
+
+
+def warp(img, ang):
+    """Apply a random chain of augmentations to img and return the result.
+    ang bounds the z-rotation angle drawn by Config.make.
+    """
+    h, w, _ = img.shape
+    config = Config()
+    config.make(w, h, ang)
+    new_img = img
+
+    if config.perspective:
+        tp = random.randint(1, 100)
+        if tp >= 50:
+            warpR, (r1, c1), ratio, dst = get_warpR(config)
+            new_w = int(np.max(dst[:, 0])) - int(np.min(dst[:, 0]))
+            new_img = cv2.warpPerspective(
+                new_img,
+                warpR, (int(new_w * ratio), h),
+                borderMode=config.borderMode)
+    if config.crop:
+        img_height, img_width = img.shape[0:2]
+        tp = random.randint(1, 100)
+        if tp >= 50 and img_height >= 20 and img_width >= 20:
+            new_img = get_crop(new_img)
+    if config.affine:
+        warpT = get_warpAffine(config)
+        new_img = cv2.warpAffine(
+            new_img, warpT, (w, h), borderMode=config.borderMode)
+    if config.blur:
+        tp = random.randint(1, 100)
+        if tp >= 50:
+            new_img = blur(new_img)
+    if config.color:
+        tp = random.randint(1, 100)
+        if tp >= 50:
+            new_img = cvtColor(new_img)
+    if config.jitter:
+        new_img = jitter(new_img)  # no random gate: runs on every call
+    if config.noise:
+        tp = random.randint(1, 100)
+        if tp >= 50:
+            new_img = add_gasuss_noise(new_img)
+    if config.reverse:
+        tp = random.randint(1, 100)
+        if tp >= 50:
+            new_img = 255 - new_img  # invert pixel values
+    return new_img
+
+
 def process_image(img,
                  image_shape,
                  label=None,
                  char_ops=None,
                  loss_type=None,
-                  max_text_length=None):
-    norm_img = resize_norm_img(img, image_shape)
+                  max_text_length=None,
+                  tps=None,
+                  infer_mode=False,
+                  distort=False):
+    if distort:
+        img = warp(img, 10)
+    if infer_mode and char_ops.character_type == "ch" and not tps:
+        norm_img = resize_norm_img_chinese(img, image_shape)
+    else:
+        norm_img = resize_norm_img(img, image_shape)
+
    norm_img = norm_img[np.newaxis, :]
    if label is not None:
-        char_num = char_ops.get_char_num()
+        # char_num = char_ops.get_char_num()
        text = char_ops.encode(label)
        if len(text) == 0 or len(text) > max_text_length:
+            logger.info(
+                "Warning in ppocr/data/rec/img_tools.py:line362: Wrong data type."
+                "Excepted string with length between 1 and {}, but "
+                "got '{}'. Label is '{}'".format(max_text_length,
+                                                 len(text), label))
            return None
        else:
            if loss_type == "ctc":

--- a/ppocr/modeling/architectures/__init__.py
+++ b/ppocr/modeling/architectures/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/modeling/architectures/det_model.py
+++ b/ppocr/modeling/architectures/det_model.py
@@ -59,16 +59,23 @@ class DetModel(object):
        return: (image, corresponding label, dataloader)
        """
        image_shape = deepcopy(self.image_shape)
+        if image_shape[1] % 4 != 0 or image_shape[2] % 4 != 0:
+            raise Exception("The size of the image must be divisible by 4, "
+                            "received image shape is {}, please reset the "
+                            "Global.image_shape in the yml file".format(
+                                image_shape))
+
        image = fluid.layers.data(
            name='image', shape=image_shape, dtype='float32')
        if mode == "train":
            if self.algorithm == "EAST":
+                h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
                score = fluid.layers.data(
-                    name='score', shape=[1, 128, 128], dtype='float32')
+                    name='score', shape=[1, h, w], dtype='float32')
                geo = fluid.layers.data(
-                    name='geo', shape=[9, 128, 128], dtype='float32')
+                    name='geo', shape=[9, h, w], dtype='float32')
                mask = fluid.layers.data(
-                    name='mask', shape=[1, 128, 128], dtype='float32')
+                    name='mask', shape=[1, h, w], dtype='float32')
                feed_list = [image, score, geo, mask]
                labels = {'score': score, 'geo': geo, 'mask': mask}
            elif self.algorithm == "DB":
@@ -109,7 +116,10 @@ class DetModel(object):
        """
        image, labels, loader = self.create_feed(mode)
        conv_feas = self.backbone(image)
-        predicts = self.head(conv_feas)
+        if self.algorithm == "DB":
+            predicts = self.head(conv_feas, mode)
+        else:
+            predicts = self.head(conv_feas)
        if mode == "train":
            losses = self.loss(predicts, labels)
            return loader, losses

--- a/ppocr/modeling/architectures/rec_model.py
+++ b/ppocr/modeling/architectures/rec_model.py
@@ -30,6 +30,8 @@ class RecModel(object):
        global_params = params['Global']
        char_num = global_params['char_ops'].get_char_num()
        global_params['char_num'] = char_num
+        self.char_type = global_params['character_type']
+        self.infer_img = global_params['infer_img']
        if "TPS" in params:
            tps_params = deepcopy(params["TPS"])
            tps_params.update(global_params)
@@ -60,8 +62,8 @@ class RecModel(object):
    def create_feed(self, mode):
        image_shape = deepcopy(self.image_shape)
        image_shape.insert(0, -1)
-        image = fluid.data(name='image', shape=image_shape, dtype='float32')
        if mode == "train":
+            image = fluid.data(name='image', shape=image_shape, dtype='float32')
            if self.loss_type == "attention":
                label_in = fluid.data(
                    name='label_in',
@@ -86,6 +88,16 @@ class RecModel(object):
                use_double_buffer=True,
                iterable=False)
        else:
+            if self.char_type == "ch" and self.infer_img:
+                image_shape[-1] = -1
+                if self.tps != None:
+                    logger.info(
+                        "WARNRNG!!!\n"
+                        "TPS does not support variable shape in chinese!"
+                        "We set img_shape to be the same , it may affect the inference effect"
+                    )
+                    image_shape = deepcopy(self.image_shape)
+            image = fluid.data(name='image', shape=image_shape, dtype='float32')
            labels = None
            loader = None
        return image, labels, loader
@@ -110,7 +122,11 @@ class RecModel(object):
            return loader, outputs
        elif mode == "export":
            predict = predicts['predict']
-            predict = fluid.layers.softmax(predict)
+            if self.loss_type == "ctc":
+                predict = fluid.layers.softmax(predict)
            return [image, {'decoded_out': decoded_out, 'predicts': predict}]
        else:
-            return loader, {'decoded_out': decoded_out}
+            predict = predicts['predict']
+            if self.loss_type == "ctc":
+                predict = fluid.layers.softmax(predict)
+            return loader, {'decoded_out': decoded_out, 'predicts': predict}
--- a/ppocr/modeling/backbones/__init__.py
+++ b/ppocr/modeling/backbones/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/modeling/heads/det_db_head.py
+++ b/ppocr/modeling/heads/det_db_head.py
@@ -115,7 +115,6 @@ class DBHead(object):
        initializer = fluid.initializer.Uniform(-stdv, stdv)
        bias_attr = fluid.ParamAttr(
            regularizer=regularizer,
-            gradient_clip=gradient_clip,
            initializer=initializer,
            name=name + "_b_attr")
        return bias_attr
@@ -196,7 +195,7 @@ class DBHead(object):
        fuse = fluid.layers.concat(input=[p5, p4, p3, p2], axis=1)
        shrink_maps = self.binarize(fuse)
        if mode != "train":
-            return {"maps", shrink_maps}
+            return {"maps": shrink_maps}
        threshold_maps = self.thresh(fuse)
        binary_maps = self.step_function(shrink_maps, threshold_maps)
        y = fluid.layers.concat(

--- a/ppocr/modeling/heads/det_east_head.py
+++ b/ppocr/modeling/heads/det_east_head.py
@@ -18,6 +18,7 @@ from __future__ import print_function

 import paddle.fluid as fluid
 from ..common_functions import conv_bn_layer, deconv_bn_layer
+from collections import OrderedDict


 class EASTHead(object):
@@ -110,7 +111,7 @@ class EASTHead(object):
    def __call__(self, inputs):
        f_common = self.unet_fusion(inputs)
        f_score, f_geo = self.detector_header(f_common)
-        predicts = {}
+        predicts = OrderedDict()
        predicts['f_score'] = f_score
        predicts['f_geo'] = f_geo
        return predicts
--- a/ppocr/modeling/heads/rec_attention_head.py
+++ b/ppocr/modeling/heads/rec_attention_head.py
@@ -123,6 +123,8 @@ class AttentionPredict(object):

        full_ids = fluid.layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], dtype='int64', value=1)
+        full_scores = fluid.layers.fill_constant_batch_size_like(
+            input=init_state, shape=[-1, 1], dtype='float32', value=1)

        cond = layers.less_than(x=counter, y=array_len)
        while_op = layers.While(cond=cond)
@@ -171,6 +173,9 @@ class AttentionPredict(object):
            new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
            fluid.layers.assign(new_ids, full_ids)

+            new_scores = fluid.layers.concat([full_scores, topk_scores], axis=1)
+            fluid.layers.assign(new_scores, full_scores)
+            
            layers.increment(x=counter, value=1, in_place=True)

            # update the memories
@@ -184,7 +189,7 @@ class AttentionPredict(object):
            length_cond = layers.less_than(x=counter, y=array_len)
            finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)
-        return full_ids
+        return full_ids, full_scores

    def __call__(self, inputs, labels=None, mode=None):
        encoder_features = self.encoder(inputs)
@@ -223,10 +228,10 @@ class AttentionPredict(object):
                decoder_size, char_num)
            _, decoded_out = layers.topk(input=predict, k=1)
            decoded_out = layers.lod_reset(decoded_out, y=label_out)
-            predicts = {'predict': predict, 'decoded_out': decoded_out}
+            predicts = {'predict':predict, 'decoded_out':decoded_out}
        else:
-            ids = self.gru_attention_infer(
+            ids, predict = self.gru_attention_infer(
                decoder_boot, self.max_length, char_num, word_vector_dim,
                encoded_vector, encoded_proj, decoder_size)
-            predicts = {'decoded_out': ids}
+            predicts = {'predict':predict, 'decoded_out':ids}
        return predicts
--- a/ppocr/modeling/losses/__init__.py
+++ b/ppocr/modeling/losses/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/modeling/stns/__init__.py
+++ b/ppocr/modeling/stns/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/optimizer.py
+++ b/ppocr/optimizer.py
@@ -15,6 +15,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle.fluid as fluid
+from paddle.fluid.regularizer import L2Decay
+
+from ppocr.utils.utility import initial_logger
+
+logger = initial_logger()


 def AdamDecay(params, parameter_list=None):
@@ -28,9 +33,24 @@ def AdamDecay(params, parameter_list=None):
    base_lr = params['base_lr']
    beta1 = params['beta1']
    beta2 = params['beta2']
+    l2_decay = params.get("l2_decay", 0.0)
+
+    if 'decay' in params:
+        params = params['decay']
+        decay_mode = params['function']
+        step_each_epoch = params['step_each_epoch']
+        total_epoch = params['total_epoch']
+        if decay_mode == "cosine_decay":
+            base_lr = fluid.layers.cosine_decay(
+                learning_rate=base_lr,
+                step_each_epoch=step_each_epoch,
+                epochs=total_epoch)
+        else:
+            logger.info("Only support Cosine decay currently")
    optimizer = fluid.optimizer.Adam(
        learning_rate=base_lr,
        beta1=beta1,
        beta2=beta2,
+        regularization=L2Decay(regularization_coeff=l2_decay),
        parameter_list=parameter_list)
    return optimizer
--- a/ppocr/postprocess/db_postprocess.py
+++ b/ppocr/postprocess/db_postprocess.py
@@ -35,6 +35,7 @@ class DBPostProcess(object):
        self.thresh = params['thresh']
        self.box_thresh = params['box_thresh']
        self.max_candidates = params['max_candidates']
+        self.unclip_ratio = params['unclip_ratio']
        self.min_size = 3

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
@@ -46,7 +47,8 @@ class DBPostProcess(object):
        bitmap = _bitmap
        height, width = bitmap.shape

-        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
+                                cv2.CHAIN_APPROX_SIMPLE)
        if len(outs) == 3:
            img, contours, _ = outs[0], outs[1], outs[2]
        elif len(outs) == 2:
@@ -83,7 +85,8 @@ class DBPostProcess(object):
            scores[index] = score
        return boxes, scores

-    def unclip(self, box, unclip_ratio=2.0):
+    def unclip(self, box):
+        unclip_ratio = self.unclip_ratio
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()

--- a/ppocr/postprocess/east_postprocess.py
+++ b/ppocr/postprocess/east_postprocess.py
@@ -20,6 +20,12 @@ import numpy as np
 from .locality_aware_nms import nms_locality
 import cv2

+import os
+import sys
+__dir__ = os.path.dirname(__file__)
+sys.path.append(__dir__)
+sys.path.append(os.path.join(__dir__, '..'))
+

 class EASTPostPocess(object):
    """
@@ -30,6 +36,11 @@ class EASTPostPocess(object):
        self.score_thresh = params['score_thresh']
        self.cover_thresh = params['cover_thresh']
        self.nms_thresh = params['nms_thresh']
+        
+        # The C++ la-nms is faster, but it only supports Python 3.5
+        self.is_python35 = False
+        if sys.version_info.major == 3 and sys.version_info.minor == 5:
+            self.is_python35 = True

    def restore_rectangle_quad(self, origin, geometry):
        """
@@ -66,7 +77,11 @@ class EASTPostPocess(object):
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
-        boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
+        if self.is_python35:
+            import lanms
+            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
+        else:
+            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
        if boxes.shape[0] == 0:
            return []
        # Here we filter some low score boxes by the average score map,