Merge pull request #2 from PaddlePaddle/develop

mergepaddleocr

Merge pull request #2 from PaddlePaddle/develop
mergepaddleocr
c1d19ce2 · zhoujun · GitHub · 56c6c3ae · bad9f6cd · c1d19ce2
Unverified Commit c1d19ce2 authored Aug 13, 2020 by zhoujun Committed by GitHub Aug 13, 2020
20 changed files
--- a/ppocr/data/rec/dataset_traversal.py
+++ b/ppocr/data/rec/dataset_traversal.py
@@ -13,6 +13,7 @@
 #limitations under the License.

 import os
+import sys
 import math
 import random
 import numpy as np
@@ -22,6 +23,7 @@ import string
 import lmdb

 from ppocr.utils.utility import initial_logger
+from ppocr.utils.utility import get_image_file_list
 logger = initial_logger()

 from .img_tools import process_image, get_img_data
@@ -39,10 +41,25 @@ class LMDBReader(object):
        self.loss_type = params['loss_type']
        self.max_text_length = params['max_text_length']
        self.mode = params['mode']
+        self.drop_last = False
+        self.use_tps = False
+        if "tps" in params:
+            # sample_iter_reader reads self.use_tps, so the flag must be set under exactly that name.
+            self.use_tps = True
+        self.use_distort = False
+        if "distort" in params:
+            self.use_distort = params['distort'] and params['use_gpu']
+            if not params['use_gpu']:
+                logger.info(
+                    "Distort operation can only support in GPU. Distort will be set to False."
+                )
        if params['mode'] == 'train':
            self.batch_size = params['train_batch_size_per_card']
+            self.drop_last = True
        else:
            self.batch_size = params['test_batch_size_per_card']
+            self.drop_last = False
+            self.use_distort = False
+        self.infer_img = params['infer_img']

    def load_hierarchical_lmdb_dataset(self):
        lmdb_sets = {}
@@ -96,33 +113,52 @@ class LMDBReader(object):
            process_id = 0

        def sample_iter_reader():
-            lmdb_sets = self.load_hierarchical_lmdb_dataset()
-            if process_id == 0:
-                self.print_lmdb_sets_info(lmdb_sets)
-            cur_index_sets = [1 + process_id] * len(lmdb_sets)
-            while True:
-                finish_read_num = 0
-                for dataset_idx in range(len(lmdb_sets)):
-                    cur_index = cur_index_sets[dataset_idx]
-                    if cur_index > lmdb_sets[dataset_idx]['num_samples']:
-                        finish_read_num += 1
-                    else:
-                        sample_info = self.get_lmdb_sample_info(
-                            lmdb_sets[dataset_idx]['txn'], cur_index)
-                        cur_index_sets[dataset_idx] += self.num_workers
-                        if sample_info is None:
-                            continue
-                        img, label = sample_info
-                        outs = process_image(img, self.image_shape, label,
-                                             self.char_ops, self.loss_type,
-                                             self.max_text_length)
-                        if outs is None:
-                            continue
-                        yield outs
-
-                if finish_read_num == len(lmdb_sets):
-                    break
-            self.close_lmdb_dataset(lmdb_sets)
+            if self.mode != 'train' and self.infer_img is not None:
+                image_file_list = get_image_file_list(self.infer_img)
+                for single_img in image_file_list:
+                    img = cv2.imread(single_img)
+                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
+                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+                    norm_img = process_image(
+                        img=img,
+                        image_shape=self.image_shape,
+                        char_ops=self.char_ops,
+                        tps=self.use_tps,
+                        infer_mode=True)
+                    yield norm_img
+            else:
+                lmdb_sets = self.load_hierarchical_lmdb_dataset()
+                if process_id == 0:
+                    self.print_lmdb_sets_info(lmdb_sets)
+                cur_index_sets = [1 + process_id] * len(lmdb_sets)
+                while True:
+                    finish_read_num = 0
+                    for dataset_idx in range(len(lmdb_sets)):
+                        cur_index = cur_index_sets[dataset_idx]
+                        if cur_index > lmdb_sets[dataset_idx]['num_samples']:
+                            finish_read_num += 1
+                        else:
+                            sample_info = self.get_lmdb_sample_info(
+                                lmdb_sets[dataset_idx]['txn'], cur_index)
+                            cur_index_sets[dataset_idx] += self.num_workers
+                            if sample_info is None:
+                                continue
+                            img, label = sample_info
+                            outs = process_image(
+                                img=img,
+                                image_shape=self.image_shape,
+                                label=label,
+                                char_ops=self.char_ops,
+                                loss_type=self.loss_type,
+                                max_text_length=self.max_text_length,
+                                distort=self.use_distort)
+                            if outs is None:
+                                continue
+                            yield outs
+
+                    if finish_read_num == len(lmdb_sets):
+                        break
+                self.close_lmdb_dataset(lmdb_sets)

        def batch_iter_reader():
            batch_outs = []
@@ -131,10 +167,13 @@ class LMDBReader(object):
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
-            if len(batch_outs) != 0:
-                yield batch_outs
+            if not self.drop_last:
+                if len(batch_outs) != 0:
+                    yield batch_outs

-        return batch_iter_reader
+        if self.infer_img is None:
+            return batch_iter_reader
+        return sample_iter_reader


 class SimpleReader(object):
@@ -143,50 +182,100 @@ class SimpleReader(object):
            self.num_workers = 1
        else:
            self.num_workers = params['num_workers']
-        self.img_set_dir = params['img_set_dir']
-        self.label_file_path = params['label_file_path']
+        if params['mode'] != 'test':
+            self.img_set_dir = params['img_set_dir']
+            self.label_file_path = params['label_file_path']
+        self.use_gpu = params['use_gpu']
        self.char_ops = params['char_ops']
        self.image_shape = params['image_shape']
        self.loss_type = params['loss_type']
        self.max_text_length = params['max_text_length']
        self.mode = params['mode']
+        self.infer_img = params['infer_img']
+        self.use_tps = False
+        if "tps" in params:
+            self.use_tps = True
+        self.use_distort = False
+        if "distort" in params:
+            self.use_distort = params['distort'] and params['use_gpu']
+            if not params['use_gpu']:
+                logger.info(
+                    "Distort operation can only support in GPU.Distort will be set to False."
+                )
        if params['mode'] == 'train':
            self.batch_size = params['train_batch_size_per_card']
-        elif params['mode'] == 'eval':
-            self.batch_size = params['test_batch_size_per_card']
+            self.drop_last = True
        else:
-            self.batch_size = 1
-            self.infer_img = params['infer_img']
+            self.batch_size = params['test_batch_size_per_card']
+            self.drop_last = False
+            self.use_distort = False

    def __call__(self, process_id):
        if self.mode != 'train':
            process_id = 0

+        def get_device_num():
+            """
+            Return the device count used to validate the dataset size.
+
+            On GPU this is the number of comma-separated entries in
+            CUDA_VISIBLE_DEVICES (1 when the variable is unset); on CPU it is
+            the integer value of CPU_NUM (1 when unset).
+            """
+            if self.use_gpu:
+                # The fallback must be a string: os.environ.get returns the
+                # default unchanged, and an int default has no .split().
+                gpus = os.environ.get("CUDA_VISIBLE_DEVICES", "1")
+                return len(gpus.split(','))
+            return int(os.environ.get("CPU_NUM", 1))
+
        def sample_iter_reader():
-            if self.mode == 'test':
-                print("infer_img:", self.infer_img)
-                img = cv2.imread(self.infer_img)
-                norm_img = process_image(img, self.image_shape)
-                yield norm_img
-            with open(self.label_file_path, "rb") as fin:
-                label_infor_list = fin.readlines()
-            img_num = len(label_infor_list)
-            img_id_list = list(range(img_num))
-            random.shuffle(img_id_list)
-            for img_id in range(process_id, img_num, self.num_workers):
-                label_infor = label_infor_list[img_id_list[img_id]]
-                substr = label_infor.decode('utf-8').strip("\n").split("\t")
-                img_path = self.img_set_dir + "/" + substr[0]
-                img = cv2.imread(img_path)
-                if img is None:
-                    continue
-                label = substr[1]
-                outs = process_image(img, self.image_shape, label,
-                                     self.char_ops, self.loss_type,
-                                     self.max_text_length)
-                if outs is None:
-                    continue
-                yield outs
+            if self.mode != 'train' and self.infer_img is not None:
+                image_file_list = get_image_file_list(self.infer_img)
+                for single_img in image_file_list:
+                    img = cv2.imread(single_img)
+                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
+                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+                    norm_img = process_image(
+                        img=img,
+                        image_shape=self.image_shape,
+                        char_ops=self.char_ops,
+                        tps=self.use_tps,
+                        infer_mode=True)
+                    yield norm_img
+            else:
+                with open(self.label_file_path, "rb") as fin:
+                    label_infor_list = fin.readlines()
+                img_num = len(label_infor_list)
+                img_id_list = list(range(img_num))
+                random.shuffle(img_id_list)
+                if sys.platform == "win32" and self.num_workers != 1:
+                    print("multiprocess is not fully compatible with Windows."
+                          "num_workers will be 1.")
+                    self.num_workers = 1
+                if self.batch_size * get_device_num(
+                ) * self.num_workers > img_num:
+                    raise Exception(
+                        "The number of the whole data ({}) is smaller than the batch_size * devices_num * num_workers ({})".
+                        format(img_num, self.batch_size * get_device_num() *
+                               self.num_workers))
+                for img_id in range(process_id, img_num, self.num_workers):
+                    label_infor = label_infor_list[img_id_list[img_id]]
+                    substr = label_infor.decode('utf-8').strip("\n").split("\t")
+                    img_path = self.img_set_dir + "/" + substr[0]
+                    img = cv2.imread(img_path)
+                    if img is None:
+                        logger.info("{} does not exist!".format(img_path))
+                        continue
+                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
+                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+
+                    label = substr[1]
+                    outs = process_image(
+                        img=img,
+                        image_shape=self.image_shape,
+                        label=label,
+                        char_ops=self.char_ops,
+                        loss_type=self.loss_type,
+                        max_text_length=self.max_text_length,
+                        distort=self.use_distort)
+                    if outs is None:
+                        continue
+                    yield outs

        def batch_iter_reader():
            batch_outs = []
@@ -195,7 +284,10 @@ class SimpleReader(object):
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
-            if len(batch_outs) != 0:
-                yield batch_outs
+            if not self.drop_last:
+                if len(batch_outs) != 0:
+                    yield batch_outs

-        return batch_iter_reader
+        if self.infer_img is None:
+            return batch_iter_reader
+        return sample_iter_reader
--- a/ppocr/data/rec/img_tools.py
+++ b/ppocr/data/rec/img_tools.py
@@ -15,6 +15,9 @@
 import math
 import cv2
 import numpy as np
+import random
+from ppocr.utils.utility import initial_logger
+logger = initial_logger()


 def get_bounding_box_rect(pos):
@@ -48,6 +51,32 @@ def resize_norm_img(img, image_shape):
    return padding_im


+def resize_norm_img_chinese(img, image_shape):
+    """
+    Resize an image to the configured height and normalize it.
+
+    Only the channel count and height are taken from image_shape; the output
+    width is derived from the image's own aspect ratio as int(32 * w / h).
+    Pixel values are divided by 255, then shifted and scaled with
+    (x - 0.5) / 0.5, and the result is written into a zero-initialized
+    (channels, height, width) float32 array.
+    """
+    channels, target_h, _ = image_shape
+    src_h, src_w = img.shape[0], img.shape[1]
+    aspect = src_w * 1.0 / src_h
+    target_w = int(32 * max(0, aspect))
+    scaled_w = int(math.ceil(target_h * aspect))
+    # Never resize wider than the output canvas.
+    resized_w = target_w if scaled_w > target_w else scaled_w
+    pixels = cv2.resize(img, (resized_w, target_h)).astype('float32')
+    if channels == 1:
+        pixels = (pixels / 255)[np.newaxis, :]
+    else:
+        pixels = pixels.transpose((2, 0, 1)) / 255
+    pixels = (pixels - 0.5) / 0.5
+    padding_im = np.zeros((channels, target_h, target_w), dtype=np.float32)
+    padding_im[:, :, 0:resized_w] = pixels
+    return padding_im
+
+
 def get_img_data(value):
    """get_img_data"""
    if not value:
@@ -61,18 +90,280 @@ def get_img_data(value):
    return imgori


+def flag():
+    """
+    Return a random sign: 1 when the draw exceeds 0.5000001, otherwise -1.
+    """
+    if random.random() > 0.5000001:
+        return 1
+    return -1
+
+
+def cvtColor(img):
+    """
+    Randomly scale the HSV value channel of an image by a factor within
+    0.1% of 1, converting BGR -> HSV -> BGR around the change.
+    """
+    hsv_img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
+    # Draw the magnitude first and the sign second, keeping the random
+    # stream in the same order.
+    magnitude = 0.001 * random.random()
+    scale = 1 + magnitude * flag()
+    hsv_img[:, :, 2] = hsv_img[:, :, 2] * scale
+    return cv2.cvtColor(hsv_img, cv2.COLOR_HSV2BGR)
+
+
+def blur(img):
+    """
+    Apply a 5x5 Gaussian blur (sigma 1) when both image sides exceed 10
+    pixels; smaller images are returned untouched.
+    """
+    height, width, _ = img.shape
+    if height <= 10 or width <= 10:
+        return img
+    return cv2.GaussianBlur(img, (5, 5), 1)
+
+
+def jitter(img):
+    """
+    Shift the image content diagonally by a small random amount, in place.
+
+    The number of shift steps is int(random * min(side) * 0.01), so images
+    whose shorter side is below 100 pixels are never shifted. Images with a
+    side of 10 pixels or fewer are returned without drawing a random number.
+    The input array itself is modified and returned.
+    """
+    rows, cols, _ = img.shape
+    if rows <= 10 or cols <= 10:
+        return img
+    steps = int(random.random() * min(rows, cols) * 0.01)
+    original = img.copy()
+    for offset in range(steps):
+        img[offset:, offset:, :] = original[:rows - offset, :cols - offset, :]
+    return img
+
+
+def add_gasuss_noise(image, mean=0, var=0.1):
+    """
+    Add half-strength Gaussian noise to an image.
+
+    Noise is sampled from a normal distribution with the given mean and
+    variance, scaled by 0.5, added to the image, clipped to [0, 255] and
+    returned as uint8.
+    """
+    sigma = var**0.5
+    noisy = image + 0.5 * np.random.normal(mean, sigma, image.shape)
+    return np.uint8(np.clip(noisy, 0, 255))
+
+
+def get_crop(image):
+    """
+    Randomly trim 1 to 8 rows from either the top or the bottom of an image.
+
+    The trim is capped at height - 1 so at least one row always remains.
+    A cropped view of a copy is returned; the input is not modified.
+    """
+    height, _, _ = image.shape
+    rows_to_cut = min(int(random.randint(1, 8)), height - 1)
+    cropped = image.copy()
+    # Second draw decides which edge loses the rows.
+    if random.randint(0, 1):
+        return cropped[rows_to_cut:height, :, :]
+    return cropped[0:height - rows_to_cut, :, :]
+
+
+class Config:
+    """
+    Random parameters and on/off switches for the warp() augmentation.
+
+    __init__ draws an initial set of angles and shears; make() must be called
+    before use because it is the only place that sets w, h and the
+    per-augmentation switches.
+    """
+
+    def __init__(self, ):
+        # Initial random draws; make() overwrites every one of these values.
+        self.anglex = random.random() * 30
+        self.angley = random.random() * 15
+        self.anglez = random.random() * 10
+        self.fov = 42
+        self.r = 0
+        self.shearx = random.random() * 0.3
+        self.sheary = random.random() * 0.05
+        self.borderMode = cv2.BORDER_REPLICATE
+
+    def make(self, w, h, ang):
+        """
+        Re-draw the rotation angles for an image of size w x h and set the
+        augmentation switches.
+
+        ang bounds the magnitude of the z rotation (it is truncated with
+        int()); the x and y rotations are drawn within +/-5.
+        """
+        # Each angle is a random magnitude multiplied by a random sign from flag().
+        self.anglex = random.random() * 5 * flag()
+        self.angley = random.random() * 5 * flag()
+        self.anglez = -1 * random.random() * int(ang) * flag()
+        self.fov = 42
+        self.r = 0
+        self.shearx = 0
+        self.sheary = 0
+        self.borderMode = cv2.BORDER_REPLICATE
+        self.w = w
+        self.h = h
+
+        # Switches read by warp(); affine is the only step turned off.
+        self.perspective = True
+        self.crop = True
+        self.affine = False
+        self.reverse = True
+        self.noise = True
+        self.jitter = True
+        self.blur = True
+        self.color = True
+
+
+def rad(x):
+    """
+    Convert an angle from degrees to radians.
+    """
+    degrees_per_half_turn = 180
+    return x * np.pi / degrees_per_half_turn
+
+
+def get_warpR(config):
+    """
+    Build a 3x3 perspective-warp matrix from the rotation angles in config.
+
+    Args:
+        config: object providing anglex, angley, anglez (degrees, converted
+            with rad()), fov, w, h and r.
+
+    Returns:
+        tuple: (ret, (-r1, -c1), ratio, dst) where ret is the 3x3 matrix,
+        r1/c1 are the integer minimum row/column of the projected corners,
+        ratio is the scale applied through the matrix's last row, and dst is
+        the 4x2 array of projected corner coordinates. When the projected box
+        has zero extent, ret is the identity matrix and ratio is 1.0.
+    """
+    anglex, angley, anglez, fov, w, h, r = \
+        config.anglex, config.angley, config.anglez, config.fov, config.w, config.h, config.r
+    if w > 69 and w < 112:
+        anglex = anglex * 1.5
+
+    z = np.sqrt(w**2 + h**2) / 2 / np.tan(rad(fov / 2))
+    # Homogeneous coordinate transformation matrix
+    rx = np.array([[1, 0, 0, 0],
+                   [0, np.cos(rad(anglex)), -np.sin(rad(anglex)), 0], [
+                       0,
+                       -np.sin(rad(anglex)),
+                       np.cos(rad(anglex)),
+                       0,
+                   ], [0, 0, 0, 1]], np.float32)
+    ry = np.array([[np.cos(rad(angley)), 0, np.sin(rad(angley)), 0],
+                   [0, 1, 0, 0], [
+                       -np.sin(rad(angley)),
+                       0,
+                       np.cos(rad(angley)),
+                       0,
+                   ], [0, 0, 0, 1]], np.float32)
+    rz = np.array([[np.cos(rad(anglez)), np.sin(rad(anglez)), 0, 0],
+                   [-np.sin(rad(anglez)), np.cos(rad(anglez)), 0, 0],
+                   [0, 0, 1, 0], [0, 0, 0, 1]], np.float32)
+    # The value unpacked from config.r is replaced by the combined rotation.
+    r = rx.dot(ry).dot(rz)
+    # generate 4 points
+    # NOTE(review): pcenter is (h / 2, w / 2) while the corners below use
+    # (w, h) ordering -- confirm this is intentional.
+    pcenter = np.array([h / 2, w / 2, 0, 0], np.float32)
+    p1 = np.array([0, 0, 0, 0], np.float32) - pcenter
+    p2 = np.array([w, 0, 0, 0], np.float32) - pcenter
+    p3 = np.array([0, h, 0, 0], np.float32) - pcenter
+    p4 = np.array([w, h, 0, 0], np.float32) - pcenter
+    dst1 = r.dot(p1)
+    dst2 = r.dot(p2)
+    dst3 = r.dot(p3)
+    dst4 = r.dot(p4)
+    list_dst = np.array([dst1, dst2, dst3, dst4])
+    org = np.array([[0, 0], [w, 0], [0, h], [w, h]], np.float32)
+    dst = np.zeros((4, 2), np.float32)
+    # Project onto the image plane
+    dst[:, 0] = list_dst[:, 0] * z / (z - list_dst[:, 2]) + pcenter[0]
+    dst[:, 1] = list_dst[:, 1] * z / (z - list_dst[:, 2]) + pcenter[1]
+
+    warpR = cv2.getPerspectiveTransform(org, dst)
+
+    # Integer bounding box of the projected corners.
+    dst1, dst2, dst3, dst4 = dst
+    r1 = int(min(dst1[1], dst2[1]))
+    r2 = int(max(dst3[1], dst4[1]))
+    c1 = int(min(dst1[0], dst3[0]))
+    c2 = int(max(dst2[0], dst4[0]))
+
+    try:
+        ratio = min(1.0 * h / (r2 - r1), 1.0 * w / (c2 - c1))
+
+        dx = -c1
+        dy = -r1
+        T1 = np.float32([[1., 0, dx], [0, 1., dy], [0, 0, 1.0 / ratio]])
+        ret = T1.dot(warpR)
+    except ZeroDivisionError:
+        # A zero-height/width box (or zero ratio) has no valid scale; fall
+        # back to the identity transform. Only the division failure is
+        # caught so interrupts and unrelated errors still propagate.
+        ratio = 1.0
+        T1 = np.float32([[1., 0, 0], [0, 1., 0], [0, 0, 1.]])
+        ret = T1
+    return ret, (-r1, -c1), ratio, dst
+
+
+def get_warpAffine(config):
+    """
+    Build the 2x3 float32 rotation matrix for config.anglez (in degrees),
+    with a zero translation column.
+    """
+    theta = rad(config.anglez)
+    cos_t = np.cos(theta)
+    sin_t = np.sin(theta)
+    return np.array([[cos_t, sin_t, 0], [-sin_t, cos_t, 0]], np.float32)
+
+
+def warp(img, ang):
+    """
+    Apply a random chain of augmentations to an image.
+
+    A fresh Config is created and configured with the image size and ang.
+    Each enabled step except affine and jitter is then applied only when a
+    random integer in [1, 100] is at least 50. Steps run in a fixed order:
+    perspective, crop, affine, blur, color, jitter, noise, reverse.
+
+    Args:
+        img: image array with three dimensions (height, width, channels).
+        ang: bound passed to Config.make for the z rotation.
+
+    Returns:
+        The augmented image.
+    """
+    h, w, _ = img.shape
+    config = Config()
+    config.make(w, h, ang)
+    new_img = img
+
+    if config.perspective:
+        tp = random.randint(1, 100)
+        if tp >= 50:
+            # r1 and c1 are returned by get_warpR but not used here.
+            warpR, (r1, c1), ratio, dst = get_warpR(config)
+            new_w = int(np.max(dst[:, 0])) - int(np.min(dst[:, 0]))
+            # Output keeps the original height; width follows the projected box.
+            new_img = cv2.warpPerspective(
+                new_img,
+                warpR, (int(new_w * ratio), h),
+                borderMode=config.borderMode)
+    if config.crop:
+        # The size gate uses the original image, not the possibly warped one.
+        img_height, img_width = img.shape[0:2]
+        tp = random.randint(1, 100)
+        if tp >= 50 and img_height >= 20 and img_width >= 20:
+            new_img = get_crop(new_img)
+    if config.affine:
+        # Config.make sets affine to False, so this branch is currently inactive.
+        warpT = get_warpAffine(config)
+        new_img = cv2.warpAffine(
+            new_img, warpT, (w, h), borderMode=config.borderMode)
+    if config.blur:
+        tp = random.randint(1, 100)
+        if tp >= 50:
+            new_img = blur(new_img)
+    if config.color:
+        tp = random.randint(1, 100)
+        if tp >= 50:
+            new_img = cvtColor(new_img)
+    if config.jitter:
+        # No random gate here; jitter() draws its own random shift amount.
+        new_img = jitter(new_img)
+    if config.noise:
+        tp = random.randint(1, 100)
+        if tp >= 50:
+            new_img = add_gasuss_noise(new_img)
+    if config.reverse:
+        tp = random.randint(1, 100)
+        if tp >= 50:
+            # Invert pixel intensities.
+            new_img = 255 - new_img
+    return new_img
+
+
 def process_image(img,
                  image_shape,
                  label=None,
                  char_ops=None,
                  loss_type=None,
-                  max_text_length=None):
-    norm_img = resize_norm_img(img, image_shape)
+                  max_text_length=None,
+                  tps=None,
+                  infer_mode=False,
+                  distort=False):
+    if distort:
+        img = warp(img, 10)
+    if infer_mode and char_ops.character_type == "ch" and not tps:
+        norm_img = resize_norm_img_chinese(img, image_shape)
+    else:
+        norm_img = resize_norm_img(img, image_shape)
+
    norm_img = norm_img[np.newaxis, :]
    if label is not None:
-        char_num = char_ops.get_char_num()
+        # char_num = char_ops.get_char_num()
        text = char_ops.encode(label)
        if len(text) == 0 or len(text) > max_text_length:
+            logger.info(
+                "Warning in ppocr/data/rec/img_tools.py: Wrong data type."
+                "Excepted string with length between 1 and {}, but "
+                "got '{}'. Label is '{}'".format(max_text_length,
+                                                 len(text), label))
            return None
        else:
            if loss_type == "ctc":

--- a/ppocr/modeling/architectures/__init__.py
+++ b/ppocr/modeling/architectures/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/modeling/architectures/det_model.py
+++ b/ppocr/modeling/architectures/det_model.py
@@ -59,16 +59,23 @@ class DetModel(object):
        return: (image, corresponding label, dataloader)
        """
        image_shape = deepcopy(self.image_shape)
+        if image_shape[1] % 4 != 0 or image_shape[2] % 4 != 0:
+            raise Exception("The size of the image must be divisible by 4, "
+                            "received image shape is {}, please reset the "
+                            "Global.image_shape in the yml file".format(
+                                image_shape))
+
        image = fluid.layers.data(
            name='image', shape=image_shape, dtype='float32')
        if mode == "train":
            if self.algorithm == "EAST":
+                h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
                score = fluid.layers.data(
-                    name='score', shape=[1, 128, 128], dtype='float32')
+                    name='score', shape=[1, h, w], dtype='float32')
                geo = fluid.layers.data(
-                    name='geo', shape=[9, 128, 128], dtype='float32')
+                    name='geo', shape=[9, h, w], dtype='float32')
                mask = fluid.layers.data(
-                    name='mask', shape=[1, 128, 128], dtype='float32')
+                    name='mask', shape=[1, h, w], dtype='float32')
                feed_list = [image, score, geo, mask]
                labels = {'score': score, 'geo': geo, 'mask': mask}
            elif self.algorithm == "DB":
@@ -109,7 +116,10 @@ class DetModel(object):
        """
        image, labels, loader = self.create_feed(mode)
        conv_feas = self.backbone(image)
-        predicts = self.head(conv_feas)
+        if self.algorithm == "DB":
+            predicts = self.head(conv_feas, mode)
+        else:
+            predicts = self.head(conv_feas)
        if mode == "train":
            losses = self.loss(predicts, labels)
            return loader, losses

--- a/ppocr/modeling/architectures/rec_model.py
+++ b/ppocr/modeling/architectures/rec_model.py
@@ -30,6 +30,8 @@ class RecModel(object):
        global_params = params['Global']
        char_num = global_params['char_ops'].get_char_num()
        global_params['char_num'] = char_num
+        self.char_type = global_params['character_type']
+        self.infer_img = global_params['infer_img']
        if "TPS" in params:
            tps_params = deepcopy(params["TPS"])
            tps_params.update(global_params)
@@ -60,8 +62,8 @@ class RecModel(object):
    def create_feed(self, mode):
        image_shape = deepcopy(self.image_shape)
        image_shape.insert(0, -1)
-        image = fluid.data(name='image', shape=image_shape, dtype='float32')
        if mode == "train":
+            image = fluid.data(name='image', shape=image_shape, dtype='float32')
            if self.loss_type == "attention":
                label_in = fluid.data(
                    name='label_in',
@@ -86,6 +88,16 @@ class RecModel(object):
                use_double_buffer=True,
                iterable=False)
        else:
+            if self.char_type == "ch" and self.infer_img:
+                image_shape[-1] = -1
+                if self.tps != None:
+                    logger.info(
+                        "WARNRNG!!!\n"
+                        "TPS does not support variable shape in chinese!"
+                        "We set img_shape to be the same , it may affect the inference effect"
+                    )
+                    image_shape = deepcopy(self.image_shape)
+            image = fluid.data(name='image', shape=image_shape, dtype='float32')
            labels = None
            loader = None
        return image, labels, loader
@@ -109,6 +121,12 @@ class RecModel(object):
                decoded_out, 'label':label}
            return loader, outputs
        elif mode == "export":
-            return [image, {'decoded_out': decoded_out}]
+            predict = predicts['predict']
+            if self.loss_type == "ctc":
+                predict = fluid.layers.softmax(predict)
+            return [image, {'decoded_out': decoded_out, 'predicts': predict}]
        else:
-            return loader, {'decoded_out': decoded_out}
+            predict = predicts['predict']
+            if self.loss_type == "ctc":
+                predict = fluid.layers.softmax(predict)
+            return loader, {'decoded_out': decoded_out, 'predicts': predict}
--- a/ppocr/modeling/backbones/__init__.py
+++ b/ppocr/modeling/backbones/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/modeling/backbones/rec_mobilenet_v3.py
+++ b/ppocr/modeling/backbones/rec_mobilenet_v3.py
@@ -31,16 +31,28 @@ __all__ = [

 class MobileNetV3():
    def __init__(self, params):
-        self.scale = params['scale']
-        model_name = params['model_name']
+        self.scale = params.get("scale", 0.5)
+        model_name = params.get("model_name", "small")
+        large_stride = params.get("large_stride", [1, 2, 2, 2])
+        small_stride = params.get("small_stride", [2, 2, 2, 2])
+
+        assert isinstance(large_stride, list), "large_stride type must " \
+            "be list but got {}".format(type(large_stride))
+        assert isinstance(small_stride, list), "small_stride type must " \
+            "be list but got {}".format(type(small_stride))
+        assert len(large_stride) == 4, "large_stride length must be " \
+            "4 but got {}".format(len(large_stride))
+        assert len(small_stride) == 4, "small_stride length must be " \
+            "4 but got {}".format(len(small_stride))
+
        self.inplanes = 16
        if model_name == "large":
            self.cfg = [
                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, False, 'relu', 1],
-                [3, 64, 24, False, 'relu', (2, 1)],
+                [3, 16, 16, False, 'relu', large_stride[0]],
+                [3, 64, 24, False, 'relu', (large_stride[1], 1)],
                [3, 72, 24, False, 'relu', 1],
-                [5, 72, 40, True, 'relu', (2, 1)],
+                [5, 72, 40, True, 'relu', (large_stride[2], 1)],
                [5, 120, 40, True, 'relu', 1],
                [5, 120, 40, True, 'relu', 1],
                [3, 240, 80, False, 'hard_swish', 1],
@@ -49,7 +61,7 @@ class MobileNetV3():
                [3, 184, 80, False, 'hard_swish', 1],
                [3, 480, 112, True, 'hard_swish', 1],
                [3, 672, 112, True, 'hard_swish', 1],
-                [5, 672, 160, True, 'hard_swish', (2, 1)],
+                [5, 672, 160, True, 'hard_swish', (large_stride[3], 1)],
                [5, 960, 160, True, 'hard_swish', 1],
                [5, 960, 160, True, 'hard_swish', 1],
            ]
@@ -58,15 +70,15 @@ class MobileNetV3():
        elif model_name == "small":
            self.cfg = [
                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, True, 'relu', (2, 1)],
-                [3, 72, 24, False, 'relu', (2, 1)],
+                [3, 16, 16, True, 'relu', (small_stride[0], 1)],
+                [3, 72, 24, False, 'relu', (small_stride[1], 1)],
                [3, 88, 24, False, 'relu', 1],
-                [5, 96, 40, True, 'hard_swish', (2, 1)],
+                [5, 96, 40, True, 'hard_swish', (small_stride[2], 1)],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 240, 40, True, 'hard_swish', 1],
                [5, 120, 48, True, 'hard_swish', 1],
                [5, 144, 48, True, 'hard_swish', 1],
-                [5, 288, 96, True, 'hard_swish', (2, 1)],
+                [5, 288, 96, True, 'hard_swish', (small_stride[3], 1)],
                [5, 576, 96, True, 'hard_swish', 1],
                [5, 576, 96, True, 'hard_swish', 1],
            ]
@@ -78,7 +90,7 @@ class MobileNetV3():

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert self.scale in supported_scale, \
-            "supported scale are {} but input scale is {}".format(supported_scale, scale)
+            "supported scales are {} but input scale is {}".format(supported_scale, self.scale)

    def __call__(self, input):
        scale = self.scale

--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/modeling/heads/det_db_head.py
+++ b/ppocr/modeling/heads/det_db_head.py
@@ -115,7 +115,6 @@ class DBHead(object):
        initializer = fluid.initializer.Uniform(-stdv, stdv)
        bias_attr = fluid.ParamAttr(
            regularizer=regularizer,
-            gradient_clip=gradient_clip,
            initializer=initializer,
            name=name + "_b_attr")
        return bias_attr
@@ -196,7 +195,7 @@ class DBHead(object):
        fuse = fluid.layers.concat(input=[p5, p4, p3, p2], axis=1)
        shrink_maps = self.binarize(fuse)
        if mode != "train":
-            return shrink_maps
+            return {"maps": shrink_maps}
        threshold_maps = self.thresh(fuse)
        binary_maps = self.step_function(shrink_maps, threshold_maps)
        y = fluid.layers.concat(

--- a/ppocr/modeling/heads/det_east_head.py
+++ b/ppocr/modeling/heads/det_east_head.py
@@ -18,6 +18,7 @@ from __future__ import print_function

 import paddle.fluid as fluid
 from ..common_functions import conv_bn_layer, deconv_bn_layer
+from collections import OrderedDict


 class EASTHead(object):
@@ -110,7 +111,7 @@ class EASTHead(object):
    def __call__(self, inputs):
        f_common = self.unet_fusion(inputs)
        f_score, f_geo = self.detector_header(f_common)
-        predicts = {}
+        predicts = OrderedDict()
        predicts['f_score'] = f_score
        predicts['f_geo'] = f_geo
        return predicts
--- a/ppocr/modeling/heads/rec_attention_head.py
+++ b/ppocr/modeling/heads/rec_attention_head.py
@@ -123,6 +123,8 @@ class AttentionPredict(object):

        full_ids = fluid.layers.fill_constant_batch_size_like(
            input=init_state, shape=[-1, 1], dtype='int64', value=1)
+        full_scores = fluid.layers.fill_constant_batch_size_like(
+            input=init_state, shape=[-1, 1], dtype='float32', value=1)

        cond = layers.less_than(x=counter, y=array_len)
        while_op = layers.While(cond=cond)
@@ -171,6 +173,9 @@ class AttentionPredict(object):
            new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
            fluid.layers.assign(new_ids, full_ids)

+            new_scores = fluid.layers.concat([full_scores, topk_scores], axis=1)
+            fluid.layers.assign(new_scores, full_scores)
+            
            layers.increment(x=counter, value=1, in_place=True)

            # update the memories
@@ -184,7 +189,7 @@ class AttentionPredict(object):
            length_cond = layers.less_than(x=counter, y=array_len)
            finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
            layers.logical_and(x=length_cond, y=finish_cond, out=cond)
-        return full_ids
+        return full_ids, full_scores

    def __call__(self, inputs, labels=None, mode=None):
        encoder_features = self.encoder(inputs)
@@ -223,10 +228,10 @@ class AttentionPredict(object):
                decoder_size, char_num)
            _, decoded_out = layers.topk(input=predict, k=1)
            decoded_out = layers.lod_reset(decoded_out, y=label_out)
-            predicts = {'predict': predict, 'decoded_out': decoded_out}
+            predicts = {'predict':predict, 'decoded_out':decoded_out}
        else:
-            ids = self.gru_attention_infer(
+            ids, predict = self.gru_attention_infer(
                decoder_boot, self.max_length, char_num, word_vector_dim,
                encoded_vector, encoded_proj, decoder_size)
-            predicts = {'decoded_out': ids}
+            predicts = {'predict':predict, 'decoded_out':ids}
        return predicts
--- a/ppocr/modeling/heads/rec_ctc_head.py
+++ b/ppocr/modeling/heads/rec_ctc_head.py
@@ -32,6 +32,7 @@ class CTCPredict(object):
        self.char_num = params['char_num']
        self.encoder = SequenceEncoder(params)
        self.encoder_type = params['encoder_type']
+        self.fc_decay = params.get("fc_decay", 0.0004)

    def __call__(self, inputs, labels=None, mode=None):
        encoder_features = self.encoder(inputs)
@@ -39,7 +40,7 @@ class CTCPredict(object):
            encoder_features = fluid.layers.concat(encoder_features, axis=1)
        name = "ctc_fc"
        para_attr, bias_attr = get_para_bias_attr(
-            l2_decay=0.0004, k=encoder_features.shape[1], name=name)
+            l2_decay=self.fc_decay, k=encoder_features.shape[1], name=name)
        predict = fluid.layers.fc(input=encoder_features,
                                  size=self.char_num + 1,
                                  param_attr=para_attr,

--- a/ppocr/modeling/losses/__init__.py
+++ b/ppocr/modeling/losses/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/modeling/stns/__init__.py
+++ b/ppocr/modeling/stns/__init__.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
--- a/ppocr/optimizer.py
+++ b/ppocr/optimizer.py
@@ -15,6 +15,11 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 import paddle.fluid as fluid
+from paddle.fluid.regularizer import L2Decay
+
+from ppocr.utils.utility import initial_logger
+
+logger = initial_logger()


 def AdamDecay(params, parameter_list=None):
@@ -28,9 +33,35 @@ def AdamDecay(params, parameter_list=None):
    base_lr = params['base_lr']
    beta1 = params['beta1']
    beta2 = params['beta2']
+    l2_decay = params.get("l2_decay", 0.0)
+
+    if 'decay' in params:
+        supported_decay_mode = ["cosine_decay", "piecewise_decay"]
+        params = params['decay']
+        decay_mode = params['function']
+        assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
+            supported_decay_mode, decay_mode)
+
+        if decay_mode == "cosine_decay":
+            step_each_epoch = params['step_each_epoch']
+            total_epoch = params['total_epoch']
+            base_lr = fluid.layers.cosine_decay(
+                learning_rate=base_lr,
+                step_each_epoch=step_each_epoch,
+                epochs=total_epoch)
+        elif decay_mode == "piecewise_decay":
+            boundaries = params["boundaries"]
+            decay_rate = params["decay_rate"]
+            values = [
+                base_lr * decay_rate**idx
+                for idx in range(len(boundaries) + 1)
+            ]
+            base_lr = fluid.layers.piecewise_decay(boundaries, values)
+
    optimizer = fluid.optimizer.Adam(
        learning_rate=base_lr,
        beta1=beta1,
        beta2=beta2,
+        regularization=L2Decay(regularization_coeff=l2_decay),
        parameter_list=parameter_list)
    return optimizer
--- a/ppocr/postprocess/db_postprocess.py
+++ b/ppocr/postprocess/db_postprocess.py
@@ -35,6 +35,7 @@ class DBPostProcess(object):
        self.thresh = params['thresh']
        self.box_thresh = params['box_thresh']
        self.max_candidates = params['max_candidates']
+        self.unclip_ratio = params['unclip_ratio']
        self.min_size = 3

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
@@ -46,9 +47,12 @@ class DBPostProcess(object):
        bitmap = _bitmap
        height, width = bitmap.shape

-        # img, contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
-        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
-                                       cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
+        outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST,
+                                cv2.CHAIN_APPROX_SIMPLE)
+        if len(outs) == 3:
+            img, contours, _ = outs[0], outs[1], outs[2]
+        elif len(outs) == 2:
+            contours, _ = outs[0], outs[1]

        num_contours = min(len(contours), self.max_candidates)
        boxes = np.zeros((num_contours, 4, 2), dtype=np.int16)
@@ -81,7 +85,8 @@ class DBPostProcess(object):
            scores[index] = score
        return boxes, scores

-    def unclip(self, box, unclip_ratio=1.5):
+    def unclip(self, box):
+        unclip_ratio = self.unclip_ratio
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
@@ -128,6 +133,7 @@ class DBPostProcess(object):

    def __call__(self, outs_dict, ratio_list):
        pred = outs_dict['maps']
+
        pred = pred[:, 0, :, :]
        segmentation = pred > self.thresh


--- a/ppocr/postprocess/east_postprocess.py
+++ b/ppocr/postprocess/east_postprocess.py
@@ -20,6 +20,12 @@ import numpy as np
 from .locality_aware_nms import nms_locality
 import cv2

+import os
+import sys
+__dir__ = os.path.dirname(__file__)
+sys.path.append(__dir__)
+sys.path.append(os.path.join(__dir__, '..'))
+

 class EASTPostPocess(object):
    """
@@ -30,6 +36,11 @@ class EASTPostPocess(object):
        self.score_thresh = params['score_thresh']
        self.cover_thresh = params['cover_thresh']
        self.nms_thresh = params['nms_thresh']
+        
+        # The C++ LA-NMS implementation is faster, but it only supports Python 3.5
+        self.is_python35 = False
+        if sys.version_info.major == 3 and sys.version_info.minor == 5:
+            self.is_python35 = True

    def restore_rectangle_quad(self, origin, geometry):
        """
@@ -66,7 +77,11 @@ class EASTPostPocess(object):
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
-        boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
+        if self.is_python35:
+            import lanms
+            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
+        else:
+            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
        if boxes.shape[0] == 0:
            return []
        # Here we filter some low score boxes by the average score map, 

--- a/ppocr/postprocess/lanms/.gitignore
+++ b/ppocr/postprocess/lanms/.gitignore
+adaptor.so
--- a/ppocr/postprocess/lanms/.ycm_extra_conf.py
+++ b/ppocr/postprocess/lanms/.ycm_extra_conf.py
+#!/usr/bin/env python
+#
+# Copyright (C) 2014  Google Inc.
+#
+# This file is part of YouCompleteMe.
+#
+# YouCompleteMe is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# YouCompleteMe is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with YouCompleteMe.  If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import sys
+import glob
+import ycm_core
+
+# Make modules that live next to this file importable.
+sys.path.append(os.path.dirname(__file__))
+
+
+# Directory of this file with symlinks resolved.
+# NOTE(review): BASE_DIR is not referenced anywhere else in this file.
+BASE_DIR = os.path.dirname(os.path.realpath(__file__))
+
+# NOTE(review): presumably plumbum maps this name to the `python-config`
+# executable found on PATH -- confirm.
+from plumbum.cmd import python_config
+
+
+# These are the compilation flags that will be used in case there's no
+# compilation database set (by default, one is not set).
+# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
+# The whitespace-split output of `python_config --cflags` is appended to the
+# fixed flags below.
+flags = [
+    '-Wall',
+    '-Wextra',
+    '-Wnon-virtual-dtor',
+    '-Winvalid-pch',
+    '-Wno-unused-local-typedefs',
+    '-std=c++11',
+    '-x', 'c++',
+    '-Iinclude',
+] + python_config('--cflags').split()
+
+
+# Set this to the absolute path to the folder (NOT the file!) containing the
+# compile_commands.json file to use that instead of 'flags'. See here for
+# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
+#
+# Most projects will NOT need to set this to anything; you can just change the
+# 'flags' list of compilation flags.
+compilation_database_folder = ''
+
+# With the default empty folder above, os.path.exists() is False, so `database`
+# is None and FlagsForFile falls back to the static `flags` list.
+if os.path.exists( compilation_database_folder ):
+  database = ycm_core.CompilationDatabase( compilation_database_folder )
+else:
+  database = None
+
+SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ]
+
+def DirectoryOfThisScript():
+  # Return the absolute path of the directory that contains this file.
+  return os.path.dirname( os.path.abspath( __file__ ) )
+
+
+def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
+  # Return a new list of flags in which the path arguments of the include /
+  # sysroot options listed in `path_flags` are joined onto `working_directory`.
+  #
+  # flags: iterable of compiler flag strings.
+  # working_directory: directory the relative paths are resolved against; when
+  #   it is empty or None the flags are returned unchanged (as a list copy).
+  #
+  # Two spellings are handled: the option and its path as separate items
+  # ('-I', 'include') and the path attached to the option ('-Iinclude').
+  # Empty flag strings are dropped from the result.
+  if not working_directory:
+    return list( flags )
+  new_flags = []
+  # True when the previous flag was a bare path option, i.e. the current flag
+  # is that option's path argument.
+  make_next_absolute = False
+  path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
+  for flag in flags:
+    new_flag = flag
+
+    if make_next_absolute:
+      make_next_absolute = False
+      # Paths that already start with '/' are kept as they are.
+      if not flag.startswith( '/' ):
+        new_flag = os.path.join( working_directory, flag )
+
+    for path_flag in path_flags:
+      # Separate form: remember that the next flag is the path.
+      if flag == path_flag:
+        make_next_absolute = True
+        break
+
+      # Attached form: split off the path and rebuild the flag. os.path.join
+      # returns an already-absolute `path` unchanged.
+      if flag.startswith( path_flag ):
+        path = flag[ len( path_flag ): ]
+        new_flag = path_flag + os.path.join( working_directory, path )
+        break
+
+    if new_flag:
+      new_flags.append( new_flag )
+  return new_flags
+
+
+def IsHeaderFile( filename ):
+  # Return True when the extension of `filename` is one of '.h', '.hxx',
+  # '.hpp' or '.hh'. The comparison is case-sensitive.
+  extension = os.path.splitext( filename )[ 1 ]
+  return extension in [ '.h', '.hxx', '.hpp', '.hh' ]
+
+
+def GetCompilationInfoForFile( filename ):
+  # Look up the compilation info for `filename` in the module-level `database`.
+  # `database` must not be None here; FlagsForFile only calls this function
+  # when a database was loaded.
+  #
+  # The compile_commands.json file generated by CMake does not have entries
+  # for header files. So we do our best by asking the db for flags for a
+  # corresponding source file, if any. If one exists, the flags for that file
+  # should be good enough.
+  #
+  # Returns None for a header when no existing sibling source file (same base
+  # name, one of SOURCE_EXTENSIONS) yields any compiler flags.
+  if IsHeaderFile( filename ):
+    basename = os.path.splitext( filename )[ 0 ]
+    for extension in SOURCE_EXTENSIONS:
+      replacement_file = basename + extension
+      if os.path.exists( replacement_file ):
+        compilation_info = database.GetCompilationInfoForFile(
+          replacement_file )
+        if compilation_info.compiler_flags_:
+          return compilation_info
+    return None
+  return database.GetCompilationInfoForFile( filename )
+
+
+# This is the entry point; this function is called by ycmd to produce flags for
+# a file.
+def FlagsForFile( filename, **kwargs ):
+  # Build the flags for `filename`; extra keyword arguments are accepted and
+  # ignored.
+  #
+  # Returns a dict with the keys 'flags' (list of flags with relative include
+  # paths made absolute) and 'do_cache' (always True), or None when a
+  # compilation database is loaded but has no usable entry for the file.
+  if database:
+    # Bear in mind that compilation_info.compiler_flags_ does NOT return a
+    # python list, but a "list-like" StringVec object
+    compilation_info = GetCompilationInfoForFile( filename )
+    if not compilation_info:
+      return None
+
+    final_flags = MakeRelativePathsInFlagsAbsolute(
+      compilation_info.compiler_flags_,
+      compilation_info.compiler_working_dir_ )
+  else:
+    # No database: use the static `flags` list, resolved relative to the
+    # directory that contains this script.
+    relative_to = DirectoryOfThisScript()
+    final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to )
+
+  return {
+    'flags': final_flags,
+    'do_cache': True
+  }
+
--- a/ppocr/postprocess/lanms/Makefile
+++ b/ppocr/postprocess/lanms/Makefile
+# Builds adaptor.so, the Python extension module for the C++ LA-NMS code.
+# Compiler and linker flags for the Python headers/libraries are taken from
+# python3-config.
+CXXFLAGS = -I include  -std=c++11 -O3 $(shell python3-config --cflags)
+LDFLAGS = $(shell python3-config --ldflags)
+
+# Rebuild whenever lanms.h or any file under include/ changes.
+DEPS = lanms.h $(shell find include -xtype f)
+CXX_SOURCES = adaptor.cpp include/clipper/clipper.cpp
+
+LIB_SO = adaptor.so
+
+# Default goal: compile and link all sources into the shared library.
+$(LIB_SO): $(CXX_SOURCES) $(DEPS)
+	$(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $(CXX_SOURCES) --shared -fPIC
+
+# `clean` produces no file; declaring it phony keeps the rule runnable even if
+# a file named `clean` exists in this directory.
+.PHONY: clean
+clean:
+	rm -rf $(LIB_SO)