"ppstructure/vscode:/vscode.git/clone" did not exist on "0d7ee968077096cb010896e28061bc4bcb2eb40a"
Commit aad3093a authored by WenmuZhou's avatar WenmuZhou
Browse files

dygraph first commit

parent 10f7e519
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import sys
import six
import cv2
import numpy as np
class DecodeImage(object):
    """Decode a raw image byte string into an ndarray.

    Args:
        img_mode (str): output color mode — 'BGR' (raw decode), 'RGB'
            (channels reversed) or 'GRAY' (converted to one channel).
        channel_first (bool): if True, transpose the result HWC -> CHW.
    """

    def __init__(self, img_mode='RGB', channel_first=False, **kwargs):
        self.img_mode = img_mode
        self.channel_first = channel_first

    def __call__(self, data):
        img = data['image']
        if six.PY2:
            assert type(img) is str and len(
                img) > 0, "invalid input 'img' in DecodeImage"
        else:
            assert type(img) is bytes and len(
                img) > 0, "invalid input 'img' in DecodeImage"
        img = np.frombuffer(img, dtype='uint8')
        # Always decode as a 3-channel BGR image first (flag 1).
        img = cv2.imdecode(img, 1)
        if self.img_mode == 'GRAY':
            # BUGFIX: the decoded image is 3-channel BGR, so the correct
            # conversion is BGR -> GRAY; COLOR_GRAY2BGR expects a
            # single-channel input and would raise here.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        elif self.img_mode == 'RGB':
            assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape)
            img = img[:, :, ::-1]

        if self.channel_first:
            img = img.transpose((2, 0, 1))
        data['image'] = img
        return data
class NormalizeImage(object):
    """Normalize an image: scale pixels, subtract mean, divide by std."""

    def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs):
        if isinstance(scale, str):
            scale = eval(scale)
        if scale is None:
            scale = 1.0 / 255.0
        if mean is None:
            mean = [0.485, 0.456, 0.406]
        if std is None:
            std = [0.229, 0.224, 0.225]

        # Broadcast shape depends on the channel layout (CHW vs HWC).
        if order == 'chw':
            shape = (3, 1, 1)
        else:
            shape = (1, 1, 3)
        self.scale = np.float32(scale)
        self.mean = np.array(mean).reshape(shape).astype('float32')
        self.std = np.array(std).reshape(shape).astype('float32')

    def __call__(self, data):
        img = data['image']
        from PIL import Image
        if isinstance(img, Image.Image):
            img = np.array(img)
        assert isinstance(img,
                          np.ndarray), "invalid input 'img' in NormalizeImage"
        scaled = img.astype('float32') * self.scale - self.mean
        data['image'] = scaled / self.std
        return data
class ToCHWImage(object):
    """Transpose an HWC image to CHW layout."""

    def __init__(self, **kwargs):
        pass

    def __call__(self, data):
        from PIL import Image
        img = data['image']
        if isinstance(img, Image.Image):
            img = np.array(img)
        data['image'] = img.transpose((2, 0, 1))
        return data
class keepKeys(object):
    """Reduce the data dict to an ordered list of the configured keys."""

    def __init__(self, keep_keys, **kwargs):
        self.keep_keys = keep_keys

    def __call__(self, data):
        # Preserve the configured key order; missing keys raise KeyError.
        return [data[key] for key in self.keep_keys]
class DetResizeForTest(object):
    """Resize an input image for text-detection inference.

    Two modes:
      * resize_type 1 (when 'image_shape' is passed): resize to the fixed
        (h, w) given by ``image_shape``.
      * resize_type 0 (default): scale so the limiting side respects
        ``limit_side_len`` and round both sides to multiples of 32.
    """

    def __init__(self, **kwargs):
        super(DetResizeForTest, self).__init__()
        self.resize_type = 0
        if 'image_shape' in kwargs:
            self.image_shape = kwargs['image_shape']
            self.resize_type = 1
        if 'limit_side_len' in kwargs:
            self.limit_side_len = kwargs['limit_side_len']
            # 'min' limits the shorter side, 'max' the longer one.
            self.limit_type = kwargs.get('limit_type', 'min')
        else:
            self.limit_side_len = 736
            self.limit_type = 'min'

    def __call__(self, data):
        img = data['image']
        if self.resize_type == 0:
            img, shape = self.resize_image_type0(img)
        else:
            img, shape = self.resize_image_type1(img)
        data['image'] = img
        data['shape'] = shape
        return data

    def resize_image_type1(self, img):
        """Resize to the fixed shape; return the image and original (h, w)."""
        resize_h, resize_w = self.image_shape
        ori_h, ori_w = img.shape[:2]  # (h, w, c)
        img = cv2.resize(img, (int(resize_w), int(resize_h)))
        return img, np.array([ori_h, ori_w])

    def resize_image_type0(self, img):
        """
        resize image to a size multiple of 32 which is required by the network
        args:
            img(array): array with shape [h, w, c]
        return(tuple):
            img, np.array([h, w]) -- the original height and width
        """
        limit_side_len = self.limit_side_len
        h, w, _ = img.shape

        # Pick a scaling ratio so the limited side fits limit_side_len.
        if self.limit_type == 'max':
            if max(h, w) > limit_side_len:
                if h > w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        else:
            if min(h, w) < limit_side_len:
                if h < w:
                    ratio = float(limit_side_len) / h
                else:
                    ratio = float(limit_side_len) / w
            else:
                ratio = 1.
        resize_h = int(h * ratio)
        resize_w = int(w * ratio)

        # Round to the nearest multiple of 32 (network stride requirement).
        resize_h = int(round(resize_h / 32) * 32)
        resize_w = int(round(resize_w / 32) * 32)

        if int(resize_w) <= 0 or int(resize_h) <= 0:
            return None, (None, None)
        try:
            img = cv2.resize(img, (int(resize_w), int(resize_h)))
        except cv2.error:
            # BUGFIX: was a bare `except:` which also swallowed
            # KeyboardInterrupt/SystemExit; only an OpenCV resize failure
            # should trigger this diagnostic exit.
            print(img.shape, resize_w, resize_h)
            sys.exit(0)

        return img, np.array([h, w])
...@@ -108,48 +108,103 @@ def crop_area(im, text_polys, min_crop_side_ratio, max_tries): ...@@ -108,48 +108,103 @@ def crop_area(im, text_polys, min_crop_side_ratio, max_tries):
return 0, 0, w, h return 0, 0, w, h
def RandomCropData(data, size): class EastRandomCropData(object):
max_tries = 10 def __init__(self,
min_crop_side_ratio = 0.1 size=(640, 640),
require_original_image = False max_tries=10,
keep_ratio = True min_crop_side_ratio=0.1,
keep_ratio=True,
im = data['image'] **kwargs):
text_polys = data['polys'] self.size = size
ignore_tags = data['ignore_tags'] self.max_tries = max_tries
texts = data['texts'] self.min_crop_side_ratio = min_crop_side_ratio
all_care_polys = [ self.keep_ratio = keep_ratio
text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
] def __call__(self, data):
# 计算crop区域 img = data['image']
crop_x, crop_y, crop_w, crop_h = crop_area(im, all_care_polys, text_polys = data['polys']
min_crop_side_ratio, max_tries) ignore_tags = data['ignore_tags']
# crop 图片 保持比例填充 texts = data['texts']
scale_w = size[0] / crop_w all_care_polys = [
scale_h = size[1] / crop_h text_polys[i] for i, tag in enumerate(ignore_tags) if not tag
scale = min(scale_w, scale_h) ]
h = int(crop_h * scale) # 计算crop区域
w = int(crop_w * scale) crop_x, crop_y, crop_w, crop_h = crop_area(
if keep_ratio: img, all_care_polys, self.min_crop_side_ratio, self.max_tries)
padimg = np.zeros((size[1], size[0], im.shape[2]), im.dtype) # crop 图片 保持比例填充
padimg[:h, :w] = cv2.resize( scale_w = self.size[0] / crop_w
im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h)) scale_h = self.size[1] / crop_h
img = padimg scale = min(scale_w, scale_h)
else: h = int(crop_h * scale)
img = cv2.resize(im[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], w = int(crop_w * scale)
tuple(size)) if self.keep_ratio:
# crop 文本框 padimg = np.zeros((self.size[1], self.size[0], img.shape[2]),
text_polys_crop = [] img.dtype)
ignore_tags_crop = [] padimg[:h, :w] = cv2.resize(
texts_crop = [] img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w], (w, h))
for poly, text, tag in zip(text_polys, texts, ignore_tags): img = padimg
poly = ((poly - (crop_x, crop_y)) * scale).tolist() else:
if not is_poly_outside_rect(poly, 0, 0, w, h): img = cv2.resize(
text_polys_crop.append(poly) img[crop_y:crop_y + crop_h, crop_x:crop_x + crop_w],
ignore_tags_crop.append(tag) tuple(self.size))
texts_crop.append(text) # crop 文本框
data['image'] = img text_polys_crop = []
data['polys'] = np.array(text_polys_crop) ignore_tags_crop = []
data['ignore_tags'] = ignore_tags_crop texts_crop = []
data['texts'] = texts_crop for poly, text, tag in zip(text_polys, texts, ignore_tags):
return data poly = ((poly - (crop_x, crop_y)) * scale).tolist()
if not is_poly_outside_rect(poly, 0, 0, w, h):
text_polys_crop.append(poly)
ignore_tags_crop.append(tag)
texts_crop.append(text)
data['image'] = img
data['polys'] = np.array(text_polys_crop)
data['ignore_tags'] = ignore_tags_crop
data['texts'] = texts_crop
return data
class PSERandomCrop(object):
    """Randomly crop all maps in data['imgs'] to ``size`` (PSENet style).

    imgs[0] is the image, imgs[1] the shrink label map and imgs[2] the
    threshold label map; all maps are cropped with the same window.
    """

    def __init__(self, size, **kwargs):
        self.size = size

    def __call__(self, data):
        imgs = data['imgs']

        h, w = imgs[0].shape[0:2]
        th, tw = self.size
        if w == tw and h == th:
            # BUGFIX: must return the data dict like every other path,
            # not the raw list of images.
            return data

        # If the threshold label map contains text, crop near the text
        # region with probability 5/8.
        if np.max(imgs[2]) > 0 and random.random() > 3 / 8:
            # Top-left corner of the text region, shifted so a crop of
            # ``size`` can still cover text; clamped to the image.
            tl = np.min(np.where(imgs[2] > 0), axis=1) - self.size
            tl[tl < 0] = 0
            # Bottom-right corner of the text region.
            br = np.max(np.where(imgs[2] > 0), axis=1) - self.size
            br[br < 0] = 0
            # Keep enough room to crop when starting near the bottom-right.
            br[0] = min(br[0], h - th)
            br[1] = min(br[1], w - tw)

            for _ in range(50000):
                i = random.randint(tl[0], br[0])
                j = random.randint(tl[1], br[1])
                # Ensure the shrink label map contains some text.
                if imgs[1][i:i + th, j:j + tw].sum() <= 0:
                    continue
                else:
                    break
        else:
            i = random.randint(0, h - th)
            j = random.randint(0, w - tw)

        # Apply the same crop window to every map.
        for idx in range(len(imgs)):
            if len(imgs[idx].shape) == 3:
                imgs[idx] = imgs[idx][i:i + th, j:j + tw, :]
            else:
                imgs[idx] = imgs[idx][i:i + th, j:j + tw]
        data['imgs'] = imgs
        return data
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math import math
import cv2 import cv2
import numpy as np import numpy as np
import random import random
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from .text_image_aug import tia_perspective, tia_stretch, tia_distort
def get_bounding_box_rect(pos):
left = min(pos[0]) class RecAug(object):
right = max(pos[0]) def __init__(self, **kwargsz):
top = min(pos[1]) pass
bottom = max(pos[1])
return [left, top, right, bottom] def __call__(self, data):
img = data['image']
img = warp(img, 10)
data['image'] = img
return data
class RecResizeImg(object):
    """Resize and normalize a recognition image.

    Chinese inference without TPS uses the width-adaptive resize
    (resize_norm_img_chinese); every other case uses the fixed-shape
    resize (resize_norm_img).
    """

    def __init__(self,
                 image_shape,
                 infer_mode=False,
                 character_type='ch',
                 use_tps=False,
                 **kwargs):
        self.image_shape = image_shape
        self.infer_mode = infer_mode
        self.character_type = character_type
        self.use_tps = use_tps

    def __call__(self, data):
        img = data['image']
        chinese_infer = (self.infer_mode and self.character_type == "ch" and
                         not self.use_tps)
        if chinese_infer:
            data['image'] = resize_norm_img_chinese(img, self.image_shape)
        else:
            data['image'] = resize_norm_img(img, self.image_shape)
        return data
def resize_norm_img(img, image_shape): def resize_norm_img(img, image_shape):
...@@ -77,19 +116,6 @@ def resize_norm_img_chinese(img, image_shape): ...@@ -77,19 +116,6 @@ def resize_norm_img_chinese(img, image_shape):
return padding_im return padding_im
def get_img_data(value):
    """Decode a raw byte buffer into a BGR image, or None on failure."""
    if not value:
        return None
    buf = np.frombuffer(value, dtype='uint8')
    if buf is None:
        return None
    decoded = cv2.imdecode(buf, 1)
    if decoded is None:
        return None
    return decoded
def flag(): def flag():
""" """
flag flag
...@@ -196,6 +222,9 @@ class Config: ...@@ -196,6 +222,9 @@ class Config:
self.h = h self.h = h
self.perspective = True self.perspective = True
self.stretch = True
self.distort = True
self.crop = True self.crop = True
self.affine = False self.affine = False
self.reverse = True self.reverse = True
...@@ -299,168 +328,39 @@ def warp(img, ang): ...@@ -299,168 +328,39 @@ def warp(img, ang):
config.make(w, h, ang) config.make(w, h, ang)
new_img = img new_img = img
prob = 0.4
if config.distort:
img_height, img_width = img.shape[0:2]
if random.random() <= prob and img_height >= 20 and img_width >= 20:
new_img = tia_distort(new_img, random.randint(3, 6))
if config.stretch:
img_height, img_width = img.shape[0:2]
if random.random() <= prob and img_height >= 20 and img_width >= 20:
new_img = tia_stretch(new_img, random.randint(3, 6))
if config.perspective: if config.perspective:
tp = random.randint(1, 100) if random.random() <= prob:
if tp >= 50: new_img = tia_perspective(new_img)
warpR, (r1, c1), ratio, dst = get_warpR(config)
new_w = int(np.max(dst[:, 0])) - int(np.min(dst[:, 0]))
new_img = cv2.warpPerspective(
new_img,
warpR, (int(new_w * ratio), h),
borderMode=config.borderMode)
if config.crop: if config.crop:
img_height, img_width = img.shape[0:2] img_height, img_width = img.shape[0:2]
tp = random.randint(1, 100) if random.random() <= prob and img_height >= 20 and img_width >= 20:
if tp >= 50 and img_height >= 20 and img_width >= 20:
new_img = get_crop(new_img) new_img = get_crop(new_img)
if config.affine:
warpT = get_warpAffine(config)
new_img = cv2.warpAffine(
new_img, warpT, (w, h), borderMode=config.borderMode)
if config.blur: if config.blur:
tp = random.randint(1, 100) if random.random() <= prob:
if tp >= 50:
new_img = blur(new_img) new_img = blur(new_img)
if config.color: if config.color:
tp = random.randint(1, 100) if random.random() <= prob:
if tp >= 50:
new_img = cvtColor(new_img) new_img = cvtColor(new_img)
if config.jitter: if config.jitter:
new_img = jitter(new_img) new_img = jitter(new_img)
if config.noise: if config.noise:
tp = random.randint(1, 100) if random.random() <= prob:
if tp >= 50:
new_img = add_gasuss_noise(new_img) new_img = add_gasuss_noise(new_img)
if config.reverse: if config.reverse:
tp = random.randint(1, 100) if random.random() <= prob:
if tp >= 50:
new_img = 255 - new_img new_img = 255 - new_img
return new_img return new_img
def process_image(img,
                  image_shape,
                  label=None,
                  char_ops=None,
                  loss_type=None,
                  max_text_length=None,
                  tps=None,
                  infer_mode=False,
                  distort=False):
    """Preprocess a recognition image and, optionally, encode its label.

    Returns the normalized image alone, a (image, label...) tuple
    depending on ``loss_type``, or None when the encoded label is empty
    or longer than ``max_text_length``.
    """
    if distort:
        img = warp(img, 10)
    if infer_mode and char_ops.character_type == "ch" and not tps:
        norm_img = resize_norm_img_chinese(img, image_shape)
    else:
        norm_img = resize_norm_img(img, image_shape)
    norm_img = norm_img[np.newaxis, :]

    if label is not None:
        text = char_ops.encode(label)
        if len(text) == 0 or len(text) > max_text_length:
            logger.info(
                "Warning in ppocr/data/rec/img_tools.py: Wrong data type."
                "Excepted string with length between 1 and {}, but "
                "got '{}'. Label is '{}'".format(max_text_length,
                                                 len(text), label))
            return None
        if loss_type == "ctc":
            return (norm_img, text.reshape(-1, 1))
        if loss_type == "attention":
            beg_idx = char_ops.get_beg_end_flag_idx("beg")
            end_idx = char_ops.get_beg_end_flag_idx("end")
            beg_text = np.append(beg_idx, text).reshape(-1, 1)
            end_text = np.append(text, end_idx).reshape(-1, 1)
            return (norm_img, beg_text, end_text)
        assert False, "Unsupport loss_type %s in process_image" \
            % loss_type
    return (norm_img)
def resize_norm_img_srn(img, image_shape):
    """Resize for SRN: grayscale, width zero-padded to imgW, CHW float32."""
    imgC, imgH, imgW = image_shape

    img_black = np.zeros((imgH, imgW))
    im_hei = img.shape[0]
    im_wid = img.shape[1]

    # Snap the resized width to 1x/2x/3x of the target height, falling
    # back to the full target width for very wide inputs.
    for mult in (1, 2, 3):
        if im_wid <= im_hei * mult:
            img_new = cv2.resize(img, (imgH * mult, imgH))
            break
    else:
        img_new = cv2.resize(img, (imgW, imgH))

    img_np = np.asarray(img_new)
    img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY)
    # Left-align the resized image on the black canvas.
    img_black[:, 0:img_np.shape[1]] = img_np
    img_black = img_black[:, :, np.newaxis]

    row, col, c = img_black.shape
    c = 1  # single gray channel

    return np.reshape(img_black, (c, row, col)).astype(np.float32)
def srn_other_inputs(image_shape,
                     num_heads,
                     max_text_length,
                     char_num):
    """Build the auxiliary SRN inputs.

    Returns [lbl_weight, encoder_word_pos, gsrm_word_pos,
    gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] where bias1 masks the
    upper triangle and bias2 the lower triangle with -1e9.
    """
    imgC, imgH, imgW = image_shape
    feature_dim = int((imgH / 8) * (imgW / 8))

    encoder_word_pos = np.arange(feature_dim).reshape(
        (feature_dim, 1)).astype('int64')
    gsrm_word_pos = np.arange(max_text_length).reshape(
        (max_text_length, 1)).astype('int64')

    # Every slot starts as padding (char_num - 1) with unit weight.
    lbl_weight = np.full((max_text_length, 1), int(char_num - 1),
                         dtype='int64')

    ones = np.ones((1, max_text_length, max_text_length))
    gsrm_slf_attn_bias1 = np.triu(ones, 1).reshape(
        [-1, 1, max_text_length, max_text_length])
    gsrm_slf_attn_bias1 = np.tile(gsrm_slf_attn_bias1,
                                  [1, num_heads, 1, 1]) * [-1e9]

    gsrm_slf_attn_bias2 = np.tril(ones, -1).reshape(
        [-1, 1, max_text_length, max_text_length])
    gsrm_slf_attn_bias2 = np.tile(gsrm_slf_attn_bias2,
                                  [1, num_heads, 1, 1]) * [-1e9]

    encoder_word_pos = encoder_word_pos[np.newaxis, :]
    gsrm_word_pos = gsrm_word_pos[np.newaxis, :]

    return [
        lbl_weight, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
        gsrm_slf_attn_bias2
    ]
def process_image_srn(img,
                      image_shape,
                      num_heads,
                      max_text_length,
                      label=None,
                      char_ops=None,
                      loss_type=None):
    """Preprocess an image (and optional label) for the SRN head."""
    norm_img = resize_norm_img_srn(img, image_shape)
    norm_img = norm_img[np.newaxis, :]
    char_num = char_ops.get_char_num()

    [lbl_weight, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
     gsrm_slf_attn_bias2] = srn_other_inputs(image_shape, num_heads,
                                             max_text_length, char_num)

    if label is not None:
        text = char_ops.encode(label)
        if len(text) == 0 or len(text) > max_text_length:
            return None
        if loss_type == "srn":
            # Pad the label to max_text_length and mark real characters
            # with weight 1.0 in lbl_weight.
            text_padded = [int(char_num - 1)] * max_text_length
            for i in range(len(text)):
                text_padded[i] = text[i]
                lbl_weight[i] = [1.0]
            text = np.array(text_padded).reshape(-1, 1)
            return (norm_img, text, encoder_word_pos, gsrm_word_pos,
                    gsrm_slf_attn_bias1, gsrm_slf_attn_bias2, lbl_weight)
        assert False, "Unsupport loss_type %s in process_image" \
            % loss_type
    return (norm_img, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
            gsrm_slf_attn_bias2)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
# Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .augment import tia_perspective, tia_distort, tia_stretch
__all__ = ['tia_distort', 'tia_stretch', 'tia_perspective']
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from .warp_mls import WarpMLS
def tia_distort(src, segment=4):
    """Randomly jitter the corners and segment boundaries of ``src``."""
    img_h, img_w = src.shape[:2]

    cut = img_w // segment
    thresh = cut // 3
    half_thresh = thresh * 0.5

    # Corners: top-left, top-right, bottom-right, bottom-left.
    src_pts = [[0, 0], [img_w, 0], [img_w, img_h], [0, img_h]]
    dst_pts = [
        [np.random.randint(thresh), np.random.randint(thresh)],
        [img_w - np.random.randint(thresh), np.random.randint(thresh)],
        [img_w - np.random.randint(thresh),
         img_h - np.random.randint(thresh)],
        [np.random.randint(thresh), img_h - np.random.randint(thresh)],
    ]

    # Jitter interior segment boundaries on both edges.
    for cut_idx in np.arange(1, segment, 1):
        src_pts.append([cut * cut_idx, 0])
        src_pts.append([cut * cut_idx, img_h])
        dst_pts.append([
            cut * cut_idx + np.random.randint(thresh) - half_thresh,
            np.random.randint(thresh) - half_thresh
        ])
        dst_pts.append([
            cut * cut_idx + np.random.randint(thresh) - half_thresh,
            img_h + np.random.randint(thresh) - half_thresh
        ])

    return WarpMLS(src, src_pts, dst_pts, img_w, img_h).generate()
def tia_stretch(src, segment=4):
    """Horizontally stretch/squeeze ``src`` by moving segment boundaries."""
    img_h, img_w = src.shape[:2]

    cut = img_w // segment
    thresh = cut * 4 // 5
    half_thresh = thresh * 0.5

    # Corners are fixed; only interior boundaries move horizontally.
    src_pts = [[0, 0], [img_w, 0], [img_w, img_h], [0, img_h]]
    dst_pts = [[0, 0], [img_w, 0], [img_w, img_h], [0, img_h]]

    for cut_idx in np.arange(1, segment, 1):
        move = np.random.randint(thresh) - half_thresh
        src_pts.append([cut * cut_idx, 0])
        src_pts.append([cut * cut_idx, img_h])
        dst_pts.append([cut * cut_idx + move, 0])
        dst_pts.append([cut * cut_idx + move, img_h])

    return WarpMLS(src, src_pts, dst_pts, img_w, img_h).generate()
def tia_perspective(src):
    """Apply a mild random vertical perspective warp to ``src``."""
    img_h, img_w = src.shape[:2]

    thresh = img_h // 2

    src_pts = [[0, 0], [img_w, 0], [img_w, img_h], [0, img_h]]
    dst_pts = [
        [0, np.random.randint(thresh)],
        [img_w, np.random.randint(thresh)],
        [img_w, img_h - np.random.randint(thresh)],
        [0, img_h - np.random.randint(thresh)],
    ]

    return WarpMLS(src, src_pts, dst_pts, img_w, img_h).generate()
\ No newline at end of file
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
class WarpMLS:
    """Deform an image so control points dst_pts map back to src_pts.

    Displacements are evaluated on a coarse grid (step ``grid_size``)
    and bilinearly interpolated per pixel. NOTE(review): the class name
    and the weighting scheme suggest moving-least-squares deformation —
    confirm against the original TIA reference implementation.
    """

    def __init__(self, src, src_pts, dst_pts, dst_w, dst_h, trans_ratio=1.):
        # src: source image; src_pts/dst_pts: matching control points;
        # dst_w/dst_h: output size.
        self.src = src
        self.src_pts = src_pts
        self.dst_pts = dst_pts
        self.pt_count = len(self.dst_pts)
        self.dst_w = dst_w
        self.dst_h = dst_h
        # Scales the interpolated displacement field in gen_img.
        self.trans_ratio = trans_ratio
        # Displacements are computed every grid_size pixels.
        self.grid_size = 100
        # Per-pixel displacement fields; calc_delta fills the grid nodes.
        self.rdx = np.zeros((self.dst_h, self.dst_w))
        self.rdy = np.zeros((self.dst_h, self.dst_w))

    @staticmethod
    def __bilinear_interp(x, y, v11, v12, v21, v22):
        # Standard bilinear interpolation of four corner values at (x, y).
        return (v11 * (1 - y) + v12 * y) * (1 - x) + (v21 *
                                                      (1 - y) + v22 * y) * x

    def generate(self):
        """Compute grid displacements and return the warped image."""
        self.calc_delta()
        return self.gen_img()

    def calc_delta(self):
        """Fill rdx/rdy at grid nodes with the per-node displacement."""
        # w[k]: inverse-square-distance weight of control point k.
        w = np.zeros(self.pt_count, dtype=np.float32)

        if self.pt_count < 2:
            return

        i = 0
        while 1:
            # Clamp the final column of grid nodes to the image edge.
            if self.dst_w <= i < self.dst_w + self.grid_size - 1:
                i = self.dst_w - 1
            elif i >= self.dst_w:
                break

            j = 0
            while 1:
                # Clamp the final row of grid nodes to the image edge.
                if self.dst_h <= j < self.dst_h + self.grid_size - 1:
                    j = self.dst_h - 1
                elif j >= self.dst_h:
                    break

                sw = 0
                swp = np.zeros(2, dtype=np.float32)
                swq = np.zeros(2, dtype=np.float32)
                new_pt = np.zeros(2, dtype=np.float32)
                cur_pt = np.array([i, j], dtype=np.float32)

                k = 0
                for k in range(self.pt_count):
                    # Node coincides with control point k: break and map it
                    # directly to src_pts[k] in the else branch below.
                    if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
                        break

                    w[k] = 1. / (
                        (i - self.dst_pts[k][0]) * (i - self.dst_pts[k][0]) +
                        (j - self.dst_pts[k][1]) * (j - self.dst_pts[k][1]))

                    sw += w[k]
                    swp = swp + w[k] * np.array(self.dst_pts[k])
                    swq = swq + w[k] * np.array(self.src_pts[k])

                if k == self.pt_count - 1:
                    # Weighted centroids of dst and src control points.
                    pstar = 1 / sw * swp
                    qstar = 1 / sw * swq

                    # Normalization term built from the dst-point spread.
                    miu_s = 0
                    for k in range(self.pt_count):
                        if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
                            continue
                        pt_i = self.dst_pts[k] - pstar
                        miu_s += w[k] * np.sum(pt_i * pt_i)

                    cur_pt -= pstar
                    # Perpendicular (rotated 90 degrees) of the node offset.
                    cur_pt_j = np.array([-cur_pt[1], cur_pt[0]])

                    for k in range(self.pt_count):
                        if i == self.dst_pts[k][0] and j == self.dst_pts[k][1]:
                            continue

                        pt_i = self.dst_pts[k] - pstar
                        pt_j = np.array([-pt_i[1], pt_i[0]])

                        # Contribution of control point k to the mapped point.
                        tmp_pt = np.zeros(2, dtype=np.float32)
                        tmp_pt[0] = np.sum(pt_i * cur_pt) * self.src_pts[k][0] - \
                                    np.sum(pt_j * cur_pt) * self.src_pts[k][1]
                        tmp_pt[1] = -np.sum(pt_i * cur_pt_j) * self.src_pts[k][0] + \
                                    np.sum(pt_j * cur_pt_j) * self.src_pts[k][1]
                        tmp_pt *= (w[k] / miu_s)
                        new_pt += tmp_pt

                    new_pt += qstar
                else:
                    # Early break above: node is exactly on control point k.
                    new_pt = self.src_pts[k]

                # Store the displacement of this grid node.
                self.rdx[j, i] = new_pt[0] - i
                self.rdy[j, i] = new_pt[1] - j

                j += self.grid_size
            i += self.grid_size

    def gen_img(self):
        """Interpolate displacements per pixel and sample src bilinearly."""
        src_h, src_w = self.src.shape[:2]
        dst = np.zeros_like(self.src, dtype=np.float32)

        # Walk output grid cells; each cell interpolates its 4 node deltas.
        for i in np.arange(0, self.dst_h, self.grid_size):
            for j in np.arange(0, self.dst_w, self.grid_size):
                ni = i + self.grid_size
                nj = j + self.grid_size
                w = h = self.grid_size
                # Shrink the last cell so it ends on the image border.
                if ni >= self.dst_h:
                    ni = self.dst_h - 1
                    h = ni - i + 1
                if nj >= self.dst_w:
                    nj = self.dst_w - 1
                    w = nj - j + 1

                di = np.reshape(np.arange(h), (-1, 1))
                dj = np.reshape(np.arange(w), (1, -1))
                # Interpolate the node displacements across the cell.
                delta_x = self.__bilinear_interp(
                    di / h, dj / w, self.rdx[i, j], self.rdx[i, nj],
                    self.rdx[ni, j], self.rdx[ni, nj])
                delta_y = self.__bilinear_interp(
                    di / h, dj / w, self.rdy[i, j], self.rdy[i, nj],
                    self.rdy[ni, j], self.rdy[ni, nj])
                # Source sampling coordinates, clipped to the source image.
                nx = j + dj + delta_x * self.trans_ratio
                ny = i + di + delta_y * self.trans_ratio
                nx = np.clip(nx, 0, src_w - 1)
                ny = np.clip(ny, 0, src_h - 1)
                nxi = np.array(np.floor(nx), dtype=np.int32)
                nyi = np.array(np.floor(ny), dtype=np.int32)
                nxi1 = np.array(np.ceil(nx), dtype=np.int32)
                nyi1 = np.array(np.ceil(ny), dtype=np.int32)

                # Fractional parts, broadcast over channels when present.
                if len(self.src.shape) == 3:
                    x = np.tile(np.expand_dims(ny - nyi, axis=-1), (1, 1, 3))
                    y = np.tile(np.expand_dims(nx - nxi, axis=-1), (1, 1, 3))
                else:
                    x = ny - nyi
                    y = nx - nxi
                dst[i:i + h, j:j + w] = self.__bilinear_interp(
                    x, y, self.src[nyi, nxi], self.src[nyi, nxi1],
                    self.src[nyi1, nxi], self.src[nyi1, nxi1])

        dst = np.clip(dst, 0, 255)
        dst = np.array(dst, dtype=np.uint8)

        return dst
\ No newline at end of file
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import random
import numpy as np
import paddle
from ppocr.utils.utility import create_module
from copy import deepcopy
from .rec.img_tools import process_image
import cv2
import sys
import signal
# handle terminate reader process, do not print stack frame
def _reader_quit(signum, frame):
print("Reader process exit.")
sys.exit()
def _term_group(sig_num, frame):
    """SIGINT handler: kill the whole process group (reader workers too)."""
    print('pid {} terminated, terminate group '
          '{}...'.format(os.getpid(), os.getpgrp()))
    os.killpg(os.getpgid(os.getpid()), signal.SIGKILL)
# Registered at import time: SIGTERM exits the reader quietly, SIGINT
# kills the whole process group so worker readers die with the parent.
signal.signal(signal.SIGTERM, _reader_quit)
signal.signal(signal.SIGINT, _term_group)
def reader_main(config=None, mode=None):
    """Create a data reader for the given mode.

    Args:
        config: full config dict containing Global plus
            TrainReader/EvalReader/TestReader sections.
        mode: one of "train", "eval", "test".

    Returns:
        A reader callable; for training (except on Windows) a
        multiprocess reader over ``num_workers`` workers.
    """
    assert mode in ["train", "eval", "test"],\
        "Nonsupport mode:{}".format(mode)

    section_by_mode = {
        "train": 'TrainReader',
        "eval": 'EvalReader',
        "test": 'TestReader',
    }
    params = deepcopy(config[section_by_mode[mode]])
    params['mode'] = mode
    params.update(config['Global'])
    function = create_module(params['reader_function'])(params)

    if mode != "train":
        return function(mode)
    # Multiprocess readers are unsupported on Windows: single worker.
    if sys.platform == "win32":
        return function(0)
    readers = [function(process_id)
               for process_id in range(params['num_workers'])]
    return paddle.reader.multiprocess_reader(readers, False)
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
import os
import sys
import math
import random
import numpy as np
import cv2
import string
import lmdb
from ppocr.utils.utility import initial_logger
from ppocr.utils.utility import get_image_file_list
logger = initial_logger()
from .img_tools import process_image, process_image_srn, get_img_data
class LMDBReader(object):
def __init__(self, params):
if params['mode'] != 'train':
self.num_workers = 1
else:
self.num_workers = params['num_workers']
self.lmdb_sets_dir = params['lmdb_sets_dir']
self.char_ops = params['char_ops']
self.image_shape = params['image_shape']
self.loss_type = params['loss_type']
self.max_text_length = params['max_text_length']
self.mode = params['mode']
self.drop_last = False
self.use_tps = False
self.num_heads = None
if "num_heads" in params:
self.num_heads = params['num_heads']
if "tps" in params:
self.ues_tps = True
self.use_distort = False
if "distort" in params:
self.use_distort = params['distort'] and params['use_gpu']
if not params['use_gpu']:
logger.info(
"Distort operation can only support in GPU. Distort will be set to False."
)
if params['mode'] == 'train':
self.batch_size = params['train_batch_size_per_card']
self.drop_last = True
else:
self.batch_size = params['test_batch_size_per_card']
self.drop_last = False
self.use_distort = False
self.infer_img = params['infer_img']
    def load_hierarchical_lmdb_dataset(self):
        """Open every leaf directory under lmdb_sets_dir as an LMDB dataset.

        Returns:
            dict: dataset_idx -> {dirpath, env, txn, num_samples}.
        """
        lmdb_sets = {}
        dataset_idx = 0
        for dirpath, dirnames, filenames in os.walk(self.lmdb_sets_dir + '/'):
            # A directory with no subdirectories is treated as one LMDB env.
            if not dirnames:
                env = lmdb.open(
                    dirpath,
                    max_readers=32,
                    readonly=True,
                    lock=False,
                    readahead=False,
                    meminit=False)
                txn = env.begin(write=False)
                # 'num-samples' is a metadata key stored in the LMDB itself.
                num_samples = int(txn.get('num-samples'.encode()))
                lmdb_sets[dataset_idx] = {"dirpath":dirpath, "env":env, \
                    "txn":txn, "num_samples":num_samples}
                dataset_idx += 1
        return lmdb_sets
    def print_lmdb_sets_info(self, lmdb_sets):
        """Log a one-line summary (dirpath:num_samples per dataset)."""
        lmdb_info_strs = []
        for dataset_idx in range(len(lmdb_sets)):
            tmp_str = " %s:%d," % (lmdb_sets[dataset_idx]['dirpath'],
                                   lmdb_sets[dataset_idx]['num_samples'])
            lmdb_info_strs.append(tmp_str)
        lmdb_info_strs = ''.join(lmdb_info_strs)
        logger.info("DataSummary:" + lmdb_info_strs)
        return
def close_lmdb_dataset(self, lmdb_sets):
for dataset_idx in lmdb_sets:
lmdb_sets[dataset_idx]['env'].close()
return
def get_lmdb_sample_info(self, txn, index):
label_key = 'label-%09d'.encode() % index
label = txn.get(label_key)
if label is None:
return None
label = label.decode('utf-8')
img_key = 'image-%09d'.encode() % index
imgbuf = txn.get(img_key)
img = get_img_data(imgbuf)
if img is None:
return None
return img, label
def __call__(self, process_id):
if self.mode != 'train':
process_id = 0
def sample_iter_reader():
if self.mode != 'train' and self.infer_img is not None:
image_file_list = get_image_file_list(self.infer_img)
for single_img in image_file_list:
img = cv2.imread(single_img)
if img.shape[-1] == 1 or len(list(img.shape)) == 2:
img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
if self.loss_type == 'srn':
norm_img = process_image_srn(
img=img,
image_shape=self.image_shape,
num_heads=self.num_heads,
max_text_length=self.max_text_length)
else:
norm_img = process_image(
img=img,
image_shape=self.image_shape,
char_ops=self.char_ops,
tps=self.use_tps,
infer_mode=True)
yield norm_img
else:
lmdb_sets = self.load_hierarchical_lmdb_dataset()
if process_id == 0:
self.print_lmdb_sets_info(lmdb_sets)
cur_index_sets = [1 + process_id] * len(lmdb_sets)
while True:
finish_read_num = 0
for dataset_idx in range(len(lmdb_sets)):
cur_index = cur_index_sets[dataset_idx]
if cur_index > lmdb_sets[dataset_idx]['num_samples']:
finish_read_num += 1
else:
sample_info = self.get_lmdb_sample_info(
lmdb_sets[dataset_idx]['txn'], cur_index)
cur_index_sets[dataset_idx] += self.num_workers
if sample_info is None:
continue
img, label = sample_info
outs = []
if self.loss_type == "srn":
outs = process_image_srn(
img=img,
image_shape=self.image_shape,
num_heads=self.num_heads,
max_text_length=self.max_text_length,
label=label,
char_ops=self.char_ops,
loss_type=self.loss_type)
else:
outs = process_image(
img=img,
image_shape=self.image_shape,
label=label,
char_ops=self.char_ops,
loss_type=self.loss_type,
max_text_length=self.max_text_length)
if outs is None:
continue
yield outs
if finish_read_num == len(lmdb_sets):
break
self.close_lmdb_dataset(lmdb_sets)
def batch_iter_reader():
batch_outs = []
for outs in sample_iter_reader():
batch_outs.append(outs)
if len(batch_outs) == self.batch_size:
yield batch_outs
batch_outs = []
if not self.drop_last:
if len(batch_outs) != 0:
yield batch_outs
if self.infer_img is None:
return batch_iter_reader
return sample_iter_reader
class SimpleReader(object):
    """Data reader for recognition datasets described by a plain label file
    (one "<image path>\t<label>" entry per line).

    Calling the instance with a worker process id returns a sample-level
    generator for inference (infer_img given) or a batch-level generator
    for train/eval.
    """

    def __init__(self, params):
        # Only training uses multiple reader workers.
        if params['mode'] != 'train':
            self.num_workers = 1
        else:
            self.num_workers = params['num_workers']
        # Test/infer mode has no label file or image root directory.
        if params['mode'] != 'test':
            self.img_set_dir = params['img_set_dir']
            self.label_file_path = params['label_file_path']
        self.use_gpu = params['use_gpu']
        self.char_ops = params['char_ops']
        self.image_shape = params['image_shape']
        self.loss_type = params['loss_type']
        self.max_text_length = params['max_text_length']
        self.mode = params['mode']
        self.infer_img = params['infer_img']
        self.use_tps = False
        # num_heads is only required by the SRN loss.
        if "num_heads" in params:
            self.num_heads = params['num_heads']
        if "tps" in params:
            self.use_tps = True
        self.use_distort = False
        if "distort" in params:
            # Distortion augmentation is supported on GPU only.
            self.use_distort = params['distort'] and params['use_gpu']
            if not params['use_gpu']:
                logger.info(
                    "Distort operation can only support in GPU.Distort will be set to False."
                )
        if params['mode'] == 'train':
            self.batch_size = params['train_batch_size_per_card']
            self.drop_last = True
        else:
            self.batch_size = params['test_batch_size_per_card']
            self.drop_last = False
            self.use_distort = False

    def __call__(self, process_id):
        # Outside training there is a single worker, so force id 0.
        if self.mode != 'train':
            process_id = 0

        def get_device_num():
            # Device count decides how many samples one training step consumes.
            if self.use_gpu:
                gpus = os.environ.get("CUDA_VISIBLE_DEVICES", '1')
                gpu_num = len(gpus.split(','))
                return gpu_num
            else:
                cpu_num = os.environ.get("CPU_NUM", 1)
                return int(cpu_num)

        def sample_iter_reader():
            # Inference path: iterate the images under infer_img directly.
            if self.mode != 'train' and self.infer_img is not None:
                image_file_list = get_image_file_list(self.infer_img)
                for single_img in image_file_list:
                    img = cv2.imread(single_img)
                    # Promote grayscale images to 3-channel BGR.
                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    if self.loss_type == 'srn':
                        norm_img = process_image_srn(
                            img=img,
                            image_shape=self.image_shape,
                            char_ops=self.char_ops,
                            num_heads=self.num_heads,
                            max_text_length=self.max_text_length)
                    else:
                        norm_img = process_image(
                            img=img,
                            image_shape=self.image_shape,
                            char_ops=self.char_ops,
                            tps=self.use_tps,
                            infer_mode=True)
                    yield norm_img
            else:
                # Train/eval path: read and shuffle the label file, then
                # stride over it so each worker gets a disjoint subset.
                with open(self.label_file_path, "rb") as fin:
                    label_infor_list = fin.readlines()
                img_num = len(label_infor_list)
                img_id_list = list(range(img_num))
                random.shuffle(img_id_list)
                if sys.platform == "win32" and self.num_workers != 1:
                    print("multiprocess is not fully compatible with Windows."
                          "num_workers will be 1.")
                    self.num_workers = 1
                # Each global step needs batch_size * devices * workers samples.
                if self.batch_size * get_device_num(
                ) * self.num_workers > img_num:
                    raise Exception(
                        "The number of the whole data ({}) is smaller than the batch_size * devices_num * num_workers ({})".
                        format(img_num, self.batch_size * get_device_num() *
                               self.num_workers))
                for img_id in range(process_id, img_num, self.num_workers):
                    label_infor = label_infor_list[img_id_list[img_id]]
                    substr = label_infor.decode('utf-8').strip("\n").split("\t")
                    img_path = self.img_set_dir + "/" + substr[0]
                    img = cv2.imread(img_path)
                    # Skip missing images instead of crashing.
                    if img is None:
                        logger.info("{} does not exist!".format(img_path))
                        continue
                    if img.shape[-1] == 1 or len(list(img.shape)) == 2:
                        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
                    label = substr[1]
                    if self.loss_type == "srn":
                        outs = process_image_srn(
                            img=img,
                            image_shape=self.image_shape,
                            num_heads=self.num_heads,
                            max_text_length=self.max_text_length,
                            label=label,
                            char_ops=self.char_ops,
                            loss_type=self.loss_type)
                    else:
                        outs = process_image(
                            img=img,
                            image_shape=self.image_shape,
                            label=label,
                            char_ops=self.char_ops,
                            loss_type=self.loss_type,
                            max_text_length=self.max_text_length,
                            distort=self.use_distort)
                    # process_image* returns None for samples it rejects.
                    if outs is None:
                        continue
                    yield outs

        def batch_iter_reader():
            # Group samples into batches; a trailing partial batch is kept
            # unless drop_last is set (training).
            batch_outs = []
            for outs in sample_iter_reader():
                batch_outs.append(outs)
                if len(batch_outs) == self.batch_size:
                    yield batch_outs
                    batch_outs = []
            if not self.drop_last:
                if len(batch_outs) != 0:
                    yield batch_outs

        # Inference consumes raw samples; train/eval consume batches.
        if self.infer_img is None:
            return batch_iter_reader
        return sample_iter_reader
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
__all__ = ['DetMetric']
from .eval_det_iou import DetectionIoUEvaluator
class DetMetric(object):
    """Accumulate per-image detection IoU results and report precision,
    recall and hmean via DetectionIoUEvaluator."""

    def __init__(self, main_indicator='hmean', **kwargs):
        self.evaluator = DetectionIoUEvaluator()
        self.main_indicator = main_indicator
        self.reset()

    def __call__(self, preds, batch, **kwargs):
        """Evaluate one batch and store the per-image results.

        batch: list from the dataloader; batch[2] holds the ground-truth
            polygons (N, K, 4, 2) and batch[3] the per-region ignore flags
            (N, K).
        preds: list of post-process dicts, each with a 'points' entry of
            shape (K, 4, 2).
        """
        gt_polygons_batch = batch[2]
        ignore_flags_batch = batch[3]
        zipped = zip(preds, gt_polygons_batch, ignore_flags_batch)
        for pred, gt_polygons, ignore_flags in zipped:
            # Ground-truth regions carry an ignore flag; text is unused.
            gt_info_list = [
                {'points': polygon, 'text': '', 'ignore': flag}
                for polygon, flag in zip(gt_polygons, ignore_flags)
            ]
            # Detections only need their polygons.
            det_info_list = [
                {'points': polygon, 'text': ''}
                for polygon in pred['points']
            ]
            image_result = self.evaluator.evaluate_image(gt_info_list,
                                                         det_info_list)
            self.results.append(image_result)

    def get_metric(self):
        """Combine accumulated results into {'precision', 'recall', 'hmean'}
        and clear the accumulator."""
        combined = self.evaluator.combine_results(self.results)
        self.reset()
        return combined

    def reset(self):
        # Drop all accumulated per-image results.
        self.results = []
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import Levenshtein
class RecMetric(object):
    """Accumulate text-recognition accuracy and normalized edit distance."""

    def __init__(self, main_indicator='acc', **kwargs):
        self.main_indicator = main_indicator
        self.reset()

    def __call__(self, pred_label, *args, **kwargs):
        """Update counters with one batch.

        Args:
            pred_label: tuple (preds, labels); each element is a sequence
                of (text, confidence) pairs.
        Returns:
            dict with the batch-level 'acc' and 'norm_edit_dis'.
        """
        preds, labels = pred_label
        correct_num = 0
        all_num = 0
        norm_edit_dis = 0.0
        for (pred, pred_conf), (target, _) in zip(preds, labels):
            # The extra `1` guards against both strings being empty, which
            # made max(...) == 0 and raised ZeroDivisionError before.
            norm_edit_dis += Levenshtein.distance(pred, target) / max(
                len(pred), len(target), 1)
            if pred == target:
                correct_num += 1
            all_num += 1
        self.correct_num += correct_num
        self.all_num += all_num
        self.norm_edit_dis += norm_edit_dis
        # Guard against an empty batch (would divide by zero below).
        if all_num == 0:
            return {'acc': 0, 'norm_edit_dis': 0}
        return {
            'acc': correct_num / all_num,
            'norm_edit_dis': 1 - norm_edit_dis / all_num
        }

    def get_metric(self):
        """
        return metrics {
            'acc': 0,
            'norm_edit_dis': 0,
        }
        """
        acc = self.correct_num / self.all_num
        norm_edit_dis = 1 - self.norm_edit_dis / self.all_num
        self.reset()
        return {'acc': acc, 'norm_edit_dis': norm_edit_dis}

    def reset(self):
        # Clear accumulated counters.
        self.correct_num = 0
        self.all_num = 0
        self.norm_edit_dis = 0
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import copy
__all__ = ['build_metric']
def build_metric(config):
    """Create a metric instance from a config dict.

    Args:
        config (dict): must contain 'name' (one of the supported metric
            class names); remaining keys are passed to the constructor.
    Returns:
        the instantiated metric object.
    """
    from .DetMetric import DetMetric
    from .RecMetric import RecMetric

    # Explicit registry instead of eval() on the config-supplied name.
    support_dict = {'DetMetric': DetMetric, 'RecMetric': RecMetric}

    config = copy.deepcopy(config)
    module_name = config.pop('name')
    assert module_name in support_dict, Exception(
        'metric only support {}'.format(list(support_dict)))
    return support_dict[module_name](**config)
...@@ -88,8 +88,8 @@ class DetectionIoUEvaluator(object): ...@@ -88,8 +88,8 @@ class DetectionIoUEvaluator(object):
points = gt[n]['points'] points = gt[n]['points']
# transcription = gt[n]['text'] # transcription = gt[n]['text']
dontCare = gt[n]['ignore'] dontCare = gt[n]['ignore']
# points = Polygon(points) # points = Polygon(points)
# points = points.buffer(0) # points = points.buffer(0)
if not Polygon(points).is_valid or not Polygon(points).is_simple: if not Polygon(points).is_valid or not Polygon(points).is_simple:
continue continue
...@@ -105,8 +105,8 @@ class DetectionIoUEvaluator(object): ...@@ -105,8 +105,8 @@ class DetectionIoUEvaluator(object):
for n in range(len(pred)): for n in range(len(pred)):
points = pred[n]['points'] points = pred[n]['points']
# points = Polygon(points) # points = Polygon(points)
# points = points.buffer(0) # points = points.buffer(0)
if not Polygon(points).is_valid or not Polygon(points).is_simple: if not Polygon(points).is_valid or not Polygon(points).is_simple:
continue continue
......
...@@ -11,3 +11,16 @@ ...@@ -11,3 +11,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
import copy
from .losses import build_loss
__all__ = ['build_model', 'build_loss']
def build_model(config):
    """Instantiate the top-level Model architecture from a config dict."""
    from .architectures import Model

    # Deep-copy so the caller's config is not mutated by model construction.
    model_config = copy.deepcopy(config)
    return Model(model_config)
...@@ -11,3 +11,6 @@ ...@@ -11,3 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from .model import Model
__all__ = ['Model']
\ No newline at end of file
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from ppocr.utils.utility import create_module
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from copy import deepcopy
class DetModel(object):
    # Legacy static-graph (fluid) detection model: backbone + head + loss
    # assembled from a params dict via create_module.
    def __init__(self, params):
        """
        Detection module for OCR text detection.
        args:
            params (dict): the super parameters for detection module.
        """
        global_params = params['Global']
        self.algorithm = global_params['algorithm']
        # Each component's own params are merged with the global section
        # before instantiation.
        backbone_params = deepcopy(params["Backbone"])
        backbone_params.update(global_params)
        self.backbone = create_module(backbone_params['function'])\
            (params=backbone_params)
        head_params = deepcopy(params["Head"])
        head_params.update(global_params)
        self.head = create_module(head_params['function'])\
            (params=head_params)
        loss_params = deepcopy(params["Loss"])
        loss_params.update(global_params)
        self.loss = create_module(loss_params['function'])\
            (params=loss_params)
        self.image_shape = global_params['image_shape']

    def create_feed(self, mode):
        """
        create Dataloader feeds
        args:
            mode (str): 'train' for training or else for evaluation
        return: (image, corresponding label, dataloader)
        """
        image_shape = deepcopy(self.image_shape)
        # Downsampling in the network requires H and W divisible by 4.
        if image_shape[1] % 4 != 0 or image_shape[2] % 4 != 0:
            raise Exception("The size of the image must be divisible by 4, "
                            "received image shape is {}, please reset the "
                            "Global.image_shape in the yml file".format(
                                image_shape))
        image = fluid.layers.data(
            name='image', shape=image_shape, dtype='float32')
        if mode == "train":
            # Each algorithm consumes a different set of label maps.
            if self.algorithm == "EAST":
                # EAST labels are predicted at 1/4 resolution.
                h, w = int(image_shape[1] // 4), int(image_shape[2] // 4)
                score = fluid.layers.data(
                    name='score', shape=[1, h, w], dtype='float32')
                geo = fluid.layers.data(
                    name='geo', shape=[9, h, w], dtype='float32')
                mask = fluid.layers.data(
                    name='mask', shape=[1, h, w], dtype='float32')
                feed_list = [image, score, geo, mask]
                labels = {'score': score, 'geo': geo, 'mask': mask}
            elif self.algorithm == "DB":
                shrink_map = fluid.layers.data(
                    name='shrink_map', shape=image_shape[1:], dtype='float32')
                shrink_mask = fluid.layers.data(
                    name='shrink_mask', shape=image_shape[1:], dtype='float32')
                threshold_map = fluid.layers.data(
                    name='threshold_map',
                    shape=image_shape[1:],
                    dtype='float32')
                threshold_mask = fluid.layers.data(
                    name='threshold_mask',
                    shape=image_shape[1:],
                    dtype='float32')
                feed_list=[image, shrink_map, shrink_mask,\
                           threshold_map, threshold_mask]
                labels = {'shrink_map':shrink_map,\
                          'shrink_mask':shrink_mask,\
                          'threshold_map':threshold_map,\
                          'threshold_mask':threshold_mask}
            elif self.algorithm == "SAST":
                # NOTE(review): SAST label maps use a hard-coded 128x128
                # resolution — confirm this matches the configured
                # image_shape before changing input sizes.
                input_score = fluid.layers.data(
                    name='score', shape=[1, 128, 128], dtype='float32')
                input_border = fluid.layers.data(
                    name='border', shape=[5, 128, 128], dtype='float32')
                input_mask = fluid.layers.data(
                    name='mask', shape=[1, 128, 128], dtype='float32')
                input_tvo = fluid.layers.data(
                    name='tvo', shape=[9, 128, 128], dtype='float32')
                input_tco = fluid.layers.data(
                    name='tco', shape=[3, 128, 128], dtype='float32')
                feed_list = [image, input_score, input_border, input_mask, input_tvo, input_tco]
                labels = {'input_score': input_score,\
                          'input_border': input_border,\
                          'input_mask': input_mask,\
                          'input_tvo': input_tvo,\
                          'input_tco': input_tco}
            loader = fluid.io.DataLoader.from_generator(
                feed_list=feed_list,
                capacity=64,
                use_double_buffer=True,
                iterable=False)
        else:
            # Evaluation/export: no labels and no dataloader needed.
            labels = None
            loader = None
        return image, labels, loader

    def __call__(self, mode):
        """
        run forward of defined module
        args:
            mode (str): 'train' for training; 'export' for inference,
                others for evaluation]
        """
        image, labels, loader = self.create_feed(mode)
        conv_feas = self.backbone(image)
        # The DB head behaves differently in train vs eval, so it needs mode.
        if self.algorithm == "DB":
            predicts = self.head(conv_feas, mode)
        else:
            predicts = self.head(conv_feas)
        if mode == "train":
            losses = self.loss(predicts, labels)
            return loader, losses
        elif mode == "export":
            # Export returns the input placeholder and fetch targets.
            return [image, predicts]
        else:
            return loader, predicts
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os, sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append('/home/zhoujun20/PaddleOCR')
import paddle
from paddle import nn
from ppocr.modeling.transform import build_transform
from ppocr.modeling.backbones import build_backbone
from ppocr.modeling.necks import build_neck
from ppocr.modeling.heads import build_head
__all__ = ['Model']
class Model(nn.Layer):
    def __init__(self, config):
        """
        Composite OCR network: optional transform -> backbone ->
        optional neck -> head, each stage built from the config dict.

        args:
            config (dict): the super parameters for module.
        """
        super(Model, self).__init__()
        algorithm = config['algorithm']
        self.type = config['type']
        self.model_name = '{}_{}'.format(self.type, algorithm)
        in_channels = config.get('in_channels', 3)

        # Transform stage (e.g. TPS for rec). Absent or None means the
        # stage is skipped; det and cls normally have no transform.
        transform_config = config.get('Transform')
        self.use_transform = transform_config is not None
        if self.use_transform:
            transform_config['in_channels'] = in_channels
            self.transform = build_transform(transform_config)
            in_channels = self.transform.out_channels

        # Backbone is mandatory for det, rec and cls.
        config["Backbone"]['in_channels'] = in_channels
        self.backbone = build_backbone(config["Backbone"], self.type)
        in_channels = self.backbone.out_channels

        # Neck stage: FPN/BiFPN for det, cnn/rnn/reshape for rec; cls has
        # none. Absent or None means the stage is skipped.
        neck_config = config.get('Neck')
        self.use_neck = neck_config is not None
        if self.use_neck:
            neck_config['in_channels'] = in_channels
            self.neck = build_neck(neck_config)
            in_channels = self.neck.out_channels

        # Head produces the task-specific prediction.
        config["Head"]['in_channels'] = in_channels
        self.head = build_head(config["Head"])

    def forward(self, x):
        # Run the stages in order, skipping the optional ones.
        if self.use_transform:
            x = self.transform(x)
        features = self.backbone(x)
        if self.use_neck:
            features = self.neck(features)
        return self.head(features)
def check_static():
    """Ad-hoc debug harness: run the dygraph Model on a zero tensor and
    diff the output against a saved static-graph result.

    NOTE(review): relies on hard-coded local paths and a developer config;
    intended for manual use during the dygraph port, not as library API.
    """
    import numpy as np
    from ppocr.utils.save_load import load_dygraph_pretrain
    from ppocr.utils.logging import get_logger
    from tools import program
    config = program.load_config('configs/det/det_r50_vd_db.yml')
    # import cv2
    # data = cv2.imread('doc/imgs/1.jpg')
    # data = normalize(data)
    logger = get_logger()
    # Dummy input in (N, C, H, W) layout matching the detection config.
    data = np.zeros((1, 3, 640, 640), dtype=np.float32)
    paddle.disable_static()
    config['Architecture']['in_channels'] = 3
    config['Architecture']["Head"]['out_channels'] = 6624
    model = Model(config['Architecture'])
    model.eval()
    # Load static-graph weights converted for the dygraph model.
    load_dygraph_pretrain(
        model,
        logger,
        '/Users/zhoujun20/Desktop/code/PaddleOCR/db/db',
        load_static_weights=True)
    x = paddle.to_variable(data)
    y = model(x)
    for y1 in y:
        print(y1.shape)
    #
    # # from matplotlib import pyplot as plt
    # # plt.imshow(y.numpy())
    # # plt.show()
    # Compare against the saved static-graph output; mean diff should be ~0.
    static_out = np.load('/Users/zhoujun20/Desktop/code/PaddleOCR/db/db.npy')
    diff = y.numpy() - static_out
    print(y.shape, static_out.shape, diff.mean())
if __name__ == '__main__':
    check_static()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from ppocr.utils.utility import create_module
from ppocr.utils.utility import initial_logger
logger = initial_logger()
from copy import deepcopy
class RecModel(object):
    """Legacy static-graph (fluid) text recognition model: optional TPS
    transform, backbone, head and loss assembled from a params dict."""

    def __init__(self, params):
        super(RecModel, self).__init__()
        global_params = params['Global']
        char_num = global_params['char_ops'].get_char_num()
        global_params['char_num'] = char_num
        self.char_type = global_params['character_type']
        self.infer_img = global_params['infer_img']
        # Optional TPS spatial transformer in front of the backbone.
        if "TPS" in params:
            tps_params = deepcopy(params["TPS"])
            tps_params.update(global_params)
            self.tps = create_module(tps_params['function'])\
                (params=tps_params)
        else:
            self.tps = None
        # Each component's own params are merged with the global section.
        backbone_params = deepcopy(params["Backbone"])
        backbone_params.update(global_params)
        self.backbone = create_module(backbone_params['function'])\
            (params=backbone_params)
        head_params = deepcopy(params["Head"])
        head_params.update(global_params)
        self.head = create_module(head_params['function'])\
            (params=head_params)
        loss_params = deepcopy(params["Loss"])
        loss_params.update(global_params)
        self.loss = create_module(loss_params['function'])\
            (params=loss_params)
        self.loss_type = global_params['loss_type']
        self.image_shape = global_params['image_shape']
        self.max_text_length = global_params['max_text_length']
        # num_heads is only required by the SRN loss.
        if "num_heads" in global_params:
            self.num_heads = global_params["num_heads"]
        else:
            self.num_heads = None

    def create_feed(self, mode):
        """Create input placeholders, the labels dict and (for train) a
        DataLoader.

        args:
            mode (str): 'train' for training, others for eval/inference
        return: (image placeholder, labels dict or None, loader or None)
        """
        image_shape = deepcopy(self.image_shape)
        image_shape.insert(0, -1)  # prepend batch dimension
        if mode == "train":
            image = fluid.data(name='image', shape=image_shape, dtype='float32')
            if self.loss_type == "attention":
                # Attention needs shifted-in and shifted-out label sequences.
                label_in = fluid.data(
                    name='label_in',
                    shape=[None, 1],
                    dtype='int32',
                    lod_level=1)
                label_out = fluid.data(
                    name='label_out',
                    shape=[None, 1],
                    dtype='int32',
                    lod_level=1)
                feed_list = [image, label_in, label_out]
                labels = {'label_in': label_in, 'label_out': label_out}
            elif self.loss_type == "srn":
                # SRN needs positional encodings and self-attention biases.
                encoder_word_pos = fluid.data(
                    name="encoder_word_pos",
                    shape=[
                        -1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)),
                        1
                    ],
                    dtype="int64")
                gsrm_word_pos = fluid.data(
                    name="gsrm_word_pos",
                    shape=[-1, self.max_text_length, 1],
                    dtype="int64")
                gsrm_slf_attn_bias1 = fluid.data(
                    name="gsrm_slf_attn_bias1",
                    shape=[
                        -1, self.num_heads, self.max_text_length,
                        self.max_text_length
                    ],
                    dtype="float32")
                gsrm_slf_attn_bias2 = fluid.data(
                    name="gsrm_slf_attn_bias2",
                    shape=[
                        -1, self.num_heads, self.max_text_length,
                        self.max_text_length
                    ],
                    dtype="float32")
                lbl_weight = fluid.layers.data(
                    name="lbl_weight", shape=[-1, 1], dtype='int64')
                label = fluid.data(
                    name='label', shape=[-1, 1], dtype='int32', lod_level=1)
                feed_list = [
                    image, label, encoder_word_pos, gsrm_word_pos,
                    gsrm_slf_attn_bias1, gsrm_slf_attn_bias2, lbl_weight
                ]
                labels = {
                    'label': label,
                    'encoder_word_pos': encoder_word_pos,
                    'gsrm_word_pos': gsrm_word_pos,
                    'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1,
                    'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2,
                    'lbl_weight': lbl_weight
                }
            else:
                # CTC only needs the label sequence.
                label = fluid.data(
                    name='label', shape=[None, 1], dtype='int32', lod_level=1)
                feed_list = [image, label]
                labels = {'label': label}
            loader = fluid.io.DataLoader.from_generator(
                feed_list=feed_list,
                capacity=64,
                use_double_buffer=True,
                iterable=False)
        else:
            labels = None
            loader = None
            # Chinese inference uses variable-width images unless TPS is on.
            if self.char_type == "ch" and self.infer_img:
                image_shape[-1] = -1
                if self.tps is not None:
                    logger.info(
                        "WARNING!!!\n"
                        "TPS does not support variable shape in chinese!"
                        "We set img_shape to be the same , it may affect the inference effect"
                    )
                    # NOTE(review): this re-copy drops the batch dim inserted
                    # above — confirm fluid.data tolerates the shorter shape.
                    image_shape = deepcopy(self.image_shape)
            image = fluid.data(name='image', shape=image_shape, dtype='float32')
            if self.loss_type == "srn":
                # SRN inference still needs positional/attention inputs.
                encoder_word_pos = fluid.data(
                    name="encoder_word_pos",
                    shape=[
                        -1, int((image_shape[-2] / 8) * (image_shape[-1] / 8)),
                        1
                    ],
                    dtype="int64")
                gsrm_word_pos = fluid.data(
                    name="gsrm_word_pos",
                    shape=[-1, self.max_text_length, 1],
                    dtype="int64")
                gsrm_slf_attn_bias1 = fluid.data(
                    name="gsrm_slf_attn_bias1",
                    shape=[
                        -1, self.num_heads, self.max_text_length,
                        self.max_text_length
                    ],
                    dtype="float32")
                gsrm_slf_attn_bias2 = fluid.data(
                    name="gsrm_slf_attn_bias2",
                    shape=[
                        -1, self.num_heads, self.max_text_length,
                        self.max_text_length
                    ],
                    dtype="float32")
                feed_list = [
                    image, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1,
                    gsrm_slf_attn_bias2
                ]
                labels = {
                    'encoder_word_pos': encoder_word_pos,
                    'gsrm_word_pos': gsrm_word_pos,
                    'gsrm_slf_attn_bias1': gsrm_slf_attn_bias1,
                    'gsrm_slf_attn_bias2': gsrm_slf_attn_bias2
                }
        return image, labels, loader

    def __call__(self, mode):
        """Run forward; returns (loader, outputs) for train/eval or
        [image, fetch_targets] for export."""
        image, labels, loader = self.create_feed(mode)
        if self.tps is None:
            inputs = image
        else:
            inputs = self.tps(image)
        conv_feas = self.backbone(inputs)
        predicts = self.head(conv_feas, labels, mode)
        decoded_out = predicts['decoded_out']
        if mode == "train":
            # Build the loss graph once; SRN's loss returns a 3-tuple.
            # (The original called self.loss twice for SRN, adding duplicate
            # ops to the static graph.)
            loss = self.loss(predicts, labels)
            if self.loss_type == "attention":
                label = labels['label_out']
            else:
                label = labels['label']
            if self.loss_type == 'srn':
                total_loss, img_loss, word_loss = loss
                outputs = {
                    'total_loss': total_loss,
                    'img_loss': img_loss,
                    'word_loss': word_loss,
                    'decoded_out': decoded_out,
                    'label': label
                }
            else:
                outputs = {'total_loss':loss, 'decoded_out':\
                    decoded_out, 'label':label}
            return loader, outputs
        elif mode == "export":
            predict = predicts['predict']
            if self.loss_type == "ctc":
                predict = fluid.layers.softmax(predict)
            if self.loss_type == "srn":
                raise Exception(
                    "Warning! SRN does not support export model currently")
            return [image, {'decoded_out': decoded_out, 'predicts': predict}]
        else:
            predict = predicts['predict']
            if self.loss_type == "ctc":
                predict = fluid.layers.softmax(predict)
            return loader, {'decoded_out': decoded_out, 'predicts': predict}
...@@ -11,3 +11,26 @@ ...@@ -11,3 +11,26 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
__all__ = ['build_backbone']
def build_backbone(config, model_type):
    """Create a backbone network for the given model type.

    Args:
        config (dict): must contain 'name' (a supported backbone class
            name); remaining keys are passed to the constructor.
        model_type (str): 'det' or 'rec'.
    Returns:
        the instantiated backbone.
    """
    if model_type == 'det':
        from .det_mobilenet_v3 import MobileNetV3
        from .det_resnet_vd import ResNet
        # NOTE(review): 'ResNet_SAST' is accepted but not imported here —
        # selecting it would raise NameError; confirm the intended module.
        support_dict = ['MobileNetV3', 'ResNet', 'ResNet_SAST']
    elif model_type == 'rec':
        from .rec_mobilenet_v3 import MobileNetV3
        from .rec_resnet_vd import ResNet
        # NOTE(review): 'ResNet_FPN' is accepted but not imported here.
        support_dict = ['MobileNetV3', 'ResNet', 'ResNet_FPN']
    else:
        raise NotImplementedError
    module_name = config.pop('name')
    # Typo fixed in the error message: "model typs" -> "model type".
    assert module_name in support_dict, Exception(
        'when model type is {}, backbone only support {}'.format(model_type,
                                                                 support_dict))
    module_class = eval(module_name)(**config)
    return module_class
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import paddle.fluid as fluid import paddle
from paddle.fluid.initializer import MSRA from paddle import nn
from paddle.fluid.param_attr import ParamAttr import paddle.nn.functional as F
from paddle import ParamAttr
__all__ = ['MobileNetV3'] __all__ = ['MobileNetV3']
class MobileNetV3(): def make_divisible(v, divisor=8, min_value=None):
def __init__(self, params): if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
class MobileNetV3(nn.Layer):
def __init__(self, in_channels=3, model_name='large', scale=0.5, **kwargs):
""" """
the MobilenetV3 backbone network for detection module. the MobilenetV3 backbone network for detection module.
Args: Args:
params(dict): the super parameters for build network params(dict): the super parameters for build network
""" """
self.scale = params['scale'] super(MobileNetV3, self).__init__()
model_name = params['model_name']
self.inplanes = 16
if model_name == "large": if model_name == "large":
self.cfg = [ cfg = [
# k, exp, c, se, nl, s, # k, exp, c, se, nl, s,
[3, 16, 16, False, 'relu', 1], [3, 16, 16, False, 'relu', 1],
[3, 64, 24, False, 'relu', 2], [3, 64, 24, False, 'relu', 2],
...@@ -52,10 +60,9 @@ class MobileNetV3(): ...@@ -52,10 +60,9 @@ class MobileNetV3():
[5, 960, 160, True, 'hard_swish', 1], [5, 960, 160, True, 'hard_swish', 1],
[5, 960, 160, True, 'hard_swish', 1], [5, 960, 160, True, 'hard_swish', 1],
] ]
self.cls_ch_squeeze = 960 cls_ch_squeeze = 960
self.cls_ch_expand = 1280
elif model_name == "small": elif model_name == "small":
self.cfg = [ cfg = [
# k, exp, c, se, nl, s, # k, exp, c, se, nl, s,
[3, 16, 16, True, 'relu', 2], [3, 16, 16, True, 'relu', 2],
[3, 72, 24, False, 'relu', 2], [3, 72, 24, False, 'relu', 2],
...@@ -69,183 +76,203 @@ class MobileNetV3(): ...@@ -69,183 +76,203 @@ class MobileNetV3():
[5, 576, 96, True, 'hard_swish', 1], [5, 576, 96, True, 'hard_swish', 1],
[5, 576, 96, True, 'hard_swish', 1], [5, 576, 96, True, 'hard_swish', 1],
] ]
self.cls_ch_squeeze = 576 cls_ch_squeeze = 576
self.cls_ch_expand = 1280
else: else:
raise NotImplementedError("mode[" + model_name + raise NotImplementedError("mode[" + model_name +
"_model] is not implemented!") "_model] is not implemented!")
supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25] supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
assert self.scale in supported_scale, \ assert scale in supported_scale, \
"supported scale are {} but input scale is {}".format(supported_scale, self.scale) "supported scale are {} but input scale is {}".format(supported_scale, scale)
inplanes = 16
def __call__(self, input): # conv1
scale = self.scale self.conv = ConvBNLayer(
inplanes = self.inplanes in_channels=in_channels,
cfg = self.cfg out_channels=make_divisible(inplanes * scale),
cls_ch_squeeze = self.cls_ch_squeeze kernel_size=3,
cls_ch_expand = self.cls_ch_expand
#conv1
conv = self.conv_bn_layer(
input,
filter_size=3,
num_filters=self.make_divisible(inplanes * scale),
stride=2, stride=2,
padding=1, padding=1,
num_groups=1, groups=1,
if_act=True, if_act=True,
act='hard_swish', act='hard_swish',
name='conv1') name='conv1')
self.stages = []
self.out_channels = []
block_list = []
i = 0 i = 0
inplanes = self.make_divisible(inplanes * scale) inplanes = make_divisible(inplanes * scale)
outs = [] for (k, exp, c, se, nl, s) in cfg:
for layer_cfg in cfg: if s == 2 and i > 2:
if layer_cfg[5] == 2 and i > 2: self.out_channels.append(inplanes)
outs.append(conv) self.stages.append(nn.Sequential(*block_list))
conv = self.residual_unit( block_list = []
input=conv, block_list.append(
num_in_filter=inplanes, ResidualUnit(
num_mid_filter=self.make_divisible(scale * layer_cfg[1]), in_channels=inplanes,
num_out_filter=self.make_divisible(scale * layer_cfg[2]), mid_channels=make_divisible(scale * exp),
act=layer_cfg[4], out_channels=make_divisible(scale * c),
stride=layer_cfg[5], kernel_size=k,
filter_size=layer_cfg[0], stride=s,
use_se=layer_cfg[3], use_se=se,
name='conv' + str(i + 2)) act=nl,
inplanes = self.make_divisible(scale * layer_cfg[2]) name="conv" + str(i + 2)))
inplanes = make_divisible(scale * c)
i += 1 i += 1
block_list.append(
ConvBNLayer(
in_channels=inplanes,
out_channels=make_divisible(scale * cls_ch_squeeze),
kernel_size=1,
stride=1,
padding=0,
groups=1,
if_act=True,
act='hard_swish',
name='conv_last'))
conv = self.conv_bn_layer( self.stages.append(nn.Sequential(*block_list))
input=conv, self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
filter_size=1, for i, stage in enumerate(self.stages):
num_filters=self.make_divisible(scale * cls_ch_squeeze), self.add_sublayer(sublayer=stage, name="stage{}".format(i))
stride=1,
padding=0, def forward(self, x):
num_groups=1, x = self.conv(x)
if_act=True, out_list = []
act='hard_swish', for stage in self.stages:
name='conv_last') x = stage(x)
outs.append(conv) out_list.append(x)
return outs return out_list
def conv_bn_layer(self,
input, class ConvBNLayer(nn.Layer):
filter_size, def __init__(self,
num_filters, in_channels,
stride, out_channels,
padding, kernel_size,
num_groups=1, stride,
if_act=True, padding,
act=None, groups=1,
name=None, if_act=True,
use_cudnn=True, act=None,
res_last_bn_init=False): name=None):
conv = fluid.layers.conv2d( super(ConvBNLayer, self).__init__()
input=input, self.if_act = if_act
num_filters=num_filters, self.act = act
filter_size=filter_size, self.conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride, stride=stride,
padding=padding, padding=padding,
groups=num_groups, groups=groups,
act=None, weight_attr=ParamAttr(name=name + '_weights'),
use_cudnn=use_cudnn,
param_attr=ParamAttr(name=name + '_weights'),
bias_attr=False) bias_attr=False)
bn_name = name + '_bn'
bn = fluid.layers.batch_norm( self.bn = nn.BatchNorm(
input=conv, num_channels=out_channels,
param_attr=ParamAttr( act=None,
name=bn_name + "_scale", param_attr=ParamAttr(name=name + "_bn_scale"),
regularizer=fluid.regularizer.L2DecayRegularizer( bias_attr=ParamAttr(name=name + "_bn_offset"),
regularization_coeff=0.0)), moving_mean_name=name + "_bn_mean",
bias_attr=ParamAttr( moving_variance_name=name + "_bn_variance")
name=bn_name + "_offset",
regularizer=fluid.regularizer.L2DecayRegularizer( def forward(self, x):
regularization_coeff=0.0)), x = self.conv(x)
moving_mean_name=bn_name + '_mean', x = self.bn(x)
moving_variance_name=bn_name + '_variance') if self.if_act:
if if_act: if self.act == "relu":
if act == 'relu': x = F.relu(x)
bn = fluid.layers.relu(bn) elif self.act == "hard_swish":
elif act == 'hard_swish': x = F.hard_swish(x)
bn = fluid.layers.hard_swish(bn) else:
return bn print("The activation function is selected incorrectly.")
exit()
def make_divisible(self, v, divisor=8, min_value=None): return x
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) class ResidualUnit(nn.Layer):
if new_v < 0.9 * v: def __init__(self,
new_v += divisor in_channels,
return new_v mid_channels,
out_channels,
def se_block(self, input, num_out_filter, ratio=4, name=None): kernel_size,
num_mid_filter = num_out_filter // ratio stride,
pool = fluid.layers.pool2d( use_se,
input=input, pool_type='avg', global_pooling=True, use_cudnn=False) act=None,
conv1 = fluid.layers.conv2d( name=''):
input=pool, super(ResidualUnit, self).__init__()
filter_size=1, self.if_shortcut = stride == 1 and in_channels == out_channels
num_filters=num_mid_filter, self.if_se = use_se
act='relu',
param_attr=ParamAttr(name=name + '_1_weights'), self.expand_conv = ConvBNLayer(
bias_attr=ParamAttr(name=name + '_1_offset')) in_channels=in_channels,
conv2 = fluid.layers.conv2d( out_channels=mid_channels,
input=conv1, kernel_size=1,
filter_size=1,
num_filters=num_out_filter,
act='hard_sigmoid',
param_attr=ParamAttr(name=name + '_2_weights'),
bias_attr=ParamAttr(name=name + '_2_offset'))
scale = fluid.layers.elementwise_mul(x=input, y=conv2, axis=0)
return scale
def residual_unit(self,
input,
num_in_filter,
num_mid_filter,
num_out_filter,
stride,
filter_size,
act=None,
use_se=False,
name=None):
conv0 = self.conv_bn_layer(
input=input,
filter_size=1,
num_filters=num_mid_filter,
stride=1, stride=1,
padding=0, padding=0,
if_act=True, if_act=True,
act=act, act=act,
name=name + '_expand') name=name + "_expand")
self.bottleneck_conv = ConvBNLayer(
conv1 = self.conv_bn_layer( in_channels=mid_channels,
input=conv0, out_channels=mid_channels,
filter_size=filter_size, kernel_size=kernel_size,
num_filters=num_mid_filter,
stride=stride, stride=stride,
padding=int((filter_size - 1) // 2), padding=int((kernel_size - 1) // 2),
groups=mid_channels,
if_act=True, if_act=True,
act=act, act=act,
num_groups=num_mid_filter, name=name + "_depthwise")
use_cudnn=False, if self.if_se:
name=name + '_depthwise') self.mid_se = SEModule(mid_channels, name=name + "_se")
if use_se: self.linear_conv = ConvBNLayer(
conv1 = self.se_block( in_channels=mid_channels,
input=conv1, num_out_filter=num_mid_filter, name=name + '_se') out_channels=out_channels,
kernel_size=1,
conv2 = self.conv_bn_layer(
input=conv1,
filter_size=1,
num_filters=num_out_filter,
stride=1, stride=1,
padding=0, padding=0,
if_act=False, if_act=False,
name=name + '_linear', act=None,
res_last_bn_init=True) name=name + "_linear")
if num_in_filter != num_out_filter or stride != 1:
return conv2 def forward(self, inputs):
else: x = self.expand_conv(inputs)
return fluid.layers.elementwise_add(x=input, y=conv2, act=None) x = self.bottleneck_conv(x)
if self.if_se:
x = self.mid_se(x)
x = self.linear_conv(x)
if self.if_shortcut:
x = paddle.elementwise_add(inputs, x)
return x
class SEModule(nn.Layer):
def __init__(self, in_channels, reduction=4, name=""):
super(SEModule, self).__init__()
self.avg_pool = nn.Pool2D(
pool_type="avg", global_pooling=True, use_cudnn=False)
self.conv1 = nn.Conv2d(
in_channels=in_channels,
out_channels=in_channels // reduction,
kernel_size=1,
stride=1,
padding=0,
weight_attr=ParamAttr(name=name + "_1_weights"),
bias_attr=ParamAttr(name=name + "_1_offset"))
self.conv2 = nn.Conv2d(
in_channels=in_channels // reduction,
out_channels=in_channels,
kernel_size=1,
stride=1,
padding=0,
weight_attr=ParamAttr(name + "_2_weights"),
bias_attr=ParamAttr(name=name + "_2_offset"))
def forward(self, inputs):
outputs = self.avg_pool(inputs)
outputs = self.conv1(outputs)
outputs = F.relu(outputs)
outputs = self.conv2(outputs)
outputs = F.hard_sigmoid(outputs)
return inputs * outputs
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. # copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
# #
#Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
#You may obtain a copy of the License at # You may obtain a copy of the License at
# #
# http://www.apache.org/licenses/LICENSE-2.0 # http://www.apache.org/licenses/LICENSE-2.0
# #
#Unless required by applicable law or agreed to in writing, software # Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS, # distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
#limitations under the License. # limitations under the License.
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import paddle.fluid as fluid from paddle import nn
from paddle.fluid.param_attr import ParamAttr from paddle.nn import functional as F
from paddle import ParamAttr
__all__ = ["ResNet"] __all__ = ["ResNet"]
class ResNet(object): class ResNet(nn.Layer):
def __init__(self, params): def __init__(self, in_channels=3, layers=50, **kwargs):
""" """
the Resnet backbone network for detection module. the Resnet backbone network for detection module.
Args: Args:
params(dict): the super parameters for network build params(dict): the super parameters for network build
""" """
self.layers = params['layers'] super(ResNet, self).__init__()
supported_layers = [18, 34, 50, 101, 152] supported_layers = {
assert self.layers in supported_layers, \ 18: {
"supported layers are {} but input layer is {}".format(supported_layers, self.layers) 'depth': [2, 2, 2, 2],
self.is_3x3 = True 'block_class': BasicBlock
},
def __call__(self, input): 34: {
layers = self.layers 'depth': [3, 4, 6, 3],
is_3x3 = self.is_3x3 'block_class': BasicBlock
if layers == 18: },
depth = [2, 2, 2, 2] 50: {
elif layers == 34 or layers == 50: 'depth': [3, 4, 6, 3],
depth = [3, 4, 6, 3] 'block_class': BottleneckBlock
elif layers == 101: },
depth = [3, 4, 23, 3] 101: {
elif layers == 152: 'depth': [3, 4, 23, 3],
depth = [3, 8, 36, 3] 'block_class': BottleneckBlock
elif layers == 200: },
depth = [3, 12, 48, 3] 152: {
'depth': [3, 8, 36, 3],
'block_class': BottleneckBlock
},
200: {
'depth': [3, 12, 48, 3],
'block_class': BottleneckBlock
}
}
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(supported_layers.keys(), layers)
is_3x3 = True
depth = supported_layers[layers]['depth']
block_class = supported_layers[layers]['block_class']
num_filters = [64, 128, 256, 512] num_filters = [64, 128, 256, 512]
outs = []
conv = []
if is_3x3 == False: if is_3x3 == False:
conv = self.conv_bn_layer( conv.append(
input=input, ConvBNLayer(
num_filters=64, in_channels=in_channels,
filter_size=7, out_channels=64,
stride=2, kernel_size=7,
act='relu') stride=2,
act='relu'))
else: else:
conv = self.conv_bn_layer( conv.append(
input=input, ConvBNLayer(
num_filters=32, in_channels=3,
filter_size=3, out_channels=32,
stride=2, kernel_size=3,
act='relu', stride=2,
name='conv1_1') act='relu',
conv = self.conv_bn_layer( name='conv1_1'))
input=conv, conv.append(
num_filters=32, ConvBNLayer(
filter_size=3, in_channels=32,
stride=1, out_channels=32,
act='relu', kernel_size=3,
name='conv1_2') stride=1,
conv = self.conv_bn_layer( act='relu',
input=conv, name='conv1_2'))
num_filters=64, conv.append(
filter_size=3, ConvBNLayer(
stride=1, in_channels=32,
act='relu', out_channels=64,
name='conv1_3') kernel_size=3,
stride=1,
conv = fluid.layers.pool2d( act='relu',
input=conv, name='conv1_3'))
pool_size=3, self.conv1 = nn.Sequential(*conv)
pool_stride=2, self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
pool_padding=1, self.stages = []
pool_type='max') self.out_channels = []
in_ch = 64
if layers >= 50: for block_index in range(len(depth)):
for block in range(len(depth)): block_list = []
for i in range(depth[block]): for i in range(depth[block_index]):
if layers in [101, 152, 200] and block == 2: if layers >= 50:
if layers in [101, 152, 200] and block_index == 2:
if i == 0: if i == 0:
conv_name = "res" + str(block + 2) + "a" conv_name = "res" + str(block_index + 2) + "a"
else: else:
conv_name = "res" + str(block + 2) + "b" + str(i) conv_name = "res" + str(block_index +
2) + "b" + str(i)
else: else:
conv_name = "res" + str(block + 2) + chr(97 + i) conv_name = "res" + str(block_index + 2) + chr(97 + i)
conv = self.bottleneck_block( else:
input=conv, conv_name = "res" + str(block_index + 2) + chr(97 + i)
num_filters=num_filters[block], block_list.append(
stride=2 if i == 0 and block != 0 else 1, block_class(
if_first=block == i == 0, in_channels=in_ch,
name=conv_name) out_channels=num_filters[block_index],
outs.append(conv) stride=2 if i == 0 and block_index != 0 else 1,
else: if_first=block_index == i == 0,
for block in range(len(depth)): name=conv_name))
for i in range(depth[block]): in_ch = block_list[-1].out_channels
conv_name = "res" + str(block + 2) + chr(97 + i) self.out_channels.append(in_ch)
conv = self.basic_block( self.stages.append(nn.Sequential(*block_list))
input=conv, for i, stage in enumerate(self.stages):
num_filters=num_filters[block], self.add_sublayer(sublayer=stage, name="stage{}".format(i))
stride=2 if i == 0 and block != 0 else 1,
if_first=block == i == 0, def forward(self, x):
name=conv_name) x = self.conv1(x)
outs.append(conv) x = self.pool(x)
return outs out_list = []
for stage in self.stages:
def conv_bn_layer(self, x = stage(x)
input, out_list.append(x)
num_filters, return out_list
filter_size,
stride=1,
groups=1, class ConvBNLayer(nn.Layer):
act=None, def __init__(self,
name=None): in_channels,
conv = fluid.layers.conv2d( out_channels,
input=input, kernel_size,
num_filters=num_filters, stride=1,
filter_size=filter_size, groups=1,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
self.conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride, stride=stride,
padding=(filter_size - 1) // 2, padding=(kernel_size - 1) // 2,
groups=groups, groups=groups,
act=None, weight_attr=ParamAttr(name=name + "_weights"),
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False) bias_attr=False)
if name == "conv1": if name == "conv1":
bn_name = "bn_" + name bn_name = "bn_" + name
else: else:
bn_name = "bn" + name[3:] bn_name = "bn" + name[3:]
return fluid.layers.batch_norm( self.bn = nn.BatchNorm(
input=conv, num_channels=out_channels,
act=act, act=act,
param_attr=ParamAttr(name=bn_name + '_scale'), param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(bn_name + '_offset'), bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean', moving_mean_name=bn_name + "_mean",
moving_variance_name=bn_name + '_variance') moving_variance_name=bn_name + "_variance")
def conv_bn_layer_new(self, def __call__(self, x):
input, x = self.conv(x)
num_filters, x = self.bn(x)
filter_size, return x
stride=1,
groups=1,
act=None, class ConvBNLayerNew(nn.Layer):
name=None): def __init__(self,
pool = fluid.layers.pool2d( in_channels,
input=input, out_channels,
pool_size=2, kernel_size,
pool_stride=2, stride=1,
pool_padding=0, groups=1,
pool_type='avg', act=None,
ceil_mode=True) name=None):
super(ConvBNLayerNew, self).__init__()
conv = fluid.layers.conv2d( self.pool = nn.AvgPool2d(
input=pool, kernel_size=2, stride=2, padding=0, ceil_mode=True)
num_filters=num_filters,
filter_size=filter_size, self.conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=1, stride=1,
padding=(filter_size - 1) // 2, padding=(kernel_size - 1) // 2,
groups=groups, groups=groups,
act=None, weight_attr=ParamAttr(name=name + "_weights"),
param_attr=ParamAttr(name=name + "_weights"),
bias_attr=False) bias_attr=False)
if name == "conv1": if name == "conv1":
bn_name = "bn_" + name bn_name = "bn_" + name
else: else:
bn_name = "bn" + name[3:] bn_name = "bn" + name[3:]
return fluid.layers.batch_norm( self.bn = nn.BatchNorm(
input=conv, num_channels=out_channels,
act=act, act=act,
param_attr=ParamAttr(name=bn_name + '_scale'), param_attr=ParamAttr(name=bn_name + "_scale"),
bias_attr=ParamAttr(bn_name + '_offset'), bias_attr=ParamAttr(name=bn_name + "_offset"),
moving_mean_name=bn_name + '_mean', moving_mean_name=bn_name + "_mean",
moving_variance_name=bn_name + '_variance') moving_variance_name=bn_name + "_variance")
def shortcut(self, input, ch_out, stride, name, if_first=False): def __call__(self, x):
ch_in = input.shape[1] x = self.pool(x)
if ch_in != ch_out or stride != 1: x = self.conv(x)
x = self.bn(x)
return x
class ShortCut(nn.Layer):
def __init__(self, in_channels, out_channels, stride, name, if_first=False):
super(ShortCut, self).__init__()
self.use_conv = True
if in_channels != out_channels or stride != 1:
if if_first: if if_first:
return self.conv_bn_layer(input, ch_out, 1, stride, name=name) self.conv = ConvBNLayer(
in_channels, out_channels, 1, stride, name=name)
else: else:
return self.conv_bn_layer_new( self.conv = ConvBNLayerNew(
input, ch_out, 1, stride, name=name) in_channels, out_channels, 1, stride, name=name)
elif if_first: elif if_first:
return self.conv_bn_layer(input, ch_out, 1, stride, name=name) self.conv = ConvBNLayer(
in_channels, out_channels, 1, stride, name=name)
else: else:
return input self.use_conv = False
def forward(self, x):
if self.use_conv:
x = self.conv(x)
return x
def bottleneck_block(self, input, num_filters, stride, name, if_first):
conv0 = self.conv_bn_layer( class BottleneckBlock(nn.Layer):
input=input, def __init__(self, in_channels, out_channels, stride, name, if_first):
num_filters=num_filters, super(BottleneckBlock, self).__init__()
filter_size=1, self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
act='relu', act='relu',
name=name + "_branch2a") name=name + "_branch2a")
conv1 = self.conv_bn_layer( self.conv1 = ConvBNLayer(
input=conv0, in_channels=out_channels,
num_filters=num_filters, out_channels=out_channels,
filter_size=3, kernel_size=3,
stride=stride, stride=stride,
act='relu', act='relu',
name=name + "_branch2b") name=name + "_branch2b")
conv2 = self.conv_bn_layer( self.conv2 = ConvBNLayer(
input=conv1, in_channels=out_channels,
num_filters=num_filters * 4, out_channels=out_channels * 4,
filter_size=1, kernel_size=1,
act=None, act=None,
name=name + "_branch2c") name=name + "_branch2c")
short = self.shortcut( self.short = ShortCut(
input, in_channels=in_channels,
num_filters * 4, out_channels=out_channels * 4,
stride, stride=stride,
if_first=if_first, if_first=if_first,
name=name + "_branch1") name=name + "_branch1")
self.out_channels = out_channels * 4
def forward(self, x):
y = self.conv0(x)
y = self.conv1(y)
y = self.conv2(y)
y = y + self.short(x)
y = F.relu(y)
return y
return fluid.layers.elementwise_add(x=short, y=conv2, act='relu')
def basic_block(self, input, num_filters, stride, name, if_first): class BasicBlock(nn.Layer):
conv0 = self.conv_bn_layer( def __init__(self, in_channels, out_channels, stride, name, if_first):
input=input, super(BasicBlock, self).__init__()
num_filters=num_filters, self.conv0 = ConvBNLayer(
filter_size=3, in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
act='relu', act='relu',
stride=stride, stride=stride,
name=name + "_branch2a") name=name + "_branch2a")
conv1 = self.conv_bn_layer( self.conv1 = ConvBNLayer(
input=conv0, in_channels=out_channels,
num_filters=num_filters, out_channels=out_channels,
filter_size=3, kernel_size=3,
act=None, act=None,
name=name + "_branch2b") name=name + "_branch2b")
short = self.shortcut( self.short = ShortCut(
input, in_channels=in_channels,
num_filters, out_channels=out_channels,
stride, stride=stride,
if_first=if_first, if_first=if_first,
name=name + "_branch1") name=name + "_branch1")
return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') self.out_channels = out_channels
def forward(self, x):
y = self.conv0(x)
y = self.conv1(y)
y = y + self.short(x)
return F.relu(y)
if __name__ == '__main__':
import paddle
paddle.disable_static()
x = paddle.zeros([1, 3, 640, 640])
x = paddle.to_variable(x)
print(x.shape)
net = ResNet(layers=18)
y = net(x)
for stage in y:
print(stage.shape)
# paddle.save(net.state_dict(),'1.pth')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment