Commit aad3093a authored by WenmuZhou

dygraph first commit

parent 10f7e519
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
import numpy as np
class AttentionLoss(object):
    def __init__(self, params):
        super(AttentionLoss, self).__init__()
        self.char_num = params['char_num']

    def __call__(self, predicts, labels):
        predict = predicts['predict']
        label_out = labels['label_out']
        label_out = fluid.layers.cast(x=label_out, dtype='int64')
        cost = fluid.layers.cross_entropy(input=predict, label=label_out)
        sum_cost = fluid.layers.reduce_sum(cost)
        return sum_cost
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn


class CTCLoss(nn.Layer):
    def __init__(self, **kwargs):
        super(CTCLoss, self).__init__()
        self.loss_func = nn.CTCLoss(blank=0, reduction='none')

    def __call__(self, predicts, batch):
        predicts = predicts.transpose((1, 0, 2))  # (B, T, C) -> (T, B, C)
        N, B, _ = predicts.shape
        preds_lengths = paddle.to_tensor([N] * B, dtype='int64')
        labels = batch[1].astype("int32")
        label_lengths = batch[2].astype('int64')
        loss = self.loss_func(predicts, labels, preds_lengths, label_lengths)
        loss = loss.mean()
        return {'loss': loss}
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
import paddle.fluid as fluid
class SRNLoss(object):
    def __init__(self, params):
        super(SRNLoss, self).__init__()
        self.char_num = params['char_num']

    def __call__(self, predicts, others):
        predict = predicts['predict']
        word_predict = predicts['word_out']
        gsrm_predict = predicts['gsrm_out']
        label = others['label']
        lbl_weight = others['lbl_weight']

        casted_label = fluid.layers.cast(x=label, dtype='int64')
        cost_word = fluid.layers.cross_entropy(
            input=word_predict, label=casted_label)
        cost_gsrm = fluid.layers.cross_entropy(
            input=gsrm_predict, label=casted_label)
        cost_vsfd = fluid.layers.cross_entropy(
            input=predict, label=casted_label)

        cost_word = fluid.layers.reshape(
            x=fluid.layers.reduce_sum(cost_word), shape=[1])
        cost_gsrm = fluid.layers.reshape(
            x=fluid.layers.reduce_sum(cost_gsrm), shape=[1])
        cost_vsfd = fluid.layers.reshape(
            x=fluid.layers.reduce_sum(cost_vsfd), shape=[1])

        # weighted sum of the three branch losses
        sum_cost = fluid.layers.sum(
            [cost_word, cost_vsfd * 2.0, cost_gsrm * 0.15])
        return [sum_cost, cost_vsfd, cost_word]
@@ -11,3 +11,17 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ['build_neck']


def build_neck(config):
    from .fpn import FPN
    from .rnn import SequenceEncoder
    support_dict = ['FPN', 'SequenceEncoder']

    module_name = config.pop('name')
    assert module_name in support_dict, Exception('neck only support {}'.format(
        support_dict))
    module_class = eval(module_name)(**config)
    return module_class
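A hedged usage sketch for this factory: the channel numbers below are illustrative only, and the call assumes it runs inside the package so the relative imports resolve.

# Hypothetical config; 'name' is popped by build_neck itself.
config = {'name': 'FPN', 'in_channels': [16, 24, 56, 480], 'out_channels': 256}
neck = build_neck(config)
print(neck.out_channels)  # 256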
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
class FPN(nn.Layer):
    def __init__(self, in_channels, out_channels, **kwargs):
        super(FPN, self).__init__()
        self.out_channels = out_channels
        weight_attr = paddle.nn.initializer.MSRA(uniform=False)

        self.in2_conv = nn.Conv2d(
            in_channels=in_channels[0],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(
                name='conv2d_51.w_0', initializer=weight_attr),
            bias_attr=False)
        self.in3_conv = nn.Conv2d(
            in_channels=in_channels[1],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(
                name='conv2d_50.w_0', initializer=weight_attr),
            bias_attr=False)
        self.in4_conv = nn.Conv2d(
            in_channels=in_channels[2],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(
                name='conv2d_49.w_0', initializer=weight_attr),
            bias_attr=False)
        self.in5_conv = nn.Conv2d(
            in_channels=in_channels[3],
            out_channels=self.out_channels,
            kernel_size=1,
            weight_attr=ParamAttr(
                name='conv2d_48.w_0', initializer=weight_attr),
            bias_attr=False)
        self.p5_conv = nn.Conv2d(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(
                name='conv2d_52.w_0', initializer=weight_attr),
            bias_attr=False)
        self.p4_conv = nn.Conv2d(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(
                name='conv2d_53.w_0', initializer=weight_attr),
            bias_attr=False)
        self.p3_conv = nn.Conv2d(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(
                name='conv2d_54.w_0', initializer=weight_attr),
            bias_attr=False)
        self.p2_conv = nn.Conv2d(
            in_channels=self.out_channels,
            out_channels=self.out_channels // 4,
            kernel_size=3,
            padding=1,
            weight_attr=ParamAttr(
                name='conv2d_55.w_0', initializer=weight_attr),
            bias_attr=False)

    def forward(self, x):
        c2, c3, c4, c5 = x

        in5 = self.in5_conv(c5)
        in4 = self.in4_conv(c4)
        in3 = self.in3_conv(c3)
        in2 = self.in2_conv(c2)

        out4 = in4 + F.resize_nearest(in5, scale=2)  # 1/16
        out3 = in3 + F.resize_nearest(out4, scale=2)  # 1/8
        out2 = in2 + F.resize_nearest(out3, scale=2)  # 1/4

        p5 = self.p5_conv(in5)
        p4 = self.p4_conv(out4)
        p3 = self.p3_conv(out3)
        p2 = self.p2_conv(out2)

        p5 = F.resize_nearest(p5, scale=8)
        p4 = F.resize_nearest(p4, scale=4)
        p3 = F.resize_nearest(p3, scale=2)

        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
        return fuse
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import nn
from ppocr.modeling.heads.rec_ctc_head import get_para_bias_attr
class EncoderWithReshape(nn.Layer):
    def __init__(self, in_channels, **kwargs):
        super().__init__()
        self.out_channels = in_channels

    def forward(self, x):
        B, C, H, W = x.shape
        x = x.reshape((B, C, -1))
        x = x.transpose([0, 2, 1])  # (NTC)(batch, width, channels)
        return x


class Im2Seq(nn.Layer):
    def __init__(self, in_channels, **kwargs):
        super().__init__()
        self.out_channels = in_channels

    def forward(self, x):
        B, C, H, W = x.shape
        assert H == 1
        x = x.transpose((0, 2, 3, 1))
        x = x.reshape((-1, C))
        return x


class EncoderWithRNN(nn.Layer):
    def __init__(self, in_channels, hidden_size):
        super(EncoderWithRNN, self).__init__()
        self.out_channels = hidden_size * 2
        # self.lstm1_fw = nn.LSTMCell(
        #     in_channels,
        #     hidden_size,
        #     weight_ih_attr=ParamAttr(name='lstm_st1_fc1_w'),
        #     bias_ih_attr=ParamAttr(name='lstm_st1_fc1_b'),
        #     weight_hh_attr=ParamAttr(name='lstm_st1_out1_w'),
        #     bias_hh_attr=ParamAttr(name='lstm_st1_out1_b'),
        # )
        # self.lstm1_bw = nn.LSTMCell(
        #     in_channels,
        #     hidden_size,
        #     weight_ih_attr=ParamAttr(name='lstm_st1_fc2_w'),
        #     bias_ih_attr=ParamAttr(name='lstm_st1_fc2_b'),
        #     weight_hh_attr=ParamAttr(name='lstm_st1_out2_w'),
        #     bias_hh_attr=ParamAttr(name='lstm_st1_out2_b'),
        # )
        # self.lstm2_fw = nn.LSTMCell(
        #     hidden_size,
        #     hidden_size,
        #     weight_ih_attr=ParamAttr(name='lstm_st2_fc1_w'),
        #     bias_ih_attr=ParamAttr(name='lstm_st2_fc1_b'),
        #     weight_hh_attr=ParamAttr(name='lstm_st2_out1_w'),
        #     bias_hh_attr=ParamAttr(name='lstm_st2_out1_b'),
        # )
        # self.lstm2_bw = nn.LSTMCell(
        #     hidden_size,
        #     hidden_size,
        #     weight_ih_attr=ParamAttr(name='lstm_st2_fc2_w'),
        #     bias_ih_attr=ParamAttr(name='lstm_st2_fc2_b'),
        #     weight_hh_attr=ParamAttr(name='lstm_st2_out2_w'),
        #     bias_hh_attr=ParamAttr(name='lstm_st2_out2_b'),
        # )
        self.lstm = nn.LSTM(
            in_channels, hidden_size, direction='bidirectional', num_layers=2)

    def forward(self, x):
        # fw_x, _ = self.lstm1_fw(x)
        # fw_x, _ = self.lstm2_fw(fw_x)
        #
        # # bw
        # bw_x, _ = self.lstm1_bw(x)
        # bw_x, _ = self.lstm2_bw(bw_x)
        # x = paddle.concat([fw_x, bw_x], axis=2)
        x, _ = self.lstm(x)
        return x


class EncoderWithFC(nn.Layer):
    def __init__(self, in_channels, hidden_size):
        super(EncoderWithFC, self).__init__()
        self.out_channels = hidden_size
        weight_attr, bias_attr = get_para_bias_attr(
            l2_decay=0.00001, k=in_channels, name='reduce_encoder_fea')
        self.fc = nn.Linear(
            in_channels,
            hidden_size,
            weight_attr=weight_attr,
            bias_attr=bias_attr,
            name='reduce_encoder_fea')

    def forward(self, x):
        x = self.fc(x)
        return x


class SequenceEncoder(nn.Layer):
    def __init__(self, in_channels, encoder_type, hidden_size, **kwargs):
        super(SequenceEncoder, self).__init__()
        self.encoder_reshape = EncoderWithReshape(in_channels)
        self.out_channels = self.encoder_reshape.out_channels
        if encoder_type == 'reshape':
            self.only_reshape = True
        else:
            support_encoder_dict = {
                'reshape': EncoderWithReshape,
                'fc': EncoderWithFC,
                'rnn': EncoderWithRNN
            }
            assert encoder_type in support_encoder_dict, '{} must in {}'.format(
                encoder_type, support_encoder_dict.keys())

            self.encoder = support_encoder_dict[encoder_type](
                self.encoder_reshape.out_channels, hidden_size)
            self.out_channels = self.encoder.out_channels
            self.only_reshape = False

    def forward(self, x):
        x = self.encoder_reshape(x)
        if not self.only_reshape:
            x = self.encoder(x)
        return x
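A minimal sketch of the shape flow through SequenceEncoder, with sizes chosen purely for illustration:

import paddle

# Hypothetical CRNN-style feature map: batch 4, 288 channels, height 1, width 80.
x = paddle.rand([4, 288, 1, 80])
encoder = SequenceEncoder(in_channels=288, encoder_type='rnn', hidden_size=48)
y = encoder(x)
# EncoderWithReshape folds H*W into the time axis, so y is (4, 80, 96),
# where 96 = hidden_size * 2 from the bidirectional LSTM.
print(y.shape)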
#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.param_attr import ParamAttr
import numpy as np
class LocalizationNetwork(object):
    def __init__(self, params):
        super(LocalizationNetwork, self).__init__()
        self.F = params['num_fiducial']
        self.loc_lr = params['loc_lr']
        self.model_name = params['model_name']

    def conv_bn_layer(self,
                      input,
                      num_filters,
                      filter_size,
                      stride=1,
                      groups=1,
                      act=None,
                      name=None):
        conv = layers.conv2d(
            input=input,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=groups,
            act=None,
            param_attr=ParamAttr(name=name + "_weights"),
            bias_attr=False)
        bn_name = "bn_" + name
        return layers.batch_norm(
            input=conv,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def get_initial_fiducials(self):
        """ see RARE paper Fig. 6 (a) """
        F = self.F
        ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
        ctrl_pts_y_top = np.linspace(0.0, -1.0, num=int(F / 2))
        ctrl_pts_y_bottom = np.linspace(1.0, 0.0, num=int(F / 2))
        ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
        ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
        initial_bias = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
        return initial_bias

    def __call__(self, image):
        F = self.F
        loc_lr = self.loc_lr
        if self.model_name == "large":
            num_filters_list = [64, 128, 256, 512]
            fc_dim = 256
        else:
            num_filters_list = [16, 32, 64, 128]
            fc_dim = 64

        for fno in range(len(num_filters_list)):
            num_filters = num_filters_list[fno]
            name = "loc_conv%d" % fno
            if fno == 0:
                conv = self.conv_bn_layer(
                    image, num_filters, 3, act='relu', name=name)
            else:
                conv = self.conv_bn_layer(
                    pool, num_filters, 3, act='relu', name=name)

            if fno == len(num_filters_list) - 1:
                pool = layers.adaptive_pool2d(
                    input=conv, pool_size=[1, 1], pool_type='avg')
            else:
                pool = layers.pool2d(
                    input=conv,
                    pool_size=2,
                    pool_stride=2,
                    pool_padding=0,
                    pool_type='max')

        name = "loc_fc1"
        stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0)
        fc1 = layers.fc(input=pool,
                        size=fc_dim,
                        param_attr=fluid.param_attr.ParamAttr(
                            learning_rate=loc_lr,
                            initializer=fluid.initializer.Uniform(-stdv, stdv),
                            name=name + "_w"),
                        act='relu',
                        name=name)

        initial_bias = self.get_initial_fiducials()
        initial_bias = initial_bias.reshape(-1)
        name = "loc_fc2"
        param_attr = fluid.param_attr.ParamAttr(
            learning_rate=loc_lr,
            initializer=fluid.initializer.NumpyArrayInitializer(
                np.zeros([fc_dim, F * 2])),
            name=name + "_w")
        bias_attr = fluid.param_attr.ParamAttr(
            learning_rate=loc_lr,
            initializer=fluid.initializer.NumpyArrayInitializer(initial_bias),
            name=name + "_b")
        fc2 = layers.fc(input=fc1,
                        size=F * 2,
                        param_attr=param_attr,
                        bias_attr=bias_attr,
                        name=name)
        batch_C_prime = layers.reshape(x=fc2, shape=[-1, F, 2], inplace=False)
        return batch_C_prime
class GridGenerator(object):
    def __init__(self, params):
        super(GridGenerator, self).__init__()
        self.eps = 1e-6
        self.F = params['num_fiducial']

    def build_C(self):
        """ Return coordinates of fiducial points in I_r; C """
        F = self.F
        ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
        ctrl_pts_y_top = -1 * np.ones(int(F / 2))
        ctrl_pts_y_bottom = np.ones(int(F / 2))
        ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
        ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
        C = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
        return C  # F x 2

    def build_P(self, I_r_size):
        I_r_width, I_r_height = I_r_size
        I_r_grid_x = (np.arange(-I_r_width, I_r_width, 2) + 1.0) \
            / I_r_width  # self.I_r_width
        I_r_grid_y = (np.arange(-I_r_height, I_r_height, 2) + 1.0) \
            / I_r_height  # self.I_r_height
        # P: self.I_r_width x self.I_r_height x 2
        P = np.stack(np.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
        # n (= self.I_r_width x self.I_r_height) x 2
        return P.reshape([-1, 2])

    def build_inv_delta_C(self, C):
        """ Return inv_delta_C which is needed to calculate T """
        F = self.F
        hat_C = np.zeros((F, F), dtype=float)  # F x F
        for i in range(0, F):
            for j in range(i, F):
                r = np.linalg.norm(C[i] - C[j])
                hat_C[i, j] = r
                hat_C[j, i] = r
        np.fill_diagonal(hat_C, 1)
        hat_C = (hat_C**2) * np.log(hat_C)
        # print(C.shape, hat_C.shape)
        delta_C = np.concatenate(  # F+3 x F+3
            [
                np.concatenate(
                    [np.ones((F, 1)), C, hat_C], axis=1),  # F x F+3
                np.concatenate(
                    [np.zeros((2, 3)), np.transpose(C)], axis=1),  # 2 x F+3
                np.concatenate(
                    [np.zeros((1, 3)), np.ones((1, F))], axis=1)  # 1 x F+3
            ],
            axis=0)
        inv_delta_C = np.linalg.inv(delta_C)
        return inv_delta_C  # F+3 x F+3

    def build_P_hat(self, C, P):
        F = self.F
        eps = self.eps
        n = P.shape[0]  # n (= self.I_r_width x self.I_r_height)
        # P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
        P_tile = np.tile(np.expand_dims(P, axis=1), (1, F, 1))
        C_tile = np.expand_dims(C, axis=0)  # 1 x F x 2
        P_diff = P_tile - C_tile  # n x F x 2
        # rbf_norm: n x F
        rbf_norm = np.linalg.norm(P_diff, ord=2, axis=2, keepdims=False)
        # rbf: n x F
        rbf = np.multiply(np.square(rbf_norm), np.log(rbf_norm + eps))
        P_hat = np.concatenate([np.ones((n, 1)), P, rbf], axis=1)
        return P_hat  # n x F+3

    def get_expand_tensor(self, batch_C_prime):
        name = "ex_fc"
        initializer = fluid.initializer.ConstantInitializer(value=0.0)
        param_attr = fluid.param_attr.ParamAttr(
            learning_rate=0.0, initializer=initializer, name=name + "_w")
        bias_attr = fluid.param_attr.ParamAttr(
            learning_rate=0.0, initializer=initializer, name=name + "_b")
        batch_C_ex_part_tensor = fluid.layers.fc(input=batch_C_prime,
                                                 size=6,
                                                 param_attr=param_attr,
                                                 bias_attr=bias_attr,
                                                 name=name)
        batch_C_ex_part_tensor = fluid.layers.reshape(
            x=batch_C_ex_part_tensor, shape=[-1, 3, 2])
        return batch_C_ex_part_tensor

    def __call__(self, batch_C_prime, I_r_size):
        C = self.build_C()
        P = self.build_P(I_r_size)
        inv_delta_C = self.build_inv_delta_C(C).astype('float32')
        P_hat = self.build_P_hat(C, P).astype('float32')

        inv_delta_C_tensor = layers.create_tensor(dtype='float32')
        layers.assign(inv_delta_C, inv_delta_C_tensor)
        inv_delta_C_tensor.stop_gradient = True
        P_hat_tensor = layers.create_tensor(dtype='float32')
        layers.assign(P_hat, P_hat_tensor)
        P_hat_tensor.stop_gradient = True

        batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
        # batch_C_ex_part_tensor = create_tmp_var(
        #     fluid.default_main_program(),
        #     name='batch_C_ex_part_tensor',
        #     dtype='float32', shape=[-1, 3, 2])
        # layers.py_func(func=get_batch_C_expand,
        #     x=[batch_C_prime], out=[batch_C_ex_part_tensor])
        batch_C_ex_part_tensor.stop_gradient = True

        batch_C_prime_with_zeros = layers.concat(
            [batch_C_prime, batch_C_ex_part_tensor], axis=1)
        batch_T = layers.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
        batch_P_prime = layers.matmul(P_hat_tensor, batch_T)
        return batch_P_prime
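Since build_C, build_P, build_inv_delta_C and build_P_hat are pure numpy, their shapes can be sanity-checked outside a Paddle program; a hedged example with F = 20 fiducials and an illustrative 32x100 rectified image:

gg = GridGenerator({'num_fiducial': 20})
C = gg.build_C()                        # (20, 2) fiducial points on the edges
P = gg.build_P((100, 32))               # (3200, 2) normalized sampling grid
inv_delta_C = gg.build_inv_delta_C(C)   # (23, 23), i.e. (F+3) x (F+3)
P_hat = gg.build_P_hat(C, P)            # (3200, 23), i.e. n x (F+3)
print(C.shape, P.shape, inv_delta_C.shape, P_hat.shape)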
class TPS(object):
    def __init__(self, params):
        super(TPS, self).__init__()
        self.loc_net = LocalizationNetwork(params)
        self.grid_generator = GridGenerator(params)

    def __call__(self, image):
        batch_C_prime = self.loc_net(image)
        I_r_size = [image.shape[3], image.shape[2]]
        batch_P_prime = self.grid_generator(batch_C_prime, I_r_size)
        batch_P_prime = layers.reshape(
            x=batch_P_prime, shape=[-1, image.shape[2], image.shape[3], 2])
        batch_I_r = layers.grid_sampler(x=image, grid=batch_P_prime)
        image.stop_gradient = False
        return batch_I_r
@@ -11,3 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

__all__ = ['build_transform']


def build_transform(config):
    support_dict = ['']

    module_name = config.pop('name')
    assert module_name in support_dict, Exception(
        'transform only support {}'.format(support_dict))
    module_class = eval(module_name)(**config)
    return module_class
#copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle.fluid as fluid
from paddle.fluid.regularizer import L2Decay
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
import paddle.fluid.layers.ops as ops
from ppocr.utils.utility import initial_logger
logger = initial_logger()
def cosine_decay_with_warmup(learning_rate,
                             step_each_epoch,
                             epochs=500,
                             warmup_minibatch=1000):
    """Applies cosine decay to the learning rate.
    lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1)
    decrease lr for every mini-batch and start with warmup.
    """
    global_step = _decay_step_counter()
    lr = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    warmup_minibatch = fluid.layers.fill_constant(
        shape=[1],
        dtype='float32',
        value=float(warmup_minibatch),
        force_cpu=True)

    with fluid.layers.control_flow.Switch() as switch:
        with switch.case(global_step < warmup_minibatch):
            decayed_lr = learning_rate * (1.0 * global_step / warmup_minibatch)
            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
        with switch.default():
            decayed_lr = learning_rate * \
                (ops.cos((global_step - warmup_minibatch) * (math.pi / (epochs * step_each_epoch))) + 1) / 2
            fluid.layers.tensor.assign(input=decayed_lr, output=lr)
    return lr


def AdamDecay(params, parameter_list=None):
    """
    define optimizer function
    args:
        params(dict): the hyperparameters
        parameter_list (list): list of Variable names to update to minimize loss
    return:
    """
    base_lr = params['base_lr']
    beta1 = params['beta1']
    beta2 = params['beta2']
    l2_decay = params.get("l2_decay", 0.0)

    if 'decay' in params:
        supported_decay_mode = [
            "cosine_decay", "cosine_decay_warmup", "piecewise_decay"
        ]
        params = params['decay']
        decay_mode = params['function']
        assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
            supported_decay_mode, decay_mode)

        if decay_mode == "cosine_decay":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            base_lr = fluid.layers.cosine_decay(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch)
        elif decay_mode == "cosine_decay_warmup":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            warmup_minibatch = params.get("warmup_minibatch", 1000)
            base_lr = cosine_decay_with_warmup(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch,
                warmup_minibatch=warmup_minibatch)
        elif decay_mode == "piecewise_decay":
            boundaries = params["boundaries"]
            decay_rate = params["decay_rate"]
            values = [
                base_lr * decay_rate**idx
                for idx in range(len(boundaries) + 1)
            ]
            base_lr = fluid.layers.piecewise_decay(boundaries, values)

    optimizer = fluid.optimizer.Adam(
        learning_rate=base_lr,
        beta1=beta1,
        beta2=beta2,
        regularization=L2Decay(regularization_coeff=l2_decay),
        parameter_list=parameter_list)
    return optimizer


def RMSProp(params, parameter_list=None):
    """
    define optimizer function
    args:
        params(dict): the hyperparameters
        parameter_list (list): list of Variable names to update to minimize loss
    return:
    """
    base_lr = params.get("base_lr", 0.001)
    l2_decay = params.get("l2_decay", 0.00005)

    if 'decay' in params:
        supported_decay_mode = ["cosine_decay", "piecewise_decay"]
        params = params['decay']
        decay_mode = params['function']
        assert decay_mode in supported_decay_mode, "Supported decay mode is {}, but got {}".format(
            supported_decay_mode, decay_mode)

        if decay_mode == "cosine_decay":
            step_each_epoch = params['step_each_epoch']
            total_epoch = params['total_epoch']
            base_lr = fluid.layers.cosine_decay(
                learning_rate=base_lr,
                step_each_epoch=step_each_epoch,
                epochs=total_epoch)
        elif decay_mode == "piecewise_decay":
            boundaries = params["boundaries"]
            decay_rate = params["decay_rate"]
            values = [
                base_lr * decay_rate**idx
                for idx in range(len(boundaries) + 1)
            ]
            base_lr = fluid.layers.piecewise_decay(boundaries, values)

    optimizer = fluid.optimizer.RMSProp(
        learning_rate=base_lr,
        regularization=fluid.regularizer.L2Decay(regularization_coeff=l2_decay))
    return optimizer
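For reference, a hedged sketch of the params dict AdamDecay consumes; the values are illustrative, and the call only makes sense inside a fluid static-graph program:

# Hypothetical hyperparameters; keys mirror what AdamDecay reads above.
params = {
    'base_lr': 0.001,
    'beta1': 0.9,
    'beta2': 0.999,
    'l2_decay': 1e-5,
    'decay': {
        'function': 'piecewise_decay',
        'boundaries': [10000, 20000],
        'decay_rate': 0.1,
    },
}
optimizer = AdamDecay(params)  # a fluid.optimizer.Adam with piecewise lr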
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import copy
__all__ = ['build_optimizer']
def build_lr_scheduler(lr_config, epochs, step_each_epoch):
    from . import learning_rate
    lr_config.update({'epochs': epochs, 'step_each_epoch': step_each_epoch})
    if 'name' in lr_config:
        lr_name = lr_config.pop('name')
        lr = getattr(learning_rate, lr_name)(**lr_config)()
    else:
        lr = lr_config['lr']
    return lr


def build_optimizer(config, epochs, step_each_epoch, parameters):
    from . import regularizer, optimizer
    config = copy.deepcopy(config)
    # step1 build lr
    lr = build_lr_scheduler(
        config.pop('learning_rate'), epochs, step_each_epoch)

    # step2 build regularization
    if 'regularizer' in config and config['regularizer'] is not None:
        reg_config = config.pop('regularizer')
        reg_name = reg_config.pop('name') + 'Decay'
        reg = getattr(regularizer, reg_name)(**reg_config)()
    else:
        reg = None

    # step3 build optimizer
    optim_name = config.pop('name')
    optim = getattr(optimizer, optim_name)(learning_rate=lr,
                                           regularization=reg,
                                           **config)
    return optim(parameters), lr
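A hedged example of the config this builder expects; the keys follow the code above, `model` is a hypothetical nn.Layer, and the class names must exist in the sibling learning_rate / regularizer / optimizer modules:

config = {
    'name': 'Adam',
    'beta1': 0.9,
    'beta2': 0.999,
    'learning_rate': {'name': 'Cosine', 'lr': 0.001, 'warmup_epoch': 2},
    'regularizer': {'name': 'L2', 'factor': 1e-5},
}
optim, lr = build_optimizer(
    config, epochs=100, step_each_epoch=500, parameters=model.parameters())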
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from paddle.optimizer import lr_scheduler
class Linear(object):
    """
    Linear learning rate decay
    Args:
        lr (float): The initial learning rate. It is a python float number.
        epochs(int): The decay step size. It determines the decay cycle.
        end_lr(float, optional): The minimum final learning rate. Default: 0.0001.
        power(float, optional): Power of polynomial. Default: 1.0.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 lr,
                 epochs,
                 step_each_epoch,
                 end_lr=0.0,
                 power=1.0,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Linear, self).__init__()
        self.lr = lr
        self.epochs = epochs * step_each_epoch
        self.end_lr = end_lr
        self.power = power
        self.last_epoch = last_epoch
        self.warmup_epoch = warmup_epoch * step_each_epoch

    def __call__(self):
        learning_rate = lr_scheduler.PolynomialLR(
            learning_rate=self.lr,
            decay_steps=self.epochs,
            end_lr=self.end_lr,
            power=self.power,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr_scheduler.LinearLrWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.lr,
                last_epoch=self.last_epoch)
        return learning_rate


class Cosine(object):
    """
    Cosine learning rate decay
    lr = 0.05 * (math.cos(epoch * (math.pi / epochs)) + 1)
    Args:
        lr(float): initial learning rate
        step_each_epoch(int): steps each epoch
        epochs(int): total training epochs
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 lr,
                 step_each_epoch,
                 epochs,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Cosine, self).__init__()
        self.lr = lr
        self.T_max = step_each_epoch * epochs
        self.last_epoch = last_epoch
        self.warmup_epoch = warmup_epoch * step_each_epoch

    def __call__(self):
        learning_rate = lr_scheduler.CosineAnnealingLR(
            learning_rate=self.lr, T_max=self.T_max, last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr_scheduler.LinearLrWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.lr,
                last_epoch=self.last_epoch)
        return learning_rate


class Step(object):
    """
    Step learning rate decay
    Args:
        step_each_epoch(int): steps each epoch
        learning_rate (float): The initial learning rate. It is a python float number.
        step_size (int): the interval to update.
        gamma (float, optional): The ratio by which the learning rate is reduced. ``new_lr = origin_lr * gamma`` .
            It should be less than 1.0. Default: 0.1.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 lr,
                 step_size,
                 step_each_epoch,
                 gamma,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Step, self).__init__()
        self.step_size = step_each_epoch * step_size
        self.lr = lr
        self.gamma = gamma
        self.last_epoch = last_epoch
        self.warmup_epoch = warmup_epoch * step_each_epoch

    def __call__(self):
        learning_rate = lr_scheduler.StepLR(
            learning_rate=self.lr,
            step_size=self.step_size,
            gamma=self.gamma,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr_scheduler.LinearLrWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.lr,
                last_epoch=self.last_epoch)
        return learning_rate


class Piecewise(object):
    """
    Piecewise learning rate decay
    Args:
        boundaries(list): A list of steps numbers. The type of element in the list is python int.
        values(list): A list of learning rate values that will be picked during different epoch boundaries.
            The type of element in the list is python float.
        last_epoch (int, optional): The index of last epoch. Can be set to restart training. Default: -1, means initial learning rate.
    """

    def __init__(self,
                 step_each_epoch,
                 decay_epochs,
                 values,
                 warmup_epoch=0,
                 last_epoch=-1,
                 **kwargs):
        super(Piecewise, self).__init__()
        self.boundaries = [step_each_epoch * e for e in decay_epochs]
        self.values = values
        self.last_epoch = last_epoch
        self.warmup_epoch = warmup_epoch * step_each_epoch

    def __call__(self):
        learning_rate = lr_scheduler.PiecewiseLR(
            boundaries=self.boundaries,
            values=self.values,
            last_epoch=self.last_epoch)
        if self.warmup_epoch > 0:
            learning_rate = lr_scheduler.LinearLrWarmup(
                learning_rate=learning_rate,
                warmup_steps=self.warmup_epoch,
                start_lr=0.0,
                end_lr=self.values[0],
                last_epoch=self.last_epoch)
        return learning_rate
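A small hedged example of composing one of these wrappers with warmup (all numbers illustrative):

# Cosine decay over 100 epochs of 500 steps, with 2 epochs of linear warmup.
sched = Cosine(lr=0.001, step_each_epoch=500, epochs=100, warmup_epoch=2)()
for _ in range(5):
    sched.step()          # advance one mini-batch
    print(sched.last_lr)  # ramps linearly from 0.0 toward 0.001 during warmup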
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from paddle import optimizer as optim
class Momentum(object):
    """
    Simple Momentum optimizer with velocity state.
    Args:
        learning_rate (float|Variable) - The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float) - Momentum factor.
        weight_decay (WeightDecayRegularizer, optional) - The strategy of regularization.
    """

    def __init__(self, learning_rate, momentum, weight_decay=None, **args):
        super(Momentum, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.weight_decay = weight_decay

    def __call__(self, parameters):
        # note: `parameters` and `weight_decay` must not be swapped here
        opt = optim.Momentum(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
            parameters=parameters)
        return opt


class Adam(object):
    def __init__(self,
                 learning_rate=0.001,
                 beta1=0.9,
                 beta2=0.999,
                 epsilon=1e-08,
                 parameter_list=None,
                 weight_decay=None,
                 grad_clip=None,
                 name=None,
                 lazy_mode=False,
                 **kwargs):
        self.learning_rate = learning_rate
        self.beta1 = beta1
        self.beta2 = beta2
        self.epsilon = epsilon
        self.parameter_list = parameter_list
        self.weight_decay = weight_decay
        self.grad_clip = grad_clip
        self.name = name
        self.lazy_mode = lazy_mode

    def __call__(self, parameters):
        opt = optim.Adam(
            learning_rate=self.learning_rate,
            beta1=self.beta1,
            beta2=self.beta2,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            grad_clip=self.grad_clip,
            name=self.name,
            lazy_mode=self.lazy_mode,
            parameters=parameters)
        return opt


class RMSProp(object):
    """
    Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning rate method.
    Args:
        learning_rate (float|Variable) - The learning rate used to update parameters.
            Can be a float value or a Variable with one float value as data element.
        momentum (float) - Momentum factor.
        rho (float) - rho value in equation.
        epsilon (float) - avoid division by zero, default is 1e-6.
        weight_decay (WeightDecayRegularizer, optional) - The strategy of regularization.
    """

    def __init__(self,
                 learning_rate,
                 momentum,
                 rho=0.95,
                 epsilon=1e-6,
                 weight_decay=None,
                 **args):
        super(RMSProp, self).__init__()
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.rho = rho
        self.epsilon = epsilon
        self.weight_decay = weight_decay

    def __call__(self, parameters):
        opt = optim.RMSProp(
            learning_rate=self.learning_rate,
            momentum=self.momentum,
            rho=self.rho,
            epsilon=self.epsilon,
            weight_decay=self.weight_decay,
            parameters=parameters)
        return opt
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from paddle import fluid
class L1Decay(object):
    """
    L1 Weight Decay Regularization, which encourages the weights to be sparse.
    Args:
        factor(float): regularization coeff. Default: 0.0.
    """

    def __init__(self, factor=0.0):
        super(L1Decay, self).__init__()
        self.regularization_coeff = factor

    def __call__(self):
        reg = fluid.regularizer.L1Decay(
            regularization_coeff=self.regularization_coeff)
        return reg


class L2Decay(object):
    """
    L2 Weight Decay Regularization, which encourages the weights to be small.
    Args:
        factor(float): regularization coeff. Default: 0.0.
    """

    def __init__(self, factor=0.0):
        super(L2Decay, self).__init__()
        self.regularization_coeff = factor

    def __call__(self):
        reg = fluid.regularizer.L2Decay(
            regularization_coeff=self.regularization_coeff)
        return reg
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import copy
__all__ = ['build_post_process']
def build_post_process(config, global_config=None):
    from .db_postprocess import DBPostProcess
    from .rec_postprocess import CTCLabelDecode, AttnLabelDecode
    support_dict = ['DBPostProcess', 'CTCLabelDecode', 'AttnLabelDecode']

    config = copy.deepcopy(config)
    module_name = config.pop('name')
    if global_config is not None:
        config.update(global_config)
    assert module_name in support_dict, Exception(
        'post process only support {}'.format(support_dict))
    module_class = eval(module_name)(**config)
    return module_class
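A hedged invocation sketch: 'name' selects the class and the remaining keys go to its constructor, optionally merged with a global config first.

# Hypothetical DB config; values are illustrative.
post_process = build_post_process({
    'name': 'DBPostProcess',
    'thresh': 0.3,
    'box_thresh': 0.7,
    'max_candidates': 1000,
    'unclip_ratio': 2.0,
})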
@@ -16,11 +16,7 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import cv2
from shapely.geometry import Polygon
import pyclipper
@@ -31,11 +27,16 @@ class DBPostProcess(object):
    """
    The post process for Differentiable Binarization (DB).
    """

    def __init__(self,
                 thresh=0.3,
                 box_thresh=0.7,
                 max_candidates=1000,
                 unclip_ratio=2.0,
                 **kwargs):
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio
        self.min_size = 3

    def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height):
@@ -55,9 +56,9 @@ class DBPostProcess(object):
        contours, _ = outs[0], outs[1]
        num_contours = min(len(contours), self.max_candidates)

        boxes = []
        scores = []
        for index in range(num_contours):
            contour = contours[index]
            points, sside = self.get_mini_boxes(contour)
@@ -73,17 +74,14 @@ class DBPostProcess(object):
            if sside < self.min_size + 2:
                continue
            box = np.array(box)
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box.astype(np.int16))
            scores.append(score)
        return np.array(boxes, dtype=np.int16), scores

    def unclip(self, box):
        unclip_ratio = self.unclip_ratio
@@ -131,28 +129,15 @@ class DBPostProcess(object):
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]

    def __call__(self, pred, shape_list):
        pred = pred.numpy()[:, 0, :, :]
        segmentation = pred > self.thresh
        boxes_batch = []
        for batch_index in range(pred.shape[0]):
            height, width = shape_list[batch_index]
            boxes, scores = self.boxes_from_bitmap(
                pred[batch_index], segmentation[batch_index], width, height)
            boxes_batch.append({'points': boxes})
        return boxes_batch
import cv2
import numpy as np
import pyclipper
from shapely.geometry import Polygon


class DBPostProcess():
    def __init__(self,
                 thresh=0.3,
                 box_thresh=0.7,
                 max_candidates=1000,
                 unclip_ratio=1.5):
        self.min_size = 3
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.max_candidates = max_candidates
        self.unclip_ratio = unclip_ratio

    def __call__(self, pred, shape_list, is_output_polygon=False):
        '''
        batch: (image, polygons, ignore_tags)
        h_w_list: array of [h, w] pairs
        pred:
            binary: text region segmentation map, with shape (N, 1, H, W)
        '''
        pred = pred.numpy()[:, 0, :, :]
        segmentation = self.binarize(pred)
        batch_out = []
        for batch_index in range(pred.shape[0]):
            height, width = shape_list[batch_index]
            boxes, scores = self.post_p(
                pred[batch_index],
                segmentation[batch_index],
                width,
                height,
                is_output_polygon=is_output_polygon)
            batch_out.append({"points": boxes})
        return batch_out

    def binarize(self, pred):
        return pred > self.thresh

    def post_p(self,
               pred,
               bitmap,
               dest_width,
               dest_height,
               is_output_polygon=True):
        '''
        _bitmap: single map with shape (H, W),
            whose values are binarized as {0, 1}
        '''
        height, width = pred.shape
        boxes = []
        new_scores = []
        contours, _ = cv2.findContours((bitmap * 255).astype(np.uint8),
                                       cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
        for contour in contours[:self.max_candidates]:
            epsilon = 0.005 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)
            points = approx.reshape((-1, 2))
            if points.shape[0] < 4:
                continue
            score = self.box_score_fast(pred, points.reshape(-1, 2))
            if self.box_thresh > score:
                continue
            if points.shape[0] > 2:
                box = self.unclip(points, unclip_ratio=self.unclip_ratio)
                if len(box) > 1 or len(box) == 0:
                    continue
            else:
                continue
            four_point_box, sside = self.get_mini_boxes(box.reshape((-1, 1, 2)))
            if sside < self.min_size + 2:
                continue
            if not is_output_polygon:
                box = np.array(four_point_box)
            else:
                box = box.reshape(-1, 2)
            box[:, 0] = np.clip(
                np.round(box[:, 0] / width * dest_width), 0, dest_width)
            box[:, 1] = np.clip(
                np.round(box[:, 1] / height * dest_height), 0, dest_height)
            boxes.append(box)
            new_scores.append(score)
        return boxes, new_scores

    def unclip(self, box, unclip_ratio=1.5):
        poly = Polygon(box)
        distance = poly.area * unclip_ratio / poly.length
        offset = pyclipper.PyclipperOffset()
        offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
        expanded = np.array(offset.Execute(distance))
        return expanded

    def get_mini_boxes(self, contour):
        bounding_box = cv2.minAreaRect(contour)
        points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0])

        index_1, index_2, index_3, index_4 = 0, 1, 2, 3
        if points[1][1] > points[0][1]:
            index_1 = 0
            index_4 = 1
        else:
            index_1 = 1
            index_4 = 0
        if points[3][1] > points[2][1]:
            index_2 = 2
            index_3 = 3
        else:
            index_2 = 3
            index_3 = 2

        box = [
            points[index_1], points[index_2], points[index_3], points[index_4]
        ]
        return box, min(bounding_box[1])

    def box_score_fast(self, bitmap, _box):
        h, w = bitmap.shape[:2]
        box = _box.copy()
        # np.int is removed in recent numpy; use np.int32 explicitly
        xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int32), 0, w - 1)
        xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int32), 0, w - 1)
        ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int32), 0, h - 1)
        ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int32), 0, h - 1)

        mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8)
        box[:, 0] = box[:, 0] - xmin
        box[:, 1] = box[:, 1] - ymin
        cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1)
        return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
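The unclip step above implements the DB paper's offset rule, expanding each shrunk polygon outward by a distance proportional to area over perimeter. A worked check of that arithmetic with illustrative numbers:

from shapely.geometry import Polygon

# For a 100 x 20 box: area A = 2000, perimeter L = 240, so with ratio r = 1.5
# the offset distance is D = A * r / L = 2000 * 1.5 / 240 = 12.5 pixels.
poly = Polygon([(0, 0), (100, 0), (100, 20), (0, 20)])
print(poly.area * 1.5 / poly.length)  # 12.5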
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from .locality_aware_nms import nms_locality
import cv2
import os
import sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
class EASTPostPocess(object):
    """
    The post process for EAST.
    """

    def __init__(self, params):
        self.score_thresh = params['score_thresh']
        self.cover_thresh = params['cover_thresh']
        self.nms_thresh = params['nms_thresh']

        # c++ la-nms is faster, but it only supports python 3.5
        self.is_python35 = False
        if sys.version_info.major == 3 and sys.version_info.minor == 5:
            self.is_python35 = True

    def restore_rectangle_quad(self, origin, geometry):
        """
        Restore rectangle from quadrangle.
        """
        # quad
        origin_concat = np.concatenate(
            (origin, origin, origin, origin), axis=1)  # (n, 8)
        pred_quads = origin_concat - geometry
        pred_quads = pred_quads.reshape((-1, 4, 2))  # (n, 4, 2)
        return pred_quads

    def detect(self,
               score_map,
               geo_map,
               score_thresh=0.8,
               cover_thresh=0.1,
               nms_thresh=0.2):
        """
        restore text boxes from score map and geo map
        """
        score_map = score_map[0]
        geo_map = np.swapaxes(geo_map, 1, 0)
        geo_map = np.swapaxes(geo_map, 1, 2)
        # filter the score map
        xy_text = np.argwhere(score_map > score_thresh)
        if len(xy_text) == 0:
            return []
        # sort the text boxes via the y axis
        xy_text = xy_text[np.argsort(xy_text[:, 0])]
        # restore quad proposals
        text_box_restored = self.restore_rectangle_quad(
            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
        boxes[:, :8] = text_box_restored.reshape((-1, 8))
        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
        if self.is_python35:
            import lanms
            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
        else:
            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
        if boxes.shape[0] == 0:
            return []
        # Here we filter some low score boxes by the average score map,
        # this is different from the original paper.
        for i, box in enumerate(boxes):
            mask = np.zeros_like(score_map, dtype=np.uint8)
            cv2.fillPoly(mask, box[:8].reshape(
                (-1, 4, 2)).astype(np.int32) // 4, 1)
            boxes[i, 8] = cv2.mean(score_map, mask)[0]
        boxes = boxes[boxes[:, 8] > cover_thresh]
        return boxes

    def sort_poly(self, p):
        """
        Sort polygons.
        """
        min_axis = np.argmin(np.sum(p, axis=1))
        p = p[[min_axis, (min_axis + 1) % 4,
               (min_axis + 2) % 4, (min_axis + 3) % 4]]
        if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
            return p
        else:
            return p[[0, 3, 2, 1]]

    def __call__(self, outs_dict, ratio_list):
        score_list = outs_dict['f_score']
        geo_list = outs_dict['f_geo']
        img_num = len(ratio_list)
        dt_boxes_list = []
        for ino in range(img_num):
            score = score_list[ino]
            geo = geo_list[ino]
            boxes = self.detect(
                score_map=score,
                geo_map=geo,
                score_thresh=self.score_thresh,
                cover_thresh=self.cover_thresh,
                nms_thresh=self.nms_thresh)
            boxes_norm = []
            if len(boxes) > 0:
                ratio_h, ratio_w = ratio_list[ino]
                boxes = boxes[:, :8].reshape((-1, 4, 2))
                boxes[:, :, 0] /= ratio_w
                boxes[:, :, 1] /= ratio_h
                for i_box, box in enumerate(boxes):
                    box = self.sort_poly(box.astype(np.int32))
                    if np.linalg.norm(box[0] - box[1]) < 5 \
                            or np.linalg.norm(box[3] - box[0]) < 5:
                        continue
                    boxes_norm.append(box)
            dt_boxes_list.append(np.array(boxes_norm))
        return dt_boxes_list
#!/usr/bin/env python
#
# Copyright (C) 2014 Google Inc.
#
# This file is part of YouCompleteMe.
#
# YouCompleteMe is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# YouCompleteMe is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with YouCompleteMe. If not, see <http://www.gnu.org/licenses/>.
import os
import sys
import glob
import ycm_core
# These are the compilation flags that will be used in case there's no
# compilation database set (by default, one is not set).
# CHANGE THIS LIST OF FLAGS. YES, THIS IS THE DROID YOU HAVE BEEN LOOKING FOR.
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
BASE_DIR = os.path.dirname(os.path.realpath(__file__))
from plumbum.cmd import python_config
flags = [
    '-Wall',
    '-Wextra',
    '-Wnon-virtual-dtor',
    '-Winvalid-pch',
    '-Wno-unused-local-typedefs',
    '-std=c++11',
    '-x', 'c++',
    '-Iinclude',
] + python_config('--cflags').split()
# Set this to the absolute path to the folder (NOT the file!) containing the
# compile_commands.json file to use that instead of 'flags'. See here for
# more details: http://clang.llvm.org/docs/JSONCompilationDatabase.html
#
# Most projects will NOT need to set this to anything; you can just change the
# 'flags' list of compilation flags.
compilation_database_folder = ''
if os.path.exists( compilation_database_folder ):
  database = ycm_core.CompilationDatabase( compilation_database_folder )
else:
  database = None

SOURCE_EXTENSIONS = [ '.cpp', '.cxx', '.cc', '.c', '.m', '.mm' ]

def DirectoryOfThisScript():
  return os.path.dirname( os.path.abspath( __file__ ) )


def MakeRelativePathsInFlagsAbsolute( flags, working_directory ):
  if not working_directory:
    return list( flags )
  new_flags = []
  make_next_absolute = False
  path_flags = [ '-isystem', '-I', '-iquote', '--sysroot=' ]
  for flag in flags:
    new_flag = flag

    if make_next_absolute:
      make_next_absolute = False
      if not flag.startswith( '/' ):
        new_flag = os.path.join( working_directory, flag )

    for path_flag in path_flags:
      if flag == path_flag:
        make_next_absolute = True
        break

      if flag.startswith( path_flag ):
        path = flag[ len( path_flag ): ]
        new_flag = path_flag + os.path.join( working_directory, path )
        break

    if new_flag:
      new_flags.append( new_flag )
  return new_flags


def IsHeaderFile( filename ):
  extension = os.path.splitext( filename )[ 1 ]
  return extension in [ '.h', '.hxx', '.hpp', '.hh' ]


def GetCompilationInfoForFile( filename ):
  # The compilation_commands.json file generated by CMake does not have entries
  # for header files. So we do our best by asking the db for flags for a
  # corresponding source file, if any. If one exists, the flags for that file
  # should be good enough.
  if IsHeaderFile( filename ):
    basename = os.path.splitext( filename )[ 0 ]
    for extension in SOURCE_EXTENSIONS:
      replacement_file = basename + extension
      if os.path.exists( replacement_file ):
        compilation_info = database.GetCompilationInfoForFile(
          replacement_file )
        if compilation_info.compiler_flags_:
          return compilation_info
    return None
  return database.GetCompilationInfoForFile( filename )


# This is the entry point; this function is called by ycmd to produce flags for
# a file.
def FlagsForFile( filename, **kwargs ):
  if database:
    # Bear in mind that compilation_info.compiler_flags_ does NOT return a
    # python list, but a "list-like" StringVec object
    compilation_info = GetCompilationInfoForFile( filename )
    if not compilation_info:
      return None
    final_flags = MakeRelativePathsInFlagsAbsolute(
      compilation_info.compiler_flags_,
      compilation_info.compiler_working_dir_ )
  else:
    relative_to = DirectoryOfThisScript()
    final_flags = MakeRelativePathsInFlagsAbsolute( flags, relative_to )

  return {
    'flags': final_flags,
    'do_cache': True
  }
CXXFLAGS = -I include -std=c++11 -O3 $(shell python3-config --cflags)
LDFLAGS = $(shell python3-config --ldflags)

DEPS = lanms.h $(shell find include -xtype f)
CXX_SOURCES = adaptor.cpp include/clipper/clipper.cpp

LIB_SO = adaptor.so

$(LIB_SO): $(CXX_SOURCES) $(DEPS)
	$(CXX) -o $@ $(CXXFLAGS) $(LDFLAGS) $(CXX_SOURCES) --shared -fPIC

clean:
	rm -rf $(LIB_SO)