add east & sast

021c1132 · MissPenguin · 8a5566c9 · 021c1132 · 021c1132 · 021c1132
Commit 021c1132 authored Dec 09, 2020 by MissPenguin
16 changed files
--- a/ppocr/losses/det_sast_loss.py
+++ b/ppocr/losses/det_sast_loss.py
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+from .det_basic_loss import DiceLoss
+import paddle.fluid as fluid
+import numpy as np
+
+
+class SASTLoss(nn.Layer):
+    """
+    Training loss for the SAST text detector.
+
+    Adds a dice-style loss on the score map to three masked,
+    norm-weighted smooth-L1 losses on the border, tvo and tco maps.
+    """
+
+    def __init__(self,
+                 eps=1e-6,
+                 **kwargs):
+        """
+        Args:
+            eps: forwarded to DiceLoss. forward() itself uses a fixed 1e-5
+                as the guard in its denominators.
+            **kwargs: accepted and ignored.
+        """
+        super(SASTLoss, self).__init__()
+        # Not called by forward(); kept so the layer's attributes stay the same.
+        self.dice_loss = DiceLoss(eps=eps)
+
+    def _masked_smooth_l1(self, pred, label, l_score, l_mask, channels):
+        """
+        Masked, norm-weighted smooth-L1 loss shared by border/tvo/tco.
+
+        Args:
+            pred: predicted map; subtracted element-wise from the first
+                `channels` channels of `label`.
+            label: target whose axis 1 holds `channels` geometry channels
+                followed by one normalisation-weight channel.
+            l_score: score label, expanded to the geometry shape.
+            l_mask: training mask, expanded to the geometry shape.
+            channels: number of geometry channels in `label`.
+
+        Returns:
+            Scalar tensor: weighted loss summed over score * mask, divided
+            by sum(score * mask) + 1e-5.
+        """
+        geo, norm = paddle.split(
+            label, num_or_sections=[channels, 1], axis=1)
+        ex_shape = norm.shape * np.array([1, channels, 1, 1])
+        norm = paddle.expand(x=norm, shape=ex_shape)
+        score = paddle.expand(x=l_score, shape=ex_shape)
+        mask = paddle.expand(x=l_mask, shape=ex_shape)
+
+        abs_diff = paddle.abs(geo - pred)
+        # 1.0 where the quadratic branch applies (|diff| < 1), else 0.0.
+        sign = paddle.cast(abs_diff < 1.0, dtype='float32')
+        sign.stop_gradient = True
+        in_loss = 0.5 * abs_diff * abs_diff * sign + \
+            (abs_diff - 0.5) * (1.0 - sign)
+        out_loss = norm * in_loss
+        return paddle.sum(out_loss * score * mask) / \
+            (paddle.sum(score * mask) + 1e-5)
+
+    def forward(self, predicts, labels):
+        """
+        Args:
+            predicts: dict with keys 'f_score', 'f_border', 'f_tvo', 'f_tco'.
+            labels: sequence whose elements after the first are, in order,
+                l_score, l_border, l_mask, l_tvo, l_tco. Along axis 1,
+                l_border is split as 4+1, l_tvo as 8+1 and l_tco as 2+1
+                (geometry channels + normalisation weight).
+
+        Returns:
+            dict with 'loss' (weighted total) and the individual
+            'score_loss', 'border_loss', 'tvo_loss', 'tco_loss'.
+        """
+        f_score = predicts['f_score']
+        f_border = predicts['f_border']
+        f_tvo = predicts['f_tvo']
+        f_tco = predicts['f_tco']
+
+        l_score, l_border, l_mask, l_tvo, l_tco = labels[1:]
+
+        # score loss (dice coefficient over the masked region)
+        intersection = paddle.sum(f_score * l_score * l_mask)
+        union = paddle.sum(f_score * l_mask) + paddle.sum(l_score * l_mask)
+        score_loss = 1.0 - 2 * intersection / (union + 1e-5)
+
+        # geometry losses
+        border_loss = self._masked_smooth_l1(
+            f_border, l_border, l_score, l_mask, 4)
+        tvo_loss = self._masked_smooth_l1(f_tvo, l_tvo, l_score, l_mask, 8)
+        tco_loss = self._masked_smooth_l1(f_tco, l_tco, l_score, l_mask, 2)
+
+        # total loss
+        tvo_lw, tco_lw = 1.5, 1.5
+        score_lw, border_lw = 1.0, 1.0
+        total_loss = score_loss * score_lw + border_loss * border_lw + \
+            tvo_loss * tvo_lw + tco_loss * tco_lw
+
+        losses = {
+            'loss': total_loss,
+            "score_loss": score_loss,
+            "border_loss": border_loss,
+            'tvo_loss': tvo_loss,
+            'tco_loss': tco_loss,
+        }
+        return losses
\ No newline at end of file
--- a/ppocr/modeling/backbones/__init__.py
+++ b/ppocr/modeling/backbones/__init__.py
@@ -19,6 +19,7 @@ def build_backbone(config, model_type):
    if model_type == 'det':
        from .det_mobilenet_v3 import MobileNetV3
        from .det_resnet_vd import ResNet
+        from .det_resnet_vd_sast import ResNet_SAST
        support_dict = ['MobileNetV3', 'ResNet', 'ResNet_SAST']
    elif model_type == 'rec' or model_type == 'cls':
        from .rec_mobilenet_v3 import MobileNetV3

--- a/ppocr/modeling/backbones/det_resnet_vd_sast.py
+++ b/ppocr/modeling/backbones/det_resnet_vd_sast.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+__all__ = ["ResNet_SAST"]
+
+
+class ConvBNLayer(nn.Layer):
+    """
+    Bias-free Conv2D followed by BatchNorm.
+
+    When `is_vd_mode` is set, the input first goes through a 2x2,
+    stride-2 average pool.
+    """
+
+    def __init__(
+            self,
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride=1,
+            groups=1,
+            is_vd_mode=False,
+            act=None,
+            name=None, ):
+        super(ConvBNLayer, self).__init__()
+
+        self.is_vd_mode = is_vd_mode
+        # Always built; only applied in forward() when is_vd_mode is true.
+        self._pool2d_avg = nn.AvgPool2D(
+            kernel_size=2, stride=2, padding=0, ceil_mode=True)
+
+        conv_weight = ParamAttr(name=name + "_weights")
+        self._conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=(kernel_size - 1) // 2,
+            groups=groups,
+            weight_attr=conv_weight,
+            bias_attr=False)
+
+        # "conv1" maps to "bn_conv1"; any other name has its first three
+        # characters replaced by "bn".
+        bn_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
+        self._batch_norm = nn.BatchNorm(
+            out_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+    def forward(self, inputs):
+        """Apply the optional pooling, then the convolution and batch norm."""
+        x = self._pool2d_avg(inputs) if self.is_vd_mode else inputs
+        return self._batch_norm(self._conv(x))
+
+
+class BottleneckBlock(nn.Layer):
+    """
+    1x1 -> 3x3 -> 1x1 residual block whose output has
+    `out_channels * 4` channels.
+
+    With `shortcut=False` the skip path is a 1x1 ConvBNLayer projection;
+    otherwise the input is added unchanged.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 stride,
+                 shortcut=True,
+                 if_first=False,
+                 name=None):
+        super(BottleneckBlock, self).__init__()
+
+        expanded = out_channels * 4
+
+        self.conv0 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=1,
+            act='relu',
+            name=name + "_branch2a")
+        # The block's stride is applied on the 3x3 convolution.
+        self.conv1 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2b")
+        self.conv2 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=expanded,
+            kernel_size=1,
+            act=None,
+            name=name + "_branch2c")
+
+        if not shortcut:
+            # The projection pools before its conv unless this is the
+            # first block of the network.
+            self.short = ConvBNLayer(
+                in_channels=in_channels,
+                out_channels=expanded,
+                kernel_size=1,
+                stride=1,
+                is_vd_mode=not if_first,
+                name=name + "_branch1")
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        """Return relu(skip(inputs) + conv2(conv1(conv0(inputs))))."""
+        residual = self.conv2(self.conv1(self.conv0(inputs)))
+        skip = inputs if self.shortcut else self.short(inputs)
+        return F.relu(paddle.add(x=skip, y=residual))
+
+
+class BasicBlock(nn.Layer):
+    """
+    Residual block made of two 3x3 ConvBNLayers.
+
+    With `shortcut=False` the skip path is a 1x1 ConvBNLayer projection;
+    otherwise the input is added unchanged.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 stride,
+                 shortcut=True,
+                 if_first=False,
+                 name=None):
+        super(BasicBlock, self).__init__()
+        self.stride = stride
+
+        # The block's stride is applied on the first convolution.
+        self.conv0 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2a")
+        self.conv1 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            act=None,
+            name=name + "_branch2b")
+
+        if not shortcut:
+            # The projection pools before its conv unless this is the
+            # first block of the network.
+            self.short = ConvBNLayer(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1,
+                stride=1,
+                is_vd_mode=not if_first,
+                name=name + "_branch1")
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        """Return relu(skip(inputs) + conv1(conv0(inputs)))."""
+        residual = self.conv1(self.conv0(inputs))
+        skip = inputs if self.shortcut else self.short(inputs)
+        return F.relu(paddle.add(x=skip, y=residual))
+
+
+class ResNet_SAST(nn.Layer):
+    """
+    ResNet backbone built from the vd-style ConvBNLayer blocks.
+
+    forward() returns a list holding the raw input, the stem output and
+    the output of every residual stage. `self.out_channels` lists the
+    channel count of each entry in the same order.
+    """
+
+    def __init__(self, in_channels=3, layers=50, **kwargs):
+        """
+        Args:
+            in_channels: channel count of the input tensor.
+            layers: network depth; one of 18, 34, 50, 101, 152, 200.
+            **kwargs: accepted and ignored.
+        """
+        super(ResNet_SAST, self).__init__()
+
+        self.layers = layers
+        supported_layers = [18, 34, 50, 101, 152, 200]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(
+                supported_layers, layers)
+
+        # Number of residual blocks per stage; 34 and 50 have a fifth stage.
+        if layers == 18:
+            depth = [2, 2, 2, 2]
+        elif layers == 34 or layers == 50:
+            depth = [3, 4, 6, 3, 3]
+        elif layers == 101:
+            depth = [3, 4, 23, 3]
+        elif layers == 152:
+            depth = [3, 8, 36, 3]
+        elif layers == 200:
+            depth = [3, 12, 48, 3]
+
+        # Input width of each stage. The basic-block list needs a fifth
+        # entry too: layers=34 has five stages, and a four-entry list
+        # raised IndexError while building the last one.
+        num_channels = [64, 256, 512, 1024, 2048] if layers >= 50 \
+            else [64, 64, 128, 256, 512]
+        num_filters = [64, 128, 256, 512, 512]
+
+        # Stem: three 3x3 convolutions followed by a stride-2 max pool.
+        self.conv1_1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=32,
+            kernel_size=3,
+            stride=2,
+            act='relu',
+            name="conv1_1")
+        self.conv1_2 = ConvBNLayer(
+            in_channels=32,
+            out_channels=32,
+            kernel_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_2")
+        self.conv1_3 = ConvBNLayer(
+            in_channels=32,
+            out_channels=64,
+            kernel_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_3")
+        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        # Bottleneck blocks widen their output by 4; basic blocks do not.
+        use_bottleneck = layers >= 50
+        block_cls = BottleneckBlock if use_bottleneck else BasicBlock
+        expansion = 4 if use_bottleneck else 1
+
+        self.stages = []
+        # forward() returns the raw input first, so its channel count is
+        # in_channels (previously hard-coded to 3).
+        self.out_channels = [in_channels, 64]
+        for block in range(len(depth)):
+            stage_width = num_filters[block] * expansion
+            block_list = []
+            for i in range(depth[block]):
+                # Parameter-name suffix: "a", "b1", "b2", ... for the third
+                # stage of the 101/152 variants, "a", "b", "c", ... otherwise.
+                if layers in [101, 152] and block == 2:
+                    suffix = "a" if i == 0 else "b" + str(i)
+                else:
+                    suffix = chr(97 + i)
+                conv_name = "res" + str(block + 2) + suffix
+                res_block = self.add_sublayer(
+                    'bb_%d_%d' % (block, i),
+                    block_cls(
+                        in_channels=num_channels[block]
+                        if i == 0 else stage_width,
+                        out_channels=num_filters[block],
+                        # Every stage except the first starts with stride 2.
+                        stride=2 if i == 0 and block != 0 else 1,
+                        # Only the first block of a stage projects its skip.
+                        shortcut=i != 0,
+                        if_first=block == i == 0,
+                        name=conv_name))
+                block_list.append(res_block)
+            self.out_channels.append(stage_width)
+            self.stages.append(nn.Sequential(*block_list))
+
+    def forward(self, inputs):
+        """Return [inputs, stem output, stage outputs...] as a list."""
+        out = [inputs]
+        y = self.conv1_3(self.conv1_2(self.conv1_1(inputs)))
+        out.append(y)
+        y = self.pool2d_max(y)
+        for stage in self.stages:
+            y = stage(y)
+            out.append(y)
+        return out
\ No newline at end of file
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
@@ -18,13 +18,15 @@ __all__ = ['build_head']
 def build_head(config):
    # det head
    from .det_db_head import DBHead
+    from .det_east_head import EASTHead
+    from .det_sast_head import SASTHead

    # rec head
    from .rec_ctc_head import CTCHead

    # cls head
    from .cls_head import ClsHead
-    support_dict = ['DBHead', 'CTCHead', 'ClsHead']
+    support_dict = ['DBHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead']

    module_name = config.pop('name')
    assert module_name in support_dict, Exception('head only support {}'.format(

--- a/ppocr/modeling/heads/det_east_head.py
+++ b/ppocr/modeling/heads/det_east_head.py
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class ConvBNLayer(nn.Layer):
+    """
+    Bias-free Conv2D followed by BatchNorm.
+
+    `act` is handed to BatchNorm as its activation. `if_act` is stored
+    but not read by this class.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+
+        conv_weight = ParamAttr(name=name + '_weights')
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=conv_weight,
+            bias_attr=False)
+
+        # All batch-norm parameter names share the "bn_<name>" prefix.
+        bn_prefix = "bn_" + name
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_prefix + "_scale"),
+            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
+            moving_mean_name=bn_prefix + "_mean",
+            moving_variance_name=bn_prefix + "_variance")
+
+    def forward(self, x):
+        """Run the convolution, then batch norm."""
+        return self.bn(self.conv(x))
+
+
+class EASTHead(nn.Layer):
+    """
+    EAST detection head.
+
+    Two 3x3 ConvBNLayers feed a 1-channel score branch and an 8-channel
+    geometry branch, both 1x1. The "large" model uses widths 128/64 for
+    the shared convolutions; any other `model_name` uses 64/32.
+    """
+
+    def __init__(self, in_channels, model_name, **kwargs):
+        super(EASTHead, self).__init__()
+        self.model_name = model_name
+        if self.model_name == "large":
+            width1, width2 = 128, 64
+        else:
+            width1, width2 = 64, 32
+        score_channels, geo_channels = 1, 8
+
+        self.det_conv1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=width1,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            if_act=True,
+            act='relu',
+            name="det_head1")
+        self.det_conv2 = ConvBNLayer(
+            in_channels=width1,
+            out_channels=width2,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            if_act=True,
+            act='relu',
+            name="det_head2")
+        self.score_conv = ConvBNLayer(
+            in_channels=width2,
+            out_channels=score_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            if_act=False,
+            act=None,
+            name="f_score")
+        self.geo_conv = ConvBNLayer(
+            in_channels=width2,
+            out_channels=geo_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            if_act=False,
+            act=None,
+            name="f_geo")
+
+    def forward(self, x):
+        """
+        Returns:
+            dict with 'f_score' (sigmoid output) and 'f_geo' (sigmoid
+            output rescaled to the open interval (-800, 800)).
+        """
+        feat = self.det_conv2(self.det_conv1(x))
+        f_score = F.sigmoid(self.score_conv(feat))
+        f_geo = (F.sigmoid(self.geo_conv(feat)) - 0.5) * 2 * 800
+        return {'f_score': f_score, 'f_geo': f_geo}
--- a/ppocr/modeling/heads/det_sast_head.py
+++ b/ppocr/modeling/heads/det_sast_head.py
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class ConvBNLayer(nn.Layer):
+    """
+    Bias-free Conv2D with padding (kernel_size - 1) // 2, followed by
+    BatchNorm.
+
+    `act` is handed to BatchNorm as its activation. `if_act` is stored
+    but not read by this class.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+
+        conv_weight = ParamAttr(name=name + '_weights')
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=(kernel_size - 1) // 2,
+            groups=groups,
+            weight_attr=conv_weight,
+            bias_attr=False)
+
+        # All batch-norm parameter names share the "bn_<name>" prefix.
+        bn_prefix = "bn_" + name
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_prefix + "_scale"),
+            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
+            moving_mean_name=bn_prefix + "_mean",
+            moving_variance_name=bn_prefix + "_variance")
+
+    def forward(self, x):
+        """Run the convolution, then batch norm."""
+        return self.bn(self.conv(x))
+
+
+class SAST_Header1(nn.Layer):
+    """
+    Score and border branches of the SAST head.
+
+    Each branch is a 1x1 -> 3x3 -> 1x1 -> 3x3 stack of ConvBNLayers with
+    widths 64, 64, 128 and a final 1 (score) or 4 (border) channels.
+    """
+
+    def __init__(self, in_channels, **kwargs):
+        super(SAST_Header1, self).__init__()
+        width0, width1, width2 = 64, 64, 128
+
+        def branch(prefix, num_out):
+            # Layers are named "<prefix>1" .. "<prefix>4"; the last has no
+            # activation.
+            return nn.Sequential(
+                ConvBNLayer(in_channels, width0, 1, 1,
+                            act='relu', name=prefix + '1'),
+                ConvBNLayer(width0, width1, 3, 1,
+                            act='relu', name=prefix + '2'),
+                ConvBNLayer(width1, width2, 1, 1,
+                            act='relu', name=prefix + '3'),
+                ConvBNLayer(width2, num_out, 3, 1,
+                            act=None, name=prefix + '4'))
+
+        self.score_conv = branch('f_score', 1)
+        self.border_conv = branch('f_border', 4)
+
+    def forward(self, x):
+        """Return (sigmoid score map, raw border map)."""
+        f_score = F.sigmoid(self.score_conv(x))
+        f_border = self.border_conv(x)
+        return f_score, f_border
+
+
+class SAST_Header2(nn.Layer):
+    """
+    tvo and tco branches of the SAST head.
+
+    Each branch is a 1x1 -> 3x3 -> 1x1 -> 3x3 stack of ConvBNLayers with
+    widths 64, 64, 128 and a final 8 (tvo) or 2 (tco) channels.
+    """
+
+    def __init__(self, in_channels, **kwargs):
+        super(SAST_Header2, self).__init__()
+        width0, width1, width2 = 64, 64, 128
+
+        def branch(prefix, num_out):
+            # Layers are named "<prefix>1" .. "<prefix>4"; the last has no
+            # activation.
+            return nn.Sequential(
+                ConvBNLayer(in_channels, width0, 1, 1,
+                            act='relu', name=prefix + '1'),
+                ConvBNLayer(width0, width1, 3, 1,
+                            act='relu', name=prefix + '2'),
+                ConvBNLayer(width1, width2, 1, 1,
+                            act='relu', name=prefix + '3'),
+                ConvBNLayer(width2, num_out, 3, 1,
+                            act=None, name=prefix + '4'))
+
+        self.tvo_conv = branch('f_tvo', 8)
+        self.tco_conv = branch('f_tco', 2)
+
+    def forward(self, x):
+        """Return (tvo map, tco map), both without a final activation."""
+        return self.tvo_conv(x), self.tco_conv(x)
+
+
+class SASTHead(nn.Layer):
+    """
+    SAST detection head: runs SAST_Header1 and SAST_Header2 on the same
+    input and collects their four outputs in a dict.
+    """
+
+    def __init__(self, in_channels, **kwargs):
+        super(SASTHead, self).__init__()
+
+        self.head1 = SAST_Header1(in_channels)
+        self.head2 = SAST_Header2(in_channels)
+
+    def forward(self, x):
+        """Return a dict with keys 'f_score', 'f_border', 'f_tvo', 'f_tco'."""
+        score, border = self.head1(x)
+        tvo, tco = self.head2(x)
+        return {
+            'f_score': score,
+            'f_border': border,
+            'f_tvo': tvo,
+            'f_tco': tco,
+        }
\ No newline at end of file
--- a/ppocr/modeling/necks/__init__.py
+++ b/ppocr/modeling/necks/__init__.py
@@ -16,8 +16,10 @@ __all__ = ['build_neck']

 def build_neck(config):
    from .db_fpn import DBFPN
+    from .east_fpn import EASTFPN
+    from .sast_fpn import SASTFPN
    from .rnn import SequenceEncoder
-    support_dict = ['DBFPN', 'SequenceEncoder']
+    support_dict = ['DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder']

    module_name = config.pop('name')
    assert module_name in support_dict, Exception('neck only support {}'.format(

--- a/ppocr/modeling/necks/east_fpn.py
+++ b/ppocr/modeling/necks/east_fpn.py
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class ConvBNLayer(nn.Layer):
+    """
+    Bias-free Conv2D followed by BatchNorm.
+
+    `act` is handed to BatchNorm as its activation. `if_act` is stored
+    but not read by this class.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+
+        conv_weight = ParamAttr(name=name + '_weights')
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=conv_weight,
+            bias_attr=False)
+
+        # All batch-norm parameter names share the "bn_<name>" prefix.
+        bn_prefix = "bn_" + name
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_prefix + "_scale"),
+            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
+            moving_mean_name=bn_prefix + "_mean",
+            moving_variance_name=bn_prefix + "_variance")
+
+    def forward(self, x):
+        """Run the convolution, then batch norm."""
+        return self.bn(self.conv(x))
+
+
+class DeConvBNLayer(nn.Layer):
+    """
+    Bias-free Conv2DTranspose followed by BatchNorm.
+
+    `act` is handed to BatchNorm as its activation. `if_act` is stored
+    but not read by this class.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(DeConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+
+        deconv_weight = ParamAttr(name=name + '_weights')
+        self.deconv = nn.Conv2DTranspose(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=deconv_weight,
+            bias_attr=False)
+
+        # All batch-norm parameter names share the "bn_<name>" prefix.
+        bn_prefix = "bn_" + name
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_prefix + "_scale"),
+            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
+            moving_mean_name=bn_prefix + "_mean",
+            moving_variance_name=bn_prefix + "_variance")
+
+    def forward(self, x):
+        """Run the transposed convolution, then batch norm."""
+        return self.bn(self.deconv(x))
+
+
+class EASTFPN(nn.Layer):
+    """
+    U-Net style feature-merging neck for EAST.
+
+    Starting from the last backbone feature, it applies a stride-2
+    transposed convolution, concatenates the result with the next feature
+    (walking the input list backwards) and fuses the pair with a 3x3
+    convolution. This is done three times, followed by a final 3x3
+    convolution. Every layer outputs `self.out_channels` channels: 128
+    for the "large" model, 64 otherwise.
+    """
+
+    def __init__(self, in_channels, model_name, **kwargs):
+        """
+        Args:
+            in_channels: channel counts of the backbone features, in the
+                backbone's own order; stored reversed. At least four
+                entries are read.
+            model_name: "large" selects 128 output channels, anything
+                else 64.
+            **kwargs: accepted and ignored.
+        """
+        super(EASTFPN, self).__init__()
+        self.model_name = model_name
+        self.out_channels = 128 if self.model_name == "large" else 64
+        self.in_channels = in_channels[::-1]
+
+        width = self.out_channels
+
+        def conv3x3(num_in, name):
+            return ConvBNLayer(
+                in_channels=num_in,
+                out_channels=width,
+                kernel_size=3,
+                stride=1,
+                padding=1,
+                if_act=True,
+                act='relu',
+                name=name)
+
+        def deconv4x4(num_in, name):
+            return DeConvBNLayer(
+                in_channels=num_in,
+                out_channels=width,
+                kernel_size=4,
+                stride=2,
+                padding=1,
+                if_act=True,
+                act='relu',
+                name=name)
+
+        # Fusion convs consume the upsampled map concatenated with a skip.
+        self.h1_conv = conv3x3(width + self.in_channels[1], "unet_h_1")
+        self.h2_conv = conv3x3(width + self.in_channels[2], "unet_h_2")
+        self.h3_conv = conv3x3(width + self.in_channels[3], "unet_h_3")
+        self.g0_deconv = deconv4x4(self.in_channels[0], "unet_g_0")
+        self.g1_deconv = deconv4x4(width, "unet_g_1")
+        self.g2_deconv = deconv4x4(width, "unet_g_2")
+        self.g3_conv = conv3x3(width, "unet_g_3")
+
+    def forward(self, x):
+        """
+        Args:
+            x: sequence of backbone feature maps; the last four are used.
+
+        Returns:
+            The merged feature map produced by the final 3x3 convolution.
+        """
+        feats = x[::-1]
+        steps = (
+            (self.g0_deconv, self.h1_conv),
+            (self.g1_deconv, self.h2_conv),
+            (self.g2_deconv, self.h3_conv), )
+
+        h = feats[0]
+        for idx, (upsample, fuse) in enumerate(steps, 1):
+            h = fuse(paddle.concat([upsample(h), feats[idx]], axis=1))
+        return self.g3_conv(h)
\ No newline at end of file
--- a/ppocr/modeling/necks/sast_fpn.py
+++ b/ppocr/modeling/necks/sast_fpn.py
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class ConvBNLayer(nn.Layer):
+    """
+    Bias-free Conv2D followed by BatchNorm.
+
+    args:
+        in_channels: number of input channels of the convolution.
+        out_channels: number of output channels of the convolution and of
+            the batch norm.
+        kernel_size: integer kernel size; the padding is derived from it as
+            ``(kernel_size - 1) // 2``.
+        stride: stride of the convolution.
+        groups: number of convolution groups.
+        if_act: stored on the layer but not read by ``forward``.
+        act: activation name handed to the batch norm layer (None for no
+            activation).
+        name: prefix used to build the parameter names. When None the
+            parameter names are left to the framework.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+
+        def _param_name(prefix, suffix):
+            # ``name`` defaults to None; concatenating it with a string would
+            # raise a TypeError, so fall back to automatic naming instead.
+            if name is None:
+                return None
+            return prefix + name + suffix
+
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=(kernel_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=_param_name('', '_weights')),
+            bias_attr=False)
+
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name=_param_name("bn_", "_scale")),
+            bias_attr=ParamAttr(name=_param_name("bn_", "_offset")),
+            moving_mean_name=_param_name("bn_", "_mean"),
+            moving_variance_name=_param_name("bn_", "_variance"))
+
+    def forward(self, x):
+        """Apply the convolution, then the batch norm (with its activation)."""
+        x = self.conv(x)
+        x = self.bn(x)
+        return x
+
+
+class DeConvBNLayer(nn.Layer):
+    """
+    Bias-free Conv2DTranspose followed by BatchNorm.
+
+    args:
+        in_channels: number of input channels of the transposed convolution.
+        out_channels: number of output channels of the transposed
+            convolution and of the batch norm.
+        kernel_size: integer kernel size; the padding is derived from it as
+            ``(kernel_size - 1) // 2``.
+        stride: stride of the transposed convolution.
+        groups: number of convolution groups.
+        if_act: stored on the layer but not read by ``forward``.
+        act: activation name handed to the batch norm layer (None for no
+            activation).
+        name: prefix used to build the parameter names. When None the
+            parameter names are left to the framework.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(DeConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+
+        def _param_name(prefix, suffix):
+            # ``name`` defaults to None; concatenating it with a string would
+            # raise a TypeError, so fall back to automatic naming instead.
+            if name is None:
+                return None
+            return prefix + name + suffix
+
+        self.deconv = nn.Conv2DTranspose(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=(kernel_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(name=_param_name('', '_weights')),
+            bias_attr=False)
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name=_param_name("bn_", "_scale")),
+            bias_attr=ParamAttr(name=_param_name("bn_", "_offset")),
+            moving_mean_name=_param_name("bn_", "_mean"),
+            moving_variance_name=_param_name("bn_", "_variance"))
+
+    def forward(self, x):
+        """Apply the transposed convolution, then the batch norm."""
+        x = self.deconv(x)
+        x = self.bn(x)
+        return x
+
+
+class FPN_Up_Fusion(nn.Layer):
+    """
+    Upsampling fusion branch.
+
+    Five input feature maps are projected by 1x1 convs, then merged one after
+    another: the running map is upsampled by a stride-2 deconv, added to the
+    next projected map and passed through ReLU.
+
+    args:
+        in_channels: sequence of channel counts; it is reversed and the first
+            five entries of the reversed sequence are used.
+    """
+
+    def __init__(self, in_channels):
+        super(FPN_Up_Fusion, self).__init__()
+        src = in_channels[::-1]
+        widths = [256, 256, 192, 192, 128]
+
+        # 1x1 lateral projections
+        self.h0_conv = ConvBNLayer(
+            src[0], widths[0], 1, 1, act=None, name='fpn_up_h0')
+        self.h1_conv = ConvBNLayer(
+            src[1], widths[1], 1, 1, act=None, name='fpn_up_h1')
+        self.h2_conv = ConvBNLayer(
+            src[2], widths[2], 1, 1, act=None, name='fpn_up_h2')
+        self.h3_conv = ConvBNLayer(
+            src[3], widths[3], 1, 1, act=None, name='fpn_up_h3')
+        self.h4_conv = ConvBNLayer(
+            src[4], widths[4], 1, 1, act=None, name='fpn_up_h4')
+
+        # first upsampling step has no preceding 3x3 conv
+        self.g0_conv = DeConvBNLayer(
+            widths[0], widths[1], 4, 2, act=None, name='fpn_up_g0')
+
+        # 3x3 conv + stride-2 deconv
+        self.g1_conv = nn.Sequential(
+            ConvBNLayer(
+                widths[1], widths[1], 3, 1, act='relu', name='fpn_up_g1_1'),
+            DeConvBNLayer(
+                widths[1], widths[2], 4, 2, act=None, name='fpn_up_g1_2'))
+        self.g2_conv = nn.Sequential(
+            ConvBNLayer(
+                widths[2], widths[2], 3, 1, act='relu', name='fpn_up_g2_1'),
+            DeConvBNLayer(
+                widths[2], widths[3], 4, 2, act=None, name='fpn_up_g2_2'))
+        self.g3_conv = nn.Sequential(
+            ConvBNLayer(
+                widths[3], widths[3], 3, 1, act='relu', name='fpn_up_g3_1'),
+            DeConvBNLayer(
+                widths[3], widths[4], 4, 2, act=None, name='fpn_up_g3_2'))
+
+        # final fusion: 3x3 conv followed by 1x1 conv, no upsampling
+        self.g4_conv = nn.Sequential(
+            ConvBNLayer(
+                widths[4], widths[4], 3, 1, act='relu',
+                name='fpn_up_fusion_1'),
+            ConvBNLayer(
+                widths[4], widths[4], 1, 1, act=None, name='fpn_up_fusion_2'))
+
+    def _add_relu(self, x1, x2):
+        """Element-wise sum of two tensors followed by ReLU."""
+        return F.relu(paddle.add(x=x1, y=x2))
+
+    def forward(self, x):
+        """
+        args:
+            x: sequence of feature maps; the first two entries are dropped
+               and the rest is processed in reverse order.
+        return:
+            the fused feature map produced by ``g4_conv``.
+        """
+        feats = x[2:][::-1]
+        projections = (self.h0_conv, self.h1_conv, self.h2_conv,
+                       self.h3_conv, self.h4_conv)
+        laterals = [proj(feats[i]) for i, proj in enumerate(projections)]
+
+        merged = self.g0_conv(laterals[0])
+        refiners = (self.g1_conv, self.g2_conv, self.g3_conv, self.g4_conv)
+        for lateral, refine in zip(laterals[1:], refiners):
+            merged = refine(self._add_relu(merged, lateral))
+
+        return merged
+
+
+class FPN_Down_Fusion(nn.Layer):
+    """
+    Downsampling fusion branch.
+
+    The first three input feature maps are projected by 3x3 convs, then
+    merged one after another: the running map is downsampled by a stride-2
+    conv, added to the next projected map and passed through ReLU.
+
+    args:
+        in_channels: sequence of channel counts; the first three entries are
+            used.
+    """
+
+    def __init__(self, in_channels):
+        super(FPN_Down_Fusion, self).__init__()
+        widths = [32, 64, 128]
+
+        # 3x3 lateral projections
+        self.h0_conv = ConvBNLayer(
+            in_channels[0], widths[0], 3, 1, act=None, name='fpn_down_h0')
+        self.h1_conv = ConvBNLayer(
+            in_channels[1], widths[1], 3, 1, act=None, name='fpn_down_h1')
+        self.h2_conv = ConvBNLayer(
+            in_channels[2], widths[2], 3, 1, act=None, name='fpn_down_h2')
+
+        # first downsampling step: a single stride-2 conv
+        self.g0_conv = ConvBNLayer(
+            widths[0], widths[1], 3, 2, act=None, name='fpn_down_g0')
+
+        # 3x3 conv + stride-2 conv
+        self.g1_conv = nn.Sequential(
+            ConvBNLayer(
+                widths[1], widths[1], 3, 1, act='relu',
+                name='fpn_down_g1_1'),
+            ConvBNLayer(
+                widths[1], widths[2], 3, 2, act=None, name='fpn_down_g1_2'))
+
+        # final fusion: 3x3 conv followed by 1x1 conv, no downsampling
+        self.g2_conv = nn.Sequential(
+            ConvBNLayer(
+                widths[2], widths[2], 3, 1, act='relu',
+                name='fpn_down_fusion_1'),
+            ConvBNLayer(
+                widths[2], widths[2], 1, 1, act=None,
+                name='fpn_down_fusion_2'))
+
+    def forward(self, x):
+        """
+        args:
+            x: sequence of feature maps; only the first three are used.
+        return:
+            the fused feature map produced by ``g2_conv``.
+        """
+        feats = x[:3]
+        projections = (self.h0_conv, self.h1_conv, self.h2_conv)
+        laterals = [proj(feats[i]) for i, proj in enumerate(projections)]
+
+        merged = self.g0_conv(laterals[0])
+        for lateral, refine in zip(laterals[1:],
+                                   (self.g1_conv, self.g2_conv)):
+            merged = refine(F.relu(paddle.add(x=merged, y=lateral)))
+        return merged
+
+
+class Cross_Attention(nn.Layer):
+    """
+    Attention applied separately along the rows and along the columns of a
+    feature map, with the two results merged by a 1x1 conv.
+
+    args:
+        in_channels: channel count of the input feature map. It is also the
+            channel count of every intermediate map and of the output.
+    """
+
+    def __init__(self, in_channels):
+        super(Cross_Attention, self).__init__()
+        # Kept so that the reshapes and the softmax scale follow the actual
+        # channel count instead of a hard-coded 128.
+        self.in_channels = in_channels
+
+        self.theta_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_theta')
+        self.phi_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_phi')
+        self.g_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_g')
+
+        self.fh_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_weight')
+        self.fh_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_sc')
+
+        self.fv_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_weight')
+        self.fv_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_sc')
+
+        self.f_attn_conv = ConvBNLayer(in_channels * 2, in_channels, 1, 1, act='relu', name='f_attn')
+
+    def _cal_fweight(self, f, shape):
+        """
+        Attention along the third axis of channel-first feature maps.
+
+        args:
+            f: sequence ``(f_theta, f_phi, f_g)`` of channel-first 4-D maps.
+            shape: ``[d0, d1, d2]``, the sizes of the batch axis and of the
+                two spatial axes of the maps in ``f`` (in that order).
+        return:
+            tensor of shape ``[d0, d1, d2, in_channels]`` (channel-last).
+        """
+        f_theta, f_phi, f_g = f
+        channels = self.in_channels
+        # every slice along the second axis becomes its own sequence
+        flat_shape = [shape[0] * shape[1], shape[2], channels]
+        #flatten
+        f_theta = paddle.transpose(f_theta, [0, 2, 3, 1])
+        f_theta = paddle.reshape(f_theta, flat_shape)
+        f_phi = paddle.transpose(f_phi, [0, 2, 3, 1])
+        f_phi = paddle.reshape(f_phi, flat_shape)
+        f_g = paddle.transpose(f_g, [0, 2, 3, 1])
+        f_g = paddle.reshape(f_g, flat_shape)
+        #correlation
+        f_attn = paddle.matmul(f_theta, paddle.transpose(f_phi, [0, 2, 1]))
+        #scale
+        f_attn = f_attn / (channels**0.5)
+        f_attn = F.softmax(f_attn)
+        #weighted sum
+        f_weight = paddle.matmul(f_attn, f_g)
+        f_weight = paddle.reshape(
+            f_weight, [shape[0], shape[1], shape[2], channels])
+        return f_weight
+
+    def forward(self, f_common):
+        """
+        args:
+            f_common: channel-first 4-D feature map with ``in_channels``
+                channels.
+        return:
+            feature map produced by ``f_attn_conv`` from the concatenated
+            horizontal and vertical attention results.
+        """
+        f_shape = paddle.shape(f_common)
+
+        f_theta = self.theta_conv(f_common)
+        f_phi = self.phi_conv(f_common)
+        f_g = self.g_conv(f_common)
+
+        ######## horizon ########
+        fh_weight = self._cal_fweight([f_theta, f_phi, f_g],
+                                      [f_shape[0], f_shape[2], f_shape[3]])
+        # back to channel-first
+        fh_weight = paddle.transpose(fh_weight, [0, 3, 1, 2])
+        fh_weight = self.fh_weight_conv(fh_weight)
+        #short cut
+        fh_sc = self.fh_sc_conv(f_common)
+        f_h = F.relu(fh_weight + fh_sc)
+
+        ######## vertical ########
+        # swap the two spatial axes so that attention runs along the other one
+        fv_theta = paddle.transpose(f_theta, [0, 1, 3, 2])
+        fv_phi = paddle.transpose(f_phi, [0, 1, 3, 2])
+        fv_g = paddle.transpose(f_g, [0, 1, 3, 2])
+        fv_weight = self._cal_fweight([fv_theta, fv_phi, fv_g],
+                                      [f_shape[0], f_shape[3], f_shape[2]])
+        # back to channel-first and undo the spatial swap
+        fv_weight = paddle.transpose(fv_weight, [0, 3, 2, 1])
+        fv_weight = self.fv_weight_conv(fv_weight)
+        #short cut
+        fv_sc = self.fv_sc_conv(f_common)
+        f_v = F.relu(fv_weight + fv_sc)
+
+        ######## merge ########
+        f_attn = paddle.concat([f_h, f_v], axis=1)
+        f_attn = self.f_attn_conv(f_attn)
+        return f_attn
+
+
+class SASTFPN(nn.Layer):
+    """
+    Neck that sums the outputs of the down-fusion and up-fusion branches and
+    optionally refines the result with the cross attention block.
+
+    args:
+        in_channels: channel counts of the input feature maps, forwarded to
+            both fusion branches.
+        with_cab: when True the fused map is passed through the cross
+            attention block. The block is created in either case.
+    """
+
+    def __init__(self, in_channels, with_cab=False, **kwargs):
+        super(SASTFPN, self).__init__()
+        self.in_channels = in_channels
+        self.with_cab = with_cab
+        self.FPN_Down_Fusion = FPN_Down_Fusion(self.in_channels)
+        self.FPN_Up_Fusion = FPN_Up_Fusion(self.in_channels)
+        self.out_channels = 128
+        self.cross_attention = Cross_Attention(self.out_channels)
+
+    def forward(self, x):
+        """
+        args:
+            x: sequence of feature maps shared by both fusion branches.
+        return:
+            the fused (and optionally attention-enhanced) feature map.
+        """
+        down_feat = self.FPN_Down_Fusion(x)
+        up_feat = self.FPN_Up_Fusion(x)
+
+        # fuse both branches
+        fused = F.relu(paddle.add(x=down_feat, y=up_feat))
+
+        if not self.with_cab:
+            return fused
+        return self.cross_attention(fused)
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -24,11 +24,13 @@ __all__ = ['build_post_process']

 def build_post_process(config, global_config=None):
    from .db_postprocess import DBPostProcess
+    from .east_postprocess import EASTPostProcess
+    from .sast_postprocess import SASTPostProcess
    from .rec_postprocess import CTCLabelDecode, AttnLabelDecode
    from .cls_postprocess import ClsPostProcess

    support_dict = [
-        'DBPostProcess', 'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess'
+        'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess'
    ]

    config = copy.deepcopy(config)

--- a/ppocr/postprocess/east_postprocess.py
+++ b/ppocr/postprocess/east_postprocess.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from .locality_aware_nms import nms_locality
+import cv2
+
+import os
+import sys
+# __dir__ = os.path.dirname(os.path.abspath(__file__))
+# sys.path.append(__dir__)
+# sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+
+
+class EASTPostProcess(object):
+    """
+    The post process for EAST.
+
+    Turns the predicted score map and geometry map into text quadrangles.
+
+    args:
+        score_thresh: pixels whose score is not above this value are ignored.
+        cover_thresh: boxes whose mean score inside the box is not above this
+            value are dropped after nms.
+        nms_thresh: overlap threshold handed to the nms routine.
+    """
+
+    def __init__(self,
+                 score_thresh=0.8,
+                 cover_thresh=0.1,
+                 nms_thresh=0.2,
+                 **kwargs):
+
+        self.score_thresh = score_thresh
+        self.cover_thresh = cover_thresh
+        self.nms_thresh = nms_thresh
+
+        # c++ la-nms is faster, but only support python 3.5
+        self.is_python35 = False
+        if sys.version_info.major == 3 and sys.version_info.minor == 5:
+            self.is_python35 = True
+
+    def restore_rectangle_quad(self, origin, geometry):
+        """
+        Restore quadrangles from pixel positions and predicted offsets.
+
+        args:
+            origin: array of shape (n, 2) with one position per pixel.
+            geometry: array of shape (n, 8) holding the offsets of the four
+                corners relative to the position.
+        return:
+            array of shape (n, 4, 2) with the corner coordinates.
+        """
+        # quad
+        origin_concat = np.concatenate(
+            (origin, origin, origin, origin), axis=1)  # (n, 8)
+        pred_quads = origin_concat - geometry
+        pred_quads = pred_quads.reshape((-1, 4, 2))  # (n, 4, 2)
+        return pred_quads
+
+    def detect(self,
+               score_map,
+               geo_map,
+               score_thresh=0.8,
+               cover_thresh=0.1,
+               nms_thresh=0.2):
+        """
+        restore text boxes from score map and geo map
+
+        args:
+            score_map: array whose first entry is the 2-D score map.
+            geo_map: 3-D array; its first axis is moved to the last position
+                before it is indexed per pixel.
+            score_thresh: minimum pixel score.
+            cover_thresh: minimum mean score inside a box.
+            nms_thresh: overlap threshold of the nms.
+        return:
+            an empty list when nothing is found, otherwise an (m, 9) array
+            (eight coordinates and one score per row).
+        """
+        score_map = score_map[0]
+        geo_map = np.swapaxes(geo_map, 1, 0)
+        geo_map = np.swapaxes(geo_map, 1, 2)
+        # filter the score map
+        xy_text = np.argwhere(score_map > score_thresh)
+        if len(xy_text) == 0:
+            return []
+        # sort the text boxes via the y axis
+        xy_text = xy_text[np.argsort(xy_text[:, 0])]
+        #restore quad proposals
+        text_box_restored = self.restore_rectangle_quad(
+            xy_text[:, ::-1] * 4, geo_map[xy_text[:, 0], xy_text[:, 1], :])
+        boxes = np.zeros((text_box_restored.shape[0], 9), dtype=np.float32)
+        boxes[:, :8] = text_box_restored.reshape((-1, 8))
+        boxes[:, 8] = score_map[xy_text[:, 0], xy_text[:, 1]]
+
+        merged = None
+        if self.is_python35:
+            try:
+                import lanms
+            except ImportError:
+                # The c++ extension is optional; use the python
+                # implementation below instead of failing.
+                pass
+            else:
+                merged = lanms.merge_quadrangle_n9(boxes, nms_thresh)
+        if merged is None:
+            merged = nms_locality(boxes.astype(np.float64), nms_thresh)
+        boxes = merged
+        if boxes.shape[0] == 0:
+            return []
+        # Here we filter some low score boxes by the average score map,
+        #   this is different from the orginal paper.
+        for i, box in enumerate(boxes):
+            mask = np.zeros_like(score_map, dtype=np.uint8)
+            cv2.fillPoly(mask, box[:8].reshape(
+                (-1, 4, 2)).astype(np.int32) // 4, 1)
+            boxes[i, 8] = cv2.mean(score_map, mask)[0]
+        boxes = boxes[boxes[:, 8] > cover_thresh]
+        return boxes
+
+    def sort_poly(self, p):
+        """
+        Sort polygons.
+
+        The vertex with the smallest coordinate sum becomes the first one.
+        When the first edge is not wider than it is tall, the vertex order is
+        reversed around the first vertex.
+
+        args:
+            p: array of shape (4, 2).
+        return:
+            array of shape (4, 2) with the reordered vertices.
+        """
+        min_axis = np.argmin(np.sum(p, axis=1))
+        p = p[[(min_axis + shift) % 4 for shift in range(4)]]
+        if abs(p[0, 0] - p[1, 0]) > abs(p[0, 1] - p[1, 1]):
+            return p
+        return p[[0, 3, 2, 1]]
+
+    def __call__(self, outs_dict, shape_list):
+        """
+        args:
+            outs_dict: dict with the entries 'f_score' and 'f_geo'; each is
+                indexable per image and every item offers ``numpy()``.
+            shape_list: per image ``(src_h, src_w, ratio_h, ratio_w)``.
+        return:
+            list with one dict ``{'points': array}`` per image.
+        """
+        score_list = outs_dict['f_score']
+        geo_list = outs_dict['f_geo']
+        img_num = len(shape_list)
+        dt_boxes_list = []
+        for ino in range(img_num):
+            score = score_list[ino].numpy()
+            geo = geo_list[ino].numpy()
+            boxes = self.detect(
+                score_map=score,
+                geo_map=geo,
+                score_thresh=self.score_thresh,
+                cover_thresh=self.cover_thresh,
+                nms_thresh=self.nms_thresh)
+            boxes_norm = []
+            if len(boxes) > 0:
+                src_h, src_w, ratio_h, ratio_w = shape_list[ino]
+                boxes = boxes[:, :8].reshape((-1, 4, 2))
+                # undo the resize ratios that were applied to the input
+                boxes[:, :, 0] /= ratio_w
+                boxes[:, :, 1] /= ratio_h
+                for box in boxes:
+                    box = self.sort_poly(box.astype(np.int32))
+                    # skip boxes with a side shorter than 5 pixels
+                    if np.linalg.norm(box[0] - box[1]) < 5 \
+                        or np.linalg.norm(box[3] - box[0]) < 5:
+                        continue
+                    boxes_norm.append(box)
+            dt_boxes_list.append({'points': np.array(boxes_norm)})
+        return dt_boxes_list
\ No newline at end of file
--- a/ppocr/postprocess/locality_aware_nms.py
+++ b/ppocr/postprocess/locality_aware_nms.py
+"""
+Locality aware nms.
+"""
+
+import numpy as np
+from shapely.geometry import Polygon
+
+
+def intersection(g, p):
+    """
+    Intersection over union of two quadrangles.
+
+    args:
+        g: array whose first eight values are the corner coordinates
+           (x1, y1, ..., x4, y4) of the first quadrangle.
+        p: same layout, second quadrangle.
+    return:
+        the ratio ``intersection area / union area``, or 0 when a geometry is
+        invalid or the union area is zero.
+    """
+    g = Polygon(g[:8].reshape((4, 2)))
+    p = Polygon(p[:8].reshape((4, 2)))
+    # buffer(0) repairs self-intersecting quadrangles
+    g = g.buffer(0)
+    p = p.buffer(0)
+    if not g.is_valid or not p.is_valid:
+        return 0
+    # Intersect the repaired geometries directly. buffer(0) may return a
+    # multi-part geometry, which the Polygon constructor does not accept.
+    inter = g.intersection(p).area
+    union = g.area + p.area - inter
+    if union == 0:
+        return 0
+    return inter / union
+
+
+def intersection_iog(g, p):
+    """
+    Intersection area of two quadrangles divided by the area of ``p``.
+
+    args:
+        g: array whose first eight values are the corner coordinates of the
+           first quadrangle.
+        p: same layout; its area is the denominator.
+    return:
+        the ratio, or 0 when a quadrangle is invalid or ``p`` has no area.
+    """
+    quad_g = Polygon(g[:8].reshape((4, 2)))
+    quad_p = Polygon(p[:8].reshape((4, 2)))
+    if not (quad_g.is_valid and quad_p.is_valid):
+        return 0
+
+    overlap = Polygon(quad_g).intersection(Polygon(quad_p)).area
+    denominator = quad_p.area
+    if denominator != 0:
+        return overlap / denominator
+
+    print("p_area is very small")
+    return 0
+
+
+def weighted_merge(g, p):
+    """
+    Merge box ``p`` into box ``g`` in place.
+
+    The eight coordinates become the score-weighted average of both boxes
+    and the score becomes the sum of both scores.
+
+    args:
+        g: array of nine values (eight coordinates, one score); modified.
+        p: array with the same layout; left untouched.
+    return:
+        ``g`` itself.
+    """
+    score_g, score_p = g[8], p[8]
+    total = score_g + score_p
+    g[:8] = (score_g * g[:8] + score_p * p[:8]) / total
+    g[8] = total
+    return g
+
+
+def standard_nms(S, thres):
+    """
+    Standard nms.
+
+    args:
+        S: array of shape (n, 9); column 8 is the score.
+        thres: boxes overlapping a kept box by more than this are removed.
+    return:
+        the kept rows of ``S``, highest score first.
+    """
+    remaining = np.argsort(S[:, 8])[::-1]
+    kept = []
+    while remaining.size > 0:
+        best, rest = remaining[0], remaining[1:]
+        kept.append(best)
+        overlaps = np.array([intersection(S[best], S[j]) for j in rest])
+        # only the candidates that do not overlap too much survive
+        remaining = rest[overlaps <= thres]
+
+    return S[kept]
+
+
+def standard_nms_inds(S, thres):
+    """
+    Standard nms, return inds.
+
+    args:
+        S: array of shape (n, 9); column 8 is the score.
+        thres: boxes overlapping a kept box by more than this are removed.
+    return:
+        list with the row indices of the kept boxes, highest score first.
+    """
+    candidates = np.argsort(S[:, 8])[::-1]
+    selected = []
+    while candidates.size > 0:
+        top = candidates[0]
+        others = candidates[1:]
+        selected.append(top)
+        iou = np.array([intersection(S[top], S[k]) for k in others])
+        candidates = others[np.flatnonzero(iou <= thres)]
+
+    return selected
+
+
+def nms(S, thres):
+    """
+    nms.
+
+    args:
+        S: array of shape (n, 9); column 8 is the score.
+        thres: boxes overlapping a kept box by more than this are removed.
+    return:
+        list with the row indices of the kept boxes, highest score first.
+    """
+    queue = np.argsort(S[:, 8])[::-1]
+    keep = []
+    while queue.size > 0:
+        head = queue[0]
+        tail = queue[1:]
+        keep.append(head)
+        ovr = np.array([intersection(S[head], S[t]) for t in tail])
+        survivors = np.where(ovr <= thres)[0]
+        queue = tail[survivors]
+
+    return keep
+
+
+def soft_nms(boxes_in, Nt_thres=0.3, threshold=0.8, sigma=0.5, method=2):
+    """
+    soft_nms
+
+    Works on a copy of the input. In every round the highest scoring box is
+    moved to the front, then the scores of the boxes behind it that overlap
+    it are multiplied by a weight; boxes whose score drops below
+    ``threshold`` are removed.
+
+    :para boxes_in, N x 9 (coords + score)
+    :para Nt_thres, iou threshold used by the linear method (1) and by the
+        hard fallback (any method other than 1 and 2)
+    :para threshold, boxes whose re-weighted score falls below this value
+        are discarded (default 0.8)
+    :para sigma, gaussian weight parameter used by method 2
+    :para method, 1: linear, 2: gaussian, anything else: hard suppression
+    :return: the kept rows of the working copy (re-weighted scores), or an
+        empty array when there is no input box
+    """
+    boxes = boxes_in.copy()
+    N = boxes.shape[0]
+    if N is None or N < 1:
+        return np.array([])
+    pos, maxpos = 0, 0
+    weight = 0.0
+    inds = np.arange(N)
+    tbox, sbox = boxes[0].copy(), boxes[0].copy()
+    # N shrinks inside the loop; range(N) keeps the initial count, the
+    # inner while loops use the current value.
+    for i in range(N):
+        maxscore = boxes[i, 8]
+        maxpos = i
+        tbox = boxes[i].copy()
+        ti = inds[i]
+        pos = i + 1
+        # get max box
+        while pos < N:
+            if maxscore < boxes[pos, 8]:
+                maxscore = boxes[pos, 8]
+                maxpos = pos
+            pos = pos + 1
+        # add max box as a detection
+        boxes[i, :] = boxes[maxpos, :]
+        inds[i] = inds[maxpos]
+        # swap
+        boxes[maxpos, :] = tbox
+        inds[maxpos] = ti
+        tbox = boxes[i].copy()
+        pos = i + 1
+        # NMS iteration
+        while pos < N:
+            sbox = boxes[pos].copy()
+            ts_iou_val = intersection(tbox, sbox)
+            if ts_iou_val > 0:
+                if method == 1:
+                    # linear: decay only above the iou threshold
+                    if ts_iou_val > Nt_thres:
+                        weight = 1 - ts_iou_val
+                    else:
+                        weight = 1
+                elif method == 2:
+                    # gaussian decay
+                    weight = np.exp(-1.0 * ts_iou_val**2 / sigma)
+                else:
+                    # hard suppression
+                    if ts_iou_val > Nt_thres:
+                        weight = 0
+                    else:
+                        weight = 1
+                boxes[pos, 8] = weight * boxes[pos, 8]
+                # if the box score falls below the threshold, discard the box
+                # by swapping in the last box and updating N
+                if boxes[pos, 8] < threshold:
+                    boxes[pos, :] = boxes[N - 1, :]
+                    inds[pos] = inds[N - 1]
+                    N = N - 1
+                    # re-check the box that was just moved to this position
+                    pos = pos - 1
+            pos = pos + 1
+
+    return boxes[:N]
+
+
+def nms_locality(polys, thres=0.3):
+    """
+    locality aware nms of EAST
+
+    Consecutive boxes that overlap by more than ``thres`` are merged with a
+    score-weighted average; the merged boxes then go through standard nms.
+
+    :param polys: a N*9 numpy array. first 8 coordinates, then prob
+    :param thres: overlap threshold used for merging and for the final nms
+    :return: boxes after nms
+    """
+    merged = []
+    current = None
+    for candidate in polys:
+        if current is None:
+            current = candidate
+        elif intersection(candidate, current) > thres:
+            current = weighted_merge(candidate, current)
+        else:
+            merged.append(current)
+            current = candidate
+    if current is not None:
+        merged.append(current)
+
+    if not merged:
+        return np.array([])
+    return standard_nms(np.array(merged), thres)
+
+
+if __name__ == '__main__':
+    # Manual smoke test: print the area of a sample quadrangle.
+    # 343,350,448,135,474,143,369,359
+    print(
+        Polygon(np.array([[343, 350], [448, 135], [474, 143], [369, 359]]))
+        .area)
\ No newline at end of file
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -23,14 +23,16 @@ class BaseRecLabelDecode(object):
                 character_dict_path=None,
                 character_type='ch',
                 use_space_char=False):
-        support_character_type = ['ch', 'en', 'en_sensitive']
+        support_character_type = [
+            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean'
+        ]
        assert character_type in support_character_type, "Only {} are supported now but get {}".format(
            support_character_type, self.character_str)

        if character_type == "en":
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
            dict_character = list(self.character_str)
-        elif character_type == "ch":
+        elif character_type in ["ch", "french", "german", "japan", "korean"]:
            self.character_str = ""
            assert character_dict_path is not None, "character_dict_path should not be None when character_type is ch"
            with open(character_dict_path, "rb") as fin:
@@ -150,4 +152,4 @@ class AttnLabelDecode(BaseRecLabelDecode):
        else:
            assert False, "unsupport type %s in get_beg_end_flag_idx" \
                          % beg_or_end
-        return idx
\ No newline at end of file
+        return idx
--- a/ppocr/postprocess/sast_postprocess.py
+++ b/ppocr/postprocess/sast_postprocess.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+__dir__ = os.path.dirname(__file__)
+sys.path.append(__dir__)
+sys.path.append(os.path.join(__dir__, '..'))
+
+import numpy as np
+from .locality_aware_nms import nms_locality
+# import lanms
+import cv2
+import time
+
+
+class SASTPostProcess(object):
+    """
+    The post process for SAST.
+    """
+
+    def __init__(self,
+                 score_thresh=0.5,
+                 nms_thresh=0.2,
+                 sample_pts_num=2,
+                 shrink_ratio_of_width=0.3,
+                 expand_scale=1.0,
+                 tcl_map_thresh=0.5,
+                 **kwargs):
+        """
+        Store the post-processing settings on the instance.
+
+        Extra keyword arguments are accepted and ignored.
+        """
+        # thresholds
+        self.score_thresh = score_thresh
+        self.nms_thresh = nms_thresh
+        self.tcl_map_thresh = tcl_map_thresh
+
+        # polygon sampling / expansion settings
+        self.sample_pts_num = sample_pts_num
+        self.shrink_ratio_of_width = shrink_ratio_of_width
+        self.expand_scale = expand_scale
+
+        # c++ la-nms is faster, but only support python 3.5
+        version = sys.version_info
+        self.is_python35 = (version.major == 3 and version.minor == 5)
+
+            
+    def point_pair2poly(self, point_pair_list):
+        """
+        Turn a list of point pairs into one polygon.
+
+        The first points of all pairs come first, in order, followed by the
+        second points in reverse order.
+
+        args:
+            point_pair_list: sequence of ``(first_point, second_point)``.
+        return:
+            array of shape (2 * len(point_pair_list), 2).
+        """
+        forward_side = [pair[0] for pair in point_pair_list]
+        backward_side = [pair[1] for pair in point_pair_list]
+        return np.array(forward_side + backward_side[::-1]).reshape(-1, 2)
+
+    
+    def shrink_quad_along_width(self, quad, begin_width_ratio=0., end_width_ratio=1.):
+        """
+        Cut (or extend) a quad along the edges 0->1 and 3->2.
+
+        args:
+            quad: array of shape (4, 2).
+            begin_width_ratio: position of the new start on both edges,
+                as a fraction of the edge (0 keeps the original start).
+            end_width_ratio: position of the new end on both edges
+                (1 keeps the original end).
+        return:
+            array of shape (4, 2) in the same vertex order as ``quad``.
+        """
+        ratios = np.array([[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
+        # row 0: point at the begin ratio, row 1: point at the end ratio
+        edge_01 = quad[0] + (quad[1] - quad[0]) * ratios
+        edge_32 = quad[3] + (quad[2] - quad[3]) * ratios
+        return np.vstack((edge_01, edge_32[::-1]))
+
+    
+    def expand_poly_along_width(self, poly, shrink_ratio_of_width=0.3):
+        """
+        expand poly along width.
+
+        Both ends of the polygon are pushed outwards. The amount is
+        ``shrink_ratio_of_width`` times the length of the end edge, expressed
+        relative to the length of the first edge of the end quad.
+
+        args:
+            poly: array of shape (point_num, 2); modified in place.
+            shrink_ratio_of_width: expansion factor.
+        return:
+            ``poly`` itself.
+        """
+        half = poly.shape[0] // 2
+
+        # quad at the start of the polygon
+        head = np.array([poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32)
+        head_ratio = -shrink_ratio_of_width * np.linalg.norm(head[0] - head[3]) / \
+                    (np.linalg.norm(head[0] - head[1]) + 1e-6)
+        head_expanded = self.shrink_quad_along_width(head, head_ratio, 1.0)
+
+        # quad at the end of the polygon
+        tail = np.array([poly[half - 2], poly[half - 1],
+                         poly[half], poly[half + 1]], dtype=np.float32)
+        tail_ratio = 1.0 + \
+                    shrink_ratio_of_width * np.linalg.norm(tail[0] - tail[3]) / \
+                    (np.linalg.norm(tail[0] - tail[1]) + 1e-6)
+        tail_expanded = self.shrink_quad_along_width(tail, 0.0, tail_ratio)
+
+        # write back only the four outer corners
+        poly[0] = head_expanded[0]
+        poly[-1] = head_expanded[-1]
+        poly[half - 1] = tail_expanded[1]
+        poly[half] = tail_expanded[2]
+        return poly
+
+
+    def restore_quad(self, tcl_map, tcl_map_thresh, tvo_map):
+        """
+        Restore one quad for every pixel whose score is above the threshold.
+
+        args:
+            tcl_map: array indexed as [y, x, 0] holding the pixel scores.
+            tcl_map_thresh: minimum score of a pixel.
+            tvo_map: array indexed as [y, x, :] holding eight offsets.
+        return:
+            scores of shape (n, 1), quads of shape (n, 8) and the (x, y)
+            positions of shape (n, 2), all sorted by y.
+        """
+        pixels = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)[:, ::-1]  # (n, 2)
+
+        # Sort the text boxes via the y axis
+        pixels = pixels[np.argsort(pixels[:, 1])]
+        xs, ys = pixels[:, 0], pixels[:, 1]
+
+        scores = tcl_map[ys, xs, 0][:, np.newaxis]
+
+        # Restore
+        point_num = int(tvo_map.shape[-1] / 2)
+        assert point_num == 4
+        offsets = tvo_map[ys, xs, :]
+        quads = np.tile(pixels, (1, point_num)) - offsets  # (n, point_num * 2)
+
+        return scores, quads, pixels
+
+
+    def quad_area(self, quad):
+        """
+        compute area of a quad.
+
+        The result is signed: it sums ``(x_next - x_cur) * (y_next + y_cur)``
+        over the four edges and halves the sum.
+
+        args:
+            quad: four points, each indexable as (x, y).
+        return:
+            the signed area.
+        """
+        corner_pairs = ((0, 1), (1, 2), (2, 3), (3, 0))
+        edge = [(quad[nxt][0] - quad[cur][0]) * (quad[nxt][1] + quad[cur][1])
+                for cur, nxt in corner_pairs]
+        return np.sum(edge) / 2.
+
+        
+    def nms(self, dets):
+        """
+        Run locality aware nms on ``dets`` with ``self.nms_thresh``.
+
+        The ``lanms`` extension is used when the interpreter is python 3.5,
+        the python implementation otherwise.
+        """
+        if not self.is_python35:
+            return nms_locality(dets, self.nms_thresh)
+
+        import lanms
+        return lanms.merge_quadrangle_n9(dets, self.nms_thresh)
+
+
+    def cluster_by_quads_tco(self, tcl_map, tcl_map_thresh, quads, tco_map):
+        """
+        Cluster pixels in tcl_map based on quads.
+
+        Every pixel above the threshold predicts a centre (its position minus
+        its offset in ``tco_map``) and is assigned to the quad whose mean
+        vertex is closest to that prediction.
+
+        args:
+            tcl_map: array indexed as [y, x, 0] holding the pixel scores.
+            tcl_map_thresh: minimum score of a pixel.
+            quads: array of shape (m, 4, 2).
+            tco_map: array indexed as [y, x, :] holding two offsets.
+        return:
+            ``(m + 1, label_map)``; label 0 is background, label k belongs
+            to ``quads[k - 1]``.
+        """
+        instance_count = quads.shape[0] + 1  # contain background
+        label_map = np.zeros(tcl_map.shape[:2], dtype=np.int32)
+        if instance_count == 1:
+            return instance_count, label_map
+
+        # predict text center
+        pixels = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)[:, ::-1]  # (n, 2)
+        xs, ys = pixels[:, 0], pixels[:, 1]
+        predicted_centers = pixels - tco_map[ys, xs, :]  # (n, 2)
+
+        # get gt text center
+        quad_centers = np.mean(quads, axis=1)  # (m, 2)
+
+        # pairwise distances via broadcasting, shape (n, m)
+        deltas = predicted_centers[:, np.newaxis, :] - quad_centers[np.newaxis, :, :]
+        distances = np.linalg.norm(deltas, axis=2)
+        assignment = np.argmin(distances, axis=1) + 1  # (n,)
+
+        label_map[ys, xs] = assignment
+        return instance_count, label_map
+
+
+    def estimate_sample_pts_num(self, quad, xy_text):
+        """
+        Estimate sample points number.
+
+        The arc length of the point sequence ``xy_text`` is approximated from
+        evenly spaced samples and divided by the mean height of the quad.
+
+        args:
+            quad: array of shape (4, 2).
+            xy_text: array of shape (k, 2) with the points of one instance.
+        return:
+            the estimated number of sample points, at least 2.
+        """
+        norm = np.linalg.norm
+        mean_height = (norm(quad[0] - quad[3]) + norm(quad[1] - quad[2])) / 2.0
+        mean_width = (norm(quad[0] - quad[1]) + norm(quad[2] - quad[3])) / 2.0
+
+        # roughly one dense sample per pixel of width
+        dense_num = max(2, int(mean_width))
+        picks = np.linspace(0, xy_text.shape[0] - 1, dense_num,
+                            endpoint=True, dtype=np.float32).astype(np.int32)
+        center_line = xy_text[picks]
+
+        steps = center_line[1:] - center_line[:-1]
+        arc_len = np.sum(norm(steps, axis=1))
+
+        return max(2, int(arc_len / mean_height))
+
+
+    def detect_sast(self, tcl_map, tvo_map, tbo_map, tco_map, ratio_w, ratio_h, src_w, src_h, 
+                shrink_ratio_of_width=0.3, tcl_map_thresh=0.5, offset_expand=1.0, out_strid=4.0):
+        """
+        Restore text polygons for one image from the network output maps.
+
+        Steps, in order: restore candidate quads and run NMS on them, assign
+        text pixels to the surviving quads, then for every instance that
+        passes the size and confidence filters sample points along its
+        center line, turn each sample into a pair of border points using
+        tbo_map, and assemble the pairs into a polygon. Map coordinates are
+        scaled by out_strid and divided by (ratio_w, ratio_h) before the
+        polygon is built, and the result is clipped to [0, src_w] x [0, src_h].
+
+        Args:
+            tcl_map: score map; channel 0 (tcl_map[y, x, 0]) is read as the
+                per-pixel score.
+            tvo_map: map forwarded to self.restore_quad.
+            tbo_map: map whose 4 values at [y, x] are reshaped to (2, 2) and
+                added to [y, x] to obtain two border points.
+            tco_map: map forwarded to self.cluster_by_quads_tco.
+            ratio_w, ratio_h: divisors applied to x and y after scaling by
+                out_strid (presumably the resize ratios of the input image --
+                confirm against the caller).
+            src_w, src_h: upper clipping bounds for x and y of the result.
+            shrink_ratio_of_width: forwarded to self.expand_poly_along_width.
+            tcl_map_thresh: score threshold forwarded to self.restore_quad and
+                self.cluster_by_quads_tco.
+            offset_expand: when not 1.0, each border offset is lengthened by
+                (offset_expand - 1) times its length, clipped to [0.5, 3.0].
+            out_strid: factor applied to map coordinates.
+
+        Returns:
+            list of polygons (one array per kept instance); an empty list
+            when no quad survives NMS.
+        """
+        # restore quad
+        # NOTE(review): the xy_text returned here is never read; the name is
+        # reassigned per instance inside the loop below.
+        scores, quads, xy_text = self.restore_quad(tcl_map, tcl_map_thresh, tvo_map)
+        dets = np.hstack((quads, scores)).astype(np.float32, copy=False)
+        dets = self.nms(dets)
+        if dets.shape[0] == 0:
+            return []
+        # drop the trailing score column and view each row as 4 (x, y) vertices
+        quads = dets[:, :-1].reshape(-1, 4, 2)
+
+        # Compute quad area
+        # The sign is flipped; presumably self.quad_area returns a negative
+        # value for this vertex order -- confirm against quad_area.
+        quad_areas = []
+        for quad in quads:
+            quad_areas.append(-self.quad_area(quad))
+
+        # instance segmentation
+        # instance_count, instance_label_map = cv2.connectedComponents(tcl_map.astype(np.uint8), connectivity=8)
+        instance_count, instance_label_map = self.cluster_by_quads_tco(tcl_map, tcl_map_thresh, quads, tco_map)
+
+        # restore single poly with tcl instance.
+        poly_list = []
+        # label k in the map is paired with quads[k - 1]
+        for instance_idx in range(1, instance_count):
+            # argwhere yields (row, col); reverse to (x, y)
+            xy_text = np.argwhere(instance_label_map == instance_idx)[:, ::-1]
+            quad = quads[instance_idx - 1]
+            q_area = quad_areas[instance_idx - 1]
+            # skip quads with a tiny area
+            if q_area < 5:
+                continue
+            
+            # skip quads whose shorter of the edges 0-1 and 1-2 is under 3
+            len1 = float(np.linalg.norm(quad[0] -quad[1]))
+            len2 = float(np.linalg.norm(quad[1] -quad[2]))
+            min_len = min(len1, len2)
+            if min_len < 3:
+                continue
+
+            # skip instances that received no pixels
+            if xy_text.shape[0] <= 0:
+                continue
+
+            # filter low confidence instance
+            # (sum of the instance's pixel scores relative to the quad area)
+            xy_text_scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0] 
+            if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.1:
+            # if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.05:
+                continue
+
+            # sort xy_text
+            # Order pixels by their projection onto the unit vector running
+            # from the midpoint of edge 0-3 to the midpoint of edge 1-2.
+            left_center_pt = np.array([[(quad[0, 0] + quad[-1, 0]) / 2.0,
+                                        (quad[0, 1] + quad[-1, 1]) / 2.0]]) # (1, 2)
+            right_center_pt = np.array([[(quad[1, 0] + quad[2, 0]) / 2.0,
+                                        (quad[1, 1] + quad[2, 1]) / 2.0]]) # (1, 2)
+            # 1e-6 keeps the division finite when both midpoints coincide
+            proj_unit_vec = (right_center_pt - left_center_pt) / \
+                            (np.linalg.norm(right_center_pt - left_center_pt) + 1e-6)
+            proj_value = np.sum(xy_text * proj_unit_vec, axis=1)
+            xy_text = xy_text[np.argsort(proj_value)]
+
+            # Sample pts in tcl map
+            # self.sample_pts_num == 0 means "estimate per instance";
+            # any other value is used as a fixed count.
+            if self.sample_pts_num == 0:
+                sample_pts_num = self.estimate_sample_pts_num(quad, xy_text)
+            else:
+                sample_pts_num = self.sample_pts_num
+            # evenly spaced (truncated) indices over the sorted pixel list
+            xy_center_line = xy_text[np.linspace(0, xy_text.shape[0] - 1, sample_pts_num,
+                                                endpoint=True, dtype=np.float32).astype(np.int32)]
+
+            point_pair_list = []
+            for x, y in xy_center_line:
+                # get corresponding offset
+                # two rows, each added to [y, x] below, i.e. (dy, dx) order
+                offset = tbo_map[y, x, :].reshape(2, 2)
+                if offset_expand != 1.0:
+                    offset_length = np.linalg.norm(offset, axis=1, keepdims=True)
+                    expand_length = np.clip(offset_length * (offset_expand - 1), a_min=0.5, a_max=3.0)
+                    # NOTE(review): a zero-length offset row makes this a
+                    # division by zero (NaN result) -- confirm whether that
+                    # can occur in practice.
+                    offset_detal = offset / offset_length * expand_length
+                    offset = offset + offset_detal                
+                # original point
+                ori_yx = np.array([y, x], dtype=np.float32)
+                # flip to (x, y), scale by out_strid, divide by (ratio_w, ratio_h)
+                point_pair = (ori_yx +  offset)[:, ::-1]* out_strid / np.array([ratio_w, ratio_h]).reshape(-1, 2) 
+                point_pair_list.append(point_pair)
+
+            # ndarray: (x, 2), expand poly along width
+            detected_poly = self.point_pair2poly(point_pair_list)
+            detected_poly = self.expand_poly_along_width(detected_poly, shrink_ratio_of_width)
+            # keep every vertex inside [0, src_w] x [0, src_h]
+            detected_poly[:, 0] = np.clip(detected_poly[:, 0], a_min=0, a_max=src_w)
+            detected_poly[:, 1] = np.clip(detected_poly[:, 1], a_min=0, a_max=src_h)
+            poly_list.append(detected_poly)
+
+        return poly_list
+
+    def __call__(self, outs_dict, shape_list):
+        """
+        Run SAST post-processing for every image described by shape_list.
+
+        Args:
+            outs_dict: mapping with the keys 'f_score', 'f_border', 'f_tvo'
+                and 'f_tco'. Each value is indexed per image, and every
+                per-image entry must support ``.transpose((1, 2, 0))``
+                followed by ``.numpy()``.
+            shape_list: one entry per image, each unpacking into
+                (src_h, src_w, ratio_h, ratio_w).
+
+        Returns:
+            list of dicts, one per image, each of the form
+            {'points': np.array(polys)} where polys is the list returned by
+            detect_sast for that image.
+        """
+        score_list, border_list, tvo_list, tco_list = (
+            outs_dict[key] for key in ('f_score', 'f_border', 'f_tvo', 'f_tco'))
+
+        def to_last_axis(per_image):
+            # Move the leading axis to the end, then convert to numpy
+            # (presumably channel-first to channel-last -- confirm).
+            return per_image.transpose((1, 2, 0)).numpy()
+
+        poly_lists = []
+        for ino in range(len(shape_list)):
+            p_score = to_last_axis(score_list[ino])
+            p_border = to_last_axis(border_list[ino])
+            p_tvo = to_last_axis(tvo_list[ino])
+            p_tco = to_last_axis(tco_list[ino])
+            src_h, src_w, ratio_h, ratio_w = shape_list[ino]
+
+            # detect_sast takes the maps in (score, tvo, border, tco) order.
+            poly_list = self.detect_sast(
+                p_score, p_tvo, p_border, p_tco,
+                ratio_w, ratio_h, src_w, src_h,
+                shrink_ratio_of_width=self.shrink_ratio_of_width,
+                tcl_map_thresh=self.tcl_map_thresh,
+                offset_expand=self.expand_scale)
+            poly_lists.append({'points': np.array(poly_list)})
+
+        return poly_lists
+
--- a/setup.py
+++ b/setup.py
@@ -32,7 +32,7 @@ setup(
    package_dir={'paddleocr': ''},
    include_package_data=True,
    entry_points={"console_scripts": ["paddleocr= paddleocr.paddleocr:main"]},
-    version='0.0.3',
+    version='2.0',
    install_requires=requirements,
    license='Apache License 2.0',
    description='Awesome OCR toolkits based on PaddlePaddle （8.6M ultra-lightweight pre-trained model, support training and deployment among server, mobile, embeded and IoT devices',

--- a/tools/infer/predict_system.py
+++ b/tools/infer/predict_system.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import os
 import sys
+
 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '../..')))
@@ -30,12 +31,15 @@ from ppocr.utils.utility import get_image_file_list, check_and_read_gif
 from ppocr.utils.logging import get_logger
 from tools.infer.utility import draw_ocr_box_txt

+logger = get_logger()
+

 class TextSystem(object):
    def __init__(self, args):
        self.text_detector = predict_det.TextDetector(args)
        self.text_recognizer = predict_rec.TextRecognizer(args)
        self.use_angle_cls = args.use_angle_cls
+        self.drop_score = args.drop_score
        if self.use_angle_cls:
            self.text_classifier = predict_cls.TextClassifier(args)

@@ -81,7 +85,8 @@ class TextSystem(object):
    def __call__(self, img):
        ori_im = img.copy()
        dt_boxes, elapse = self.text_detector(img)
-        logger.info("dt_boxes num : {}, elapse : {}".format(len(dt_boxes), elapse))
+        logger.info("dt_boxes num : {}, elapse : {}".format(
+            len(dt_boxes), elapse))
        if dt_boxes is None:
            return None, None
        img_crop_list = []
@@ -99,9 +104,16 @@ class TextSystem(object):
                len(img_crop_list), elapse))

        rec_res, elapse = self.text_recognizer(img_crop_list)
-        logger.info("rec_res num  : {}, elapse : {}".format(len(rec_res), elapse))
+        logger.info("rec_res num  : {}, elapse : {}".format(
+            len(rec_res), elapse))
        # self.print_draw_crop_rec_res(img_crop_list, rec_res)
-        return dt_boxes, rec_res
+        filter_boxes, filter_rec_res = [], []
+        for box, rec_reuslt in zip(dt_boxes, rec_res):
+            text, score = rec_reuslt
+            if score >= self.drop_score:
+                filter_boxes.append(box)
+                filter_rec_res.append(rec_reuslt)
+        return filter_boxes, filter_rec_res


 def sorted_boxes(dt_boxes):
@@ -117,8 +129,8 @@ def sorted_boxes(dt_boxes):
    _boxes = list(sorted_boxes)

    for i in range(num_boxes - 1):
-        if abs(_boxes[i+1][0][1] - _boxes[i][0][1]) < 10 and \
-            (_boxes[i + 1][0][0] < _boxes[i][0][0]):
+        if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \
+                (_boxes[i + 1][0][0] < _boxes[i][0][0]):
            tmp = _boxes[i]
            _boxes[i] = _boxes[i + 1]
            _boxes[i + 1] = tmp
@@ -143,12 +155,8 @@ def main(args):
        elapse = time.time() - starttime
        logger.info("Predict time of %s: %.3fs" % (image_file, elapse))

-        dt_num = len(dt_boxes)
-        for dno in range(dt_num):
-            text, score = rec_res[dno]
-            if score >= drop_score:
-                text_str = "%s, %.3f" % (text, score)
-                logger.info(text_str)
+        for text, score in rec_res:
+            logger.info("{}, {:.3f}".format(text, score))

        if is_visualize:
            image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
@@ -174,5 +182,4 @@ def main(args):


 if __name__ == "__main__":
-    logger = get_logger()
-    main(utility.parse_args())
+    main(utility.parse_args())
\ No newline at end of file