fix conflicts

e7ad27c3 · LDOUBLEV · c0b4cefd · 91f5ab5c · e7ad27c3 · e7ad27c3
Commit e7ad27c3 authored Dec 09, 2020 by LDOUBLEV
15 changed files
--- a/ppocr/modeling/backbones/det_resnet_vd_sast.py
+++ b/ppocr/modeling/backbones/det_resnet_vd_sast.py
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import ParamAttr
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+__all__ = ["ResNet_SAST"]
+
+
+class ConvBNLayer(nn.Layer):
+    """Bias-free Conv2D followed by BatchNorm, optionally preceded by a 2x2 average pool.
+
+    Args:
+        in_channels: input channel count of the convolution.
+        out_channels: output channel count of the convolution and BatchNorm.
+        kernel_size: kernel size; used arithmetically to derive the padding
+            as (kernel_size - 1) // 2.
+        stride: convolution stride.
+        groups: convolution groups.
+        is_vd_mode: when true, forward() average-pools the input first.
+        act: activation name handed to nn.BatchNorm.
+        name: prefix for the parameter names. It is concatenated with string
+            suffixes, so a string must be supplied despite the None default.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride=1,
+                 groups=1,
+                 is_vd_mode=False,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.is_vd_mode = is_vd_mode
+
+        # Always constructed; only applied in forward() when is_vd_mode is set.
+        self._pool2d_avg = nn.AvgPool2D(
+            kernel_size=2, stride=2, padding=0, ceil_mode=True)
+
+        conv_padding = (kernel_size - 1) // 2
+        conv_weight_attr = ParamAttr(name=name + "_weights")
+        self._conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=conv_padding,
+            groups=groups,
+            weight_attr=conv_weight_attr,
+            bias_attr=False)
+
+        # BatchNorm names: "conv1" gets a "bn_" prefix, any other name has its
+        # first three characters replaced by "bn".
+        bn_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
+        self._batch_norm = nn.BatchNorm(
+            out_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_name + '_scale'),
+            bias_attr=ParamAttr(name=bn_name + '_offset'),
+            moving_mean_name=bn_name + '_mean',
+            moving_variance_name=bn_name + '_variance')
+
+    def forward(self, inputs):
+        """Apply (optional pool) -> conv -> batch norm and return the result."""
+        pooled = self._pool2d_avg(inputs) if self.is_vd_mode else inputs
+        return self._batch_norm(self._conv(pooled))
+
+
+class BottleneckBlock(nn.Layer):
+    """Residual block made of 1x1 -> 3x3 -> 1x1 ConvBNLayers with a 4x channel expansion.
+
+    Args:
+        in_channels: channel count of the block input.
+        out_channels: width of the first two convolutions; the block emits
+            out_channels * 4 channels.
+        stride: stride of the 3x3 convolution.
+        shortcut: when true the input is added back unchanged; otherwise it
+            goes through a 1x1 projection ConvBNLayer first.
+        if_first: disables the average pool (vd mode) of the projection.
+        name: prefix for the parameter names of the inner layers.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 stride,
+                 shortcut=True,
+                 if_first=False,
+                 name=None):
+        super(BottleneckBlock, self).__init__()
+        expanded_channels = out_channels * 4
+
+        self.conv0 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=1,
+            act='relu',
+            name=name + "_branch2a")
+        self.conv1 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2b")
+        self.conv2 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=expanded_channels,
+            kernel_size=1,
+            act=None,
+            name=name + "_branch2c")
+
+        if not shortcut:
+            # Projection branch; it is average-pooled unless this is the very
+            # first block of the network.
+            self.short = ConvBNLayer(
+                in_channels=in_channels,
+                out_channels=expanded_channels,
+                kernel_size=1,
+                stride=1,
+                is_vd_mode=not if_first,
+                name=name + "_branch1")
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        """Return relu(residual_branch(inputs) + shortcut_branch(inputs))."""
+        residual = self.conv2(self.conv1(self.conv0(inputs)))
+        identity = inputs if self.shortcut else self.short(inputs)
+        return F.relu(paddle.add(x=identity, y=residual))
+
+
+class BasicBlock(nn.Layer):
+    """Residual block made of two 3x3 ConvBNLayers without channel expansion.
+
+    Args:
+        in_channels: channel count of the block input.
+        out_channels: channel count of both convolutions and of the output.
+        stride: stride of the first 3x3 convolution.
+        shortcut: when true the input is added back unchanged; otherwise it
+            goes through a 1x1 projection ConvBNLayer first.
+        if_first: disables the average pool (vd mode) of the projection.
+        name: prefix for the parameter names of the inner layers.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 stride,
+                 shortcut=True,
+                 if_first=False,
+                 name=None):
+        super(BasicBlock, self).__init__()
+        self.stride = stride
+
+        self.conv0 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            stride=stride,
+            act='relu',
+            name=name + "_branch2a")
+        self.conv1 = ConvBNLayer(
+            in_channels=out_channels,
+            out_channels=out_channels,
+            kernel_size=3,
+            act=None,
+            name=name + "_branch2b")
+
+        if not shortcut:
+            # Projection branch; it is average-pooled unless this is the very
+            # first block of the network.
+            self.short = ConvBNLayer(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1,
+                stride=1,
+                is_vd_mode=not if_first,
+                name=name + "_branch1")
+
+        self.shortcut = shortcut
+
+    def forward(self, inputs):
+        """Return relu(residual_branch(inputs) + shortcut_branch(inputs))."""
+        residual = self.conv1(self.conv0(inputs))
+        identity = inputs if self.shortcut else self.short(inputs)
+        return F.relu(paddle.add(x=identity, y=residual))
+
+
+class ResNet_SAST(nn.Layer):
+    """Residual backbone that returns the input, the stem output and every stage output.
+
+    Args:
+        in_channels: channel count of the input tensor.
+        layers: network depth; one of 18, 34, 50, 101, 152 or 200. Depths
+            below 50 are built from BasicBlock, the others from
+            BottleneckBlock.
+        **kwargs: accepted and ignored.
+
+    Attributes:
+        out_channels: channel count of each element of the list returned by
+            forward(), in the same order.
+        stages: plain Python list holding one nn.Sequential per stage. The
+            blocks themselves are registered through add_sublayer() under
+            'bb_<stage>_<index>'.
+    """
+
+    def __init__(self, in_channels=3, layers=50, **kwargs):
+        super(ResNet_SAST, self).__init__()
+
+        self.layers = layers
+        supported_layers = [18, 34, 50, 101, 152, 200]
+        assert layers in supported_layers, \
+            "supported layers are {} but input layer is {}".format(
+                supported_layers, layers)
+
+        # Residual blocks per stage; 34 and 50 carry an extra fifth stage.
+        depth = {
+            18: [2, 2, 2, 2],
+            34: [3, 4, 6, 3, 3],
+            50: [3, 4, 6, 3, 3],
+            101: [3, 4, 23, 3],
+            152: [3, 8, 36, 3],
+            200: [3, 12, 48, 3],
+        }[layers]
+
+        use_bottleneck = layers >= 50
+        block_cls = BottleneckBlock if use_bottleneck else BasicBlock
+        # BottleneckBlock emits 4x its nominal width, BasicBlock does not.
+        expansion = 4 if use_bottleneck else 1
+        num_filters = [64, 128, 256, 512, 512]
+        # Input width of the first block of every stage. The BasicBlock table
+        # needs a fifth entry too: layers=34 has five stages and the former
+        # four-entry list raised IndexError while building the last one.
+        if use_bottleneck:
+            num_channels = [64, 256, 512, 1024, 2048]
+        else:
+            num_channels = [64, 64, 128, 256, 512]
+
+        # Stem: three 3x3 convolutions (the first with stride 2) and a max pool.
+        self.conv1_1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=32,
+            kernel_size=3,
+            stride=2,
+            act='relu',
+            name="conv1_1")
+        self.conv1_2 = ConvBNLayer(
+            in_channels=32,
+            out_channels=32,
+            kernel_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_2")
+        self.conv1_3 = ConvBNLayer(
+            in_channels=32,
+            out_channels=64,
+            kernel_size=3,
+            stride=1,
+            act='relu',
+            name="conv1_3")
+        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
+
+        self.stages = []
+        # forward() returns the raw input first, so its width is in_channels
+        # (previously hard-coded to 3, which is still the default value).
+        self.out_channels = [in_channels, 64]
+        for block, num_blocks in enumerate(depth):
+            block_list = []
+            for i in range(num_blocks):
+                # Stage 3 of the 101/152 variants is named a, b1, b2, ...;
+                # every other stage uses consecutive letters a, b, c, ...
+                if layers in [101, 152] and block == 2:
+                    suffix = "a" if i == 0 else "b" + str(i)
+                else:
+                    suffix = chr(97 + i)
+                conv_name = "res" + str(block + 2) + suffix
+
+                first_in_stage = i == 0
+                if first_in_stage:
+                    block_in_channels = num_channels[block]
+                else:
+                    block_in_channels = num_filters[block] * expansion
+                residual_block = self.add_sublayer(
+                    'bb_%d_%d' % (block, i),
+                    block_cls(
+                        in_channels=block_in_channels,
+                        out_channels=num_filters[block],
+                        # Only the first block of stages after the first one
+                        # uses stride 2.
+                        stride=2 if first_in_stage and block != 0 else 1,
+                        # The first block of a stage needs a projection.
+                        shortcut=not first_in_stage,
+                        if_first=block == i == 0,
+                        name=conv_name))
+                block_list.append(residual_block)
+            self.out_channels.append(num_filters[block] * expansion)
+            self.stages.append(nn.Sequential(*block_list))
+
+    def forward(self, inputs):
+        """Return [inputs, stem_output, stage_1_output, ..., stage_n_output]."""
+        out = [inputs]
+        y = self.conv1_1(inputs)
+        y = self.conv1_2(y)
+        y = self.conv1_3(y)
+        out.append(y)
+        y = self.pool2d_max(y)
+        for stage in self.stages:
+            y = stage(y)
+            out.append(y)
+        return out
\ No newline at end of file
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
@@ -18,13 +18,15 @@ __all__ = ['build_head']
 def build_head(config):
    # det head
    from .det_db_head import DBHead
+    from .det_east_head import EASTHead
+    from .det_sast_head import SASTHead

    # rec head
    from .rec_ctc_head import CTCHead

    # cls head
    from .cls_head import ClsHead
-    support_dict = ['DBHead', 'CTCHead', 'ClsHead']
+    support_dict = ['DBHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead']

    module_name = config.pop('name')
    assert module_name in support_dict, Exception('head only support {}'.format(

--- a/ppocr/modeling/heads/det_east_head.py
+++ b/ppocr/modeling/heads/det_east_head.py
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class ConvBNLayer(nn.Layer):
+    """Bias-free Conv2D followed by BatchNorm.
+
+    Args:
+        in_channels: input channel count of the convolution.
+        out_channels: output channel count of the convolution and BatchNorm.
+        kernel_size: convolution kernel size.
+        stride: convolution stride.
+        padding: convolution padding.
+        groups: convolution groups.
+        if_act: stored on the instance; not read by this class.
+        act: activation name handed to nn.BatchNorm (also stored).
+        name: prefix for the parameter names. It is concatenated with string
+            suffixes, so a string must be supplied despite the None default.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+
+        conv_weight_attr = ParamAttr(name=name + '_weights')
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=conv_weight_attr,
+            bias_attr=False)
+
+        # Every BatchNorm parameter shares the "bn_<name>" prefix.
+        bn_prefix = "bn_" + name
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_prefix + "_scale"),
+            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
+            moving_mean_name=bn_prefix + "_mean",
+            moving_variance_name=bn_prefix + "_variance")
+
+    def forward(self, x):
+        """Apply the convolution, then batch norm, and return the result."""
+        return self.bn(self.conv(x))
+
+
+class EASTHead(nn.Layer):
+    """Detection head producing a score map and a geometry map.
+
+    Two 3x3 ConvBNLayers are shared by both outputs; each output then has its
+    own 1x1 ConvBNLayer.
+
+    Args:
+        in_channels: channel count of the incoming feature map.
+        model_name: "large" selects widths 128/64 for the shared
+            convolutions, any other value selects 64/32.
+        **kwargs: accepted and ignored.
+    """
+
+    def __init__(self, in_channels, model_name, **kwargs):
+        super(EASTHead, self).__init__()
+        self.model_name = model_name
+        if self.model_name == "large":
+            width1, width2 = 128, 64
+        else:
+            width1, width2 = 64, 32
+        # Output channels of the score map and the geometry map.
+        score_channels, geo_channels = 1, 8
+
+        self.det_conv1 = ConvBNLayer(
+            in_channels=in_channels,
+            out_channels=width1,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            if_act=True,
+            act='relu',
+            name="det_head1")
+        self.det_conv2 = ConvBNLayer(
+            in_channels=width1,
+            out_channels=width2,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            if_act=True,
+            act='relu',
+            name="det_head2")
+        self.score_conv = ConvBNLayer(
+            in_channels=width2,
+            out_channels=score_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            if_act=False,
+            act=None,
+            name="f_score")
+        self.geo_conv = ConvBNLayer(
+            in_channels=width2,
+            out_channels=geo_channels,
+            kernel_size=1,
+            stride=1,
+            padding=0,
+            if_act=False,
+            act=None,
+            name="f_geo")
+
+    def forward(self, x):
+        """Return {'f_score': sigmoid score map, 'f_geo': scaled geometry map}."""
+        shared = self.det_conv2(self.det_conv1(x))
+        f_score = F.sigmoid(self.score_conv(shared))
+        geo_logits = self.geo_conv(shared)
+        # Maps the sigmoid output from (0, 1) onto (-800, 800).
+        f_geo = (F.sigmoid(geo_logits) - 0.5) * 2 * 800
+        return {'f_score': f_score, 'f_geo': f_geo}
--- a/ppocr/modeling/heads/det_sast_head.py
+++ b/ppocr/modeling/heads/det_sast_head.py
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class ConvBNLayer(nn.Layer):
+    """Bias-free Conv2D followed by BatchNorm, with padding derived from the kernel size.
+
+    Args:
+        in_channels: input channel count of the convolution.
+        out_channels: output channel count of the convolution and BatchNorm.
+        kernel_size: kernel size; the padding is (kernel_size - 1) // 2.
+        stride: convolution stride.
+        groups: convolution groups.
+        if_act: stored on the instance; not read by this class.
+        act: activation name handed to nn.BatchNorm (also stored).
+        name: prefix for the parameter names. It is concatenated with string
+            suffixes, so a string must be supplied despite the None default.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+
+        conv_padding = (kernel_size - 1) // 2
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=conv_padding,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + '_weights'),
+            bias_attr=False)
+
+        # Every BatchNorm parameter shares the "bn_<name>" prefix.
+        bn_prefix = "bn_" + name
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_prefix + "_scale"),
+            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
+            moving_mean_name=bn_prefix + "_mean",
+            moving_variance_name=bn_prefix + "_variance")
+
+    def forward(self, x):
+        """Apply the convolution, then batch norm, and return the result."""
+        return self.bn(self.conv(x))
+
+
+class SAST_Header1(nn.Layer):
+    """Two parallel four-layer conv branches producing the score and border maps.
+
+    Args:
+        in_channels: channel count of the incoming feature map.
+        **kwargs: accepted and ignored.
+    """
+
+    def __init__(self, in_channels, **kwargs):
+        super(SAST_Header1, self).__init__()
+        out_channels = [64, 64, 128]
+
+        def build_branch(prefix, final_channels):
+            # 1x1 -> 3x3 -> 1x1 with relu, then a 3x3 projection without
+            # activation; layers are named <prefix>1 .. <prefix>4.
+            return nn.Sequential(
+                ConvBNLayer(in_channels, out_channels[0], 1, 1,
+                            act='relu', name=prefix + '1'),
+                ConvBNLayer(out_channels[0], out_channels[1], 3, 1,
+                            act='relu', name=prefix + '2'),
+                ConvBNLayer(out_channels[1], out_channels[2], 1, 1,
+                            act='relu', name=prefix + '3'),
+                ConvBNLayer(out_channels[2], final_channels, 3, 1,
+                            act=None, name=prefix + '4'))
+
+        self.score_conv = build_branch('f_score', 1)
+        self.border_conv = build_branch('f_border', 4)
+
+    def forward(self, x):
+        """Return (sigmoid score map with 1 channel, border map with 4 channels)."""
+        f_score = F.sigmoid(self.score_conv(x))
+        f_border = self.border_conv(x)
+        return f_score, f_border
+
+
+class SAST_Header2(nn.Layer):
+    """Two parallel four-layer conv branches producing the tvo and tco maps.
+
+    Args:
+        in_channels: channel count of the incoming feature map.
+        **kwargs: accepted and ignored.
+    """
+
+    def __init__(self, in_channels, **kwargs):
+        super(SAST_Header2, self).__init__()
+        out_channels = [64, 64, 128]
+
+        def build_branch(prefix, final_channels):
+            # 1x1 -> 3x3 -> 1x1 with relu, then a 3x3 projection without
+            # activation; layers are named <prefix>1 .. <prefix>4.
+            return nn.Sequential(
+                ConvBNLayer(in_channels, out_channels[0], 1, 1,
+                            act='relu', name=prefix + '1'),
+                ConvBNLayer(out_channels[0], out_channels[1], 3, 1,
+                            act='relu', name=prefix + '2'),
+                ConvBNLayer(out_channels[1], out_channels[2], 1, 1,
+                            act='relu', name=prefix + '3'),
+                ConvBNLayer(out_channels[2], final_channels, 3, 1,
+                            act=None, name=prefix + '4'))
+
+        self.tvo_conv = build_branch('f_tvo', 8)
+        self.tco_conv = build_branch('f_tco', 2)
+
+    def forward(self, x):
+        """Return (tvo map with 8 channels, tco map with 2 channels)."""
+        return self.tvo_conv(x), self.tco_conv(x)
+
+
+class SASTHead(nn.Layer):
+    """Detection head that runs SAST_Header1 and SAST_Header2 on the same feature map.
+
+    Args:
+        in_channels: channel count of the incoming feature map.
+        **kwargs: accepted and ignored.
+    """
+
+    def __init__(self, in_channels, **kwargs):
+        super(SASTHead, self).__init__()
+
+        self.head1 = SAST_Header1(in_channels)
+        self.head2 = SAST_Header2(in_channels)
+
+    def forward(self, x):
+        """Return a dict with the keys 'f_score', 'f_border', 'f_tvo' and 'f_tco'."""
+        score_map, border_map = self.head1(x)
+        tvo_map, tco_map = self.head2(x)
+        return {
+            'f_score': score_map,
+            'f_border': border_map,
+            'f_tvo': tvo_map,
+            'f_tco': tco_map,
+        }
\ No newline at end of file
--- a/ppocr/modeling/necks/__init__.py
+++ b/ppocr/modeling/necks/__init__.py
@@ -16,8 +16,10 @@ __all__ = ['build_neck']

 def build_neck(config):
    from .db_fpn import DBFPN
+    from .east_fpn import EASTFPN
+    from .sast_fpn import SASTFPN
    from .rnn import SequenceEncoder
-    support_dict = ['DBFPN', 'SequenceEncoder']
+    support_dict = ['DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder']

    module_name = config.pop('name')
    assert module_name in support_dict, Exception('neck only support {}'.format(

--- a/ppocr/modeling/necks/east_fpn.py
+++ b/ppocr/modeling/necks/east_fpn.py
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class ConvBNLayer(nn.Layer):
+    """Bias-free Conv2D followed by BatchNorm.
+
+    Args:
+        in_channels: input channel count of the convolution.
+        out_channels: output channel count of the convolution and BatchNorm.
+        kernel_size: convolution kernel size.
+        stride: convolution stride.
+        padding: convolution padding.
+        groups: convolution groups.
+        if_act: stored on the instance; not read by this class.
+        act: activation name handed to nn.BatchNorm (also stored).
+        name: prefix for the parameter names. It is concatenated with string
+            suffixes, so a string must be supplied despite the None default.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+
+        conv_weight_attr = ParamAttr(name=name + '_weights')
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=conv_weight_attr,
+            bias_attr=False)
+
+        # Every BatchNorm parameter shares the "bn_<name>" prefix.
+        bn_prefix = "bn_" + name
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_prefix + "_scale"),
+            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
+            moving_mean_name=bn_prefix + "_mean",
+            moving_variance_name=bn_prefix + "_variance")
+
+    def forward(self, x):
+        """Apply the convolution, then batch norm, and return the result."""
+        return self.bn(self.conv(x))
+
+
+class DeConvBNLayer(nn.Layer):
+    """Bias-free Conv2DTranspose followed by BatchNorm.
+
+    Args:
+        in_channels: input channel count of the transposed convolution.
+        out_channels: output channel count of the transposed convolution
+            and BatchNorm.
+        kernel_size: transposed-convolution kernel size.
+        stride: transposed-convolution stride.
+        padding: transposed-convolution padding.
+        groups: transposed-convolution groups.
+        if_act: stored on the instance; not read by this class.
+        act: activation name handed to nn.BatchNorm (also stored).
+        name: prefix for the parameter names. It is concatenated with string
+            suffixes, so a string must be supplied despite the None default.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 padding,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(DeConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+
+        deconv_weight_attr = ParamAttr(name=name + '_weights')
+        self.deconv = nn.Conv2DTranspose(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            weight_attr=deconv_weight_attr,
+            bias_attr=False)
+
+        # Every BatchNorm parameter shares the "bn_<name>" prefix.
+        bn_prefix = "bn_" + name
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_prefix + "_scale"),
+            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
+            moving_mean_name=bn_prefix + "_mean",
+            moving_variance_name=bn_prefix + "_variance")
+
+    def forward(self, x):
+        """Apply the transposed convolution, then batch norm, and return the result."""
+        return self.bn(self.deconv(x))
+
+
+class EASTFPN(nn.Layer):
+    """U-shaped neck that merges four feature maps, starting from the last one.
+
+    Args:
+        in_channels: channel counts of the incoming feature maps; it is
+            reversed internally and entries 0..3 of the reversed sequence
+            are used.
+        model_name: "large" selects an internal/output width of 128, any
+            other value selects 64.
+        **kwargs: accepted and ignored.
+
+    Attributes:
+        out_channels: channel count of the tensor returned by forward().
+    """
+
+    def __init__(self, in_channels, model_name, **kwargs):
+        super(EASTFPN, self).__init__()
+        self.model_name = model_name
+        self.out_channels = 128 if self.model_name == "large" else 64
+        self.in_channels = in_channels[::-1]
+
+        def conv3x3(num_inputs, layer_name):
+            # 3x3, stride 1, padding 1 ConvBNLayer with relu.
+            return ConvBNLayer(
+                in_channels=num_inputs,
+                out_channels=self.out_channels,
+                kernel_size=3,
+                stride=1,
+                padding=1,
+                if_act=True,
+                act='relu',
+                name=layer_name)
+
+        def deconv4x4(num_inputs, layer_name):
+            # 4x4, stride 2, padding 1 DeConvBNLayer with relu.
+            return DeConvBNLayer(
+                in_channels=num_inputs,
+                out_channels=self.out_channels,
+                kernel_size=4,
+                stride=2,
+                padding=1,
+                if_act=True,
+                act='relu',
+                name=layer_name)
+
+        # Merge convolutions: each consumes an upsampled map concatenated
+        # with the next feature map of the reversed input list.
+        self.h1_conv = conv3x3(self.out_channels + self.in_channels[1],
+                               "unet_h_1")
+        self.h2_conv = conv3x3(self.out_channels + self.in_channels[2],
+                               "unet_h_2")
+        self.h3_conv = conv3x3(self.out_channels + self.in_channels[3],
+                               "unet_h_3")
+        # Stride-2 transposed convolutions applied before each merge.
+        self.g0_deconv = deconv4x4(self.in_channels[0], "unet_g_0")
+        self.g1_deconv = deconv4x4(self.out_channels, "unet_g_1")
+        self.g2_deconv = deconv4x4(self.out_channels, "unet_g_2")
+        # Final 3x3 convolution on the fully merged map.
+        self.g3_conv = conv3x3(self.out_channels, "unet_g_3")
+
+    def forward(self, x):
+        """Fuse the feature maps in `x` (consumed last to first) into one tensor."""
+        feats = x[::-1]
+
+        upsampled = self.g0_deconv(feats[0])
+        merged = self.h1_conv(paddle.concat([upsampled, feats[1]], axis=1))
+        upsampled = self.g1_deconv(merged)
+        merged = self.h2_conv(paddle.concat([upsampled, feats[2]], axis=1))
+        upsampled = self.g2_deconv(merged)
+        merged = self.h3_conv(paddle.concat([upsampled, feats[3]], axis=1))
+
+        return self.g3_conv(merged)
\ No newline at end of file
--- a/ppocr/modeling/necks/sast_fpn.py
+++ b/ppocr/modeling/necks/sast_fpn.py
+# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+
+
+class ConvBNLayer(nn.Layer):
+    """Bias-free Conv2D followed by BatchNorm, with padding derived from the kernel size.
+
+    Args:
+        in_channels: input channel count of the convolution.
+        out_channels: output channel count of the convolution and BatchNorm.
+        kernel_size: kernel size; the padding is (kernel_size - 1) // 2.
+        stride: convolution stride.
+        groups: convolution groups.
+        if_act: stored on the instance; not read by this class.
+        act: activation name handed to nn.BatchNorm (also stored).
+        name: prefix for the parameter names. It is concatenated with string
+            suffixes, so a string must be supplied despite the None default.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(ConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+
+        conv_padding = (kernel_size - 1) // 2
+        self.conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=conv_padding,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + '_weights'),
+            bias_attr=False)
+
+        # Every BatchNorm parameter shares the "bn_<name>" prefix.
+        bn_prefix = "bn_" + name
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_prefix + "_scale"),
+            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
+            moving_mean_name=bn_prefix + "_mean",
+            moving_variance_name=bn_prefix + "_variance")
+
+    def forward(self, x):
+        """Apply the convolution, then batch norm, and return the result."""
+        return self.bn(self.conv(x))
+
+
+class DeConvBNLayer(nn.Layer):
+    """Bias-free Conv2DTranspose followed by BatchNorm, with padding derived from the kernel size.
+
+    Args:
+        in_channels: input channel count of the transposed convolution.
+        out_channels: output channel count of the transposed convolution
+            and BatchNorm.
+        kernel_size: kernel size; the padding is (kernel_size - 1) // 2.
+        stride: transposed-convolution stride.
+        groups: transposed-convolution groups.
+        if_act: stored on the instance; not read by this class.
+        act: activation name handed to nn.BatchNorm (also stored).
+        name: prefix for the parameter names. It is concatenated with string
+            suffixes, so a string must be supplied despite the None default.
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 stride,
+                 groups=1,
+                 if_act=True,
+                 act=None,
+                 name=None):
+        super(DeConvBNLayer, self).__init__()
+        self.if_act = if_act
+        self.act = act
+
+        deconv_padding = (kernel_size - 1) // 2
+        self.deconv = nn.Conv2DTranspose(
+            in_channels=in_channels,
+            out_channels=out_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=deconv_padding,
+            groups=groups,
+            weight_attr=ParamAttr(name=name + '_weights'),
+            bias_attr=False)
+
+        # Every BatchNorm parameter shares the "bn_<name>" prefix.
+        bn_prefix = "bn_" + name
+        self.bn = nn.BatchNorm(
+            num_channels=out_channels,
+            act=act,
+            param_attr=ParamAttr(name=bn_prefix + "_scale"),
+            bias_attr=ParamAttr(name=bn_prefix + "_offset"),
+            moving_mean_name=bn_prefix + "_mean",
+            moving_variance_name=bn_prefix + "_variance")
+
+    def forward(self, x):
+        """Apply the transposed convolution, then batch norm, and return the result."""
+        return self.bn(self.deconv(x))
+
+
+class FPN_Up_Fusion(nn.Layer):
+    """Upsampling fusion path over five feature maps, starting from the last one.
+
+    Args:
+        in_channels: channel counts of the five feature maps to fuse; the
+            sequence is reversed internally. forward() fuses x[2:], so the
+            caller presumably passes the matching slice of channel counts
+            (TODO confirm against the caller).
+    """
+
+    def __init__(self, in_channels):
+        super(FPN_Up_Fusion, self).__init__()
+        reversed_in = in_channels[::-1]
+        widths = [256, 256, 192, 192, 128]
+
+        # 1x1 lateral projections, one per feature map.
+        self.h0_conv = ConvBNLayer(
+            reversed_in[0], widths[0], 1, 1, act=None, name='fpn_up_h0')
+        self.h1_conv = ConvBNLayer(
+            reversed_in[1], widths[1], 1, 1, act=None, name='fpn_up_h1')
+        self.h2_conv = ConvBNLayer(
+            reversed_in[2], widths[2], 1, 1, act=None, name='fpn_up_h2')
+        self.h3_conv = ConvBNLayer(
+            reversed_in[3], widths[3], 1, 1, act=None, name='fpn_up_h3')
+        self.h4_conv = ConvBNLayer(
+            reversed_in[4], widths[4], 1, 1, act=None, name='fpn_up_h4')
+
+        # Stride-2 transposed convolution applied to the first projection.
+        self.g0_conv = DeConvBNLayer(
+            widths[0], widths[1], 4, 2, act=None, name='fpn_up_g0')
+
+        # Refinement (3x3 conv + relu) followed by a stride-2 transposed
+        # convolution towards the next level's width.
+        self.g1_conv = nn.Sequential(
+            ConvBNLayer(
+                widths[1], widths[1], 3, 1, act='relu', name='fpn_up_g1_1'),
+            DeConvBNLayer(
+                widths[1], widths[2], 4, 2, act=None, name='fpn_up_g1_2'))
+        self.g2_conv = nn.Sequential(
+            ConvBNLayer(
+                widths[2], widths[2], 3, 1, act='relu', name='fpn_up_g2_1'),
+            DeConvBNLayer(
+                widths[2], widths[3], 4, 2, act=None, name='fpn_up_g2_2'))
+        self.g3_conv = nn.Sequential(
+            ConvBNLayer(
+                widths[3], widths[3], 3, 1, act='relu', name='fpn_up_g3_1'),
+            DeConvBNLayer(
+                widths[3], widths[4], 4, 2, act=None, name='fpn_up_g3_2'))
+
+        # Final fusion: 3x3 conv + relu, then a 1x1 conv without activation.
+        self.g4_conv = nn.Sequential(
+            ConvBNLayer(
+                widths[4], widths[4], 3, 1, act='relu',
+                name='fpn_up_fusion_1'),
+            ConvBNLayer(
+                widths[4], widths[4], 1, 1, act=None,
+                name='fpn_up_fusion_2'))
+
+    def _add_relu(self, x1, x2):
+        """Return relu(x1 + x2)."""
+        return F.relu(paddle.add(x=x1, y=x2))
+
+    def forward(self, x):
+        """Fuse x[2:] (consumed last to first) into a single 128-channel map."""
+        feats = x[2:][::-1]
+        lateral0 = self.h0_conv(feats[0])
+        lateral1 = self.h1_conv(feats[1])
+        lateral2 = self.h2_conv(feats[2])
+        lateral3 = self.h3_conv(feats[3])
+        lateral4 = self.h4_conv(feats[4])
+
+        fused = self.g0_conv(lateral0)
+        fused = self.g1_conv(self._add_relu(fused, lateral1))
+        fused = self.g2_conv(self._add_relu(fused, lateral2))
+        fused = self.g3_conv(self._add_relu(fused, lateral3))
+        fused = self.g4_conv(self._add_relu(fused, lateral4))
+
+        return fused
+
+
+class FPN_Down_Fusion(nn.Layer):
+    """Fuse the first three input feature maps into one 128-channel map.
+
+    Every input gets a 3x3 lateral ``ConvBNLayer`` (32 / 64 / 128 output
+    channels). The running map is then added to the next lateral, passed
+    through ReLU and convolved again. ``g0_conv`` and the second conv of
+    ``g1_conv`` are built with ``2`` as their fourth positional argument
+    (presumably the stride, i.e. they downsample -- confirm against
+    ``ConvBNLayer``).
+    """
+
+    def __init__(self, in_channels):
+        super(FPN_Down_Fusion, self).__init__()
+        width0, width1, width2 = 32, 64, 128
+
+        # lateral convs, one per input feature map
+        self.h0_conv = ConvBNLayer(in_channels[0], width0, 3, 1, act=None, name='fpn_down_h0')
+        self.h1_conv = ConvBNLayer(in_channels[1], width1, 3, 1, act=None, name='fpn_down_h1')
+        self.h2_conv = ConvBNLayer(in_channels[2], width2, 3, 1, act=None, name='fpn_down_h2')
+
+        # first transition: width0 -> width1
+        self.g0_conv = ConvBNLayer(width0, width1, 3, 2, act=None, name='fpn_down_g0')
+
+        # second transition: width1 -> width2
+        self.g1_conv = nn.Sequential(
+            ConvBNLayer(width1, width1, 3, 1, act='relu', name='fpn_down_g1_1'),
+            ConvBNLayer(width1, width2, 3, 2, act=None, name='fpn_down_g1_2'),
+        )
+
+        # final fusion convs
+        self.g2_conv = nn.Sequential(
+            ConvBNLayer(width2, width2, 3, 1, act='relu', name='fpn_down_fusion_1'),
+            ConvBNLayer(width2, width2, 1, 1, act=None, name='fpn_down_fusion_2'),
+        )
+
+    def forward(self, x):
+        """Run the fusion on ``x[:3]`` and return the output of ``g2_conv``."""
+        feat0, feat1, feat2 = x[:3][0], x[:3][1], x[:3][2]
+        lat0 = self.h0_conv(feat0)
+        lat1 = self.h1_conv(feat1)
+        lat2 = self.h2_conv(feat2)
+
+        fused = self.g0_conv(lat0)
+        fused = self.g1_conv(F.relu(paddle.add(x=fused, y=lat1)))
+        fused = self.g2_conv(F.relu(paddle.add(x=fused, y=lat2)))
+        return fused
+
+
+class Cross_Attention(nn.Layer):
+    """Attention applied separately along the two spatial axes of a feature map.
+
+    Three 1x1 convs produce theta / phi / g projections of the input. The
+    attention is computed once over the last spatial axis ("horizon") and
+    once over the other spatial axis ("vertical"); each branch is added to a
+    1x1-conv shortcut of the input, and both branches are concatenated on the
+    channel axis and reduced back to ``in_channels``.
+
+    Args:
+        in_channels: channel count of the input (and output) feature map.
+    """
+
+    def __init__(self, in_channels):
+        super(Cross_Attention, self).__init__()
+        # Remembered so that _cal_fweight reshapes and scales with the real
+        # channel width instead of a hard-coded 128.
+        self.in_channels = in_channels
+
+        self.theta_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_theta')
+        self.phi_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_phi')
+        self.g_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_g')
+
+        self.fh_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_weight')
+        self.fh_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_sc')
+
+        self.fv_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_weight')
+        self.fv_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_sc')
+
+        self.f_attn_conv = ConvBNLayer(in_channels * 2, in_channels, 1, 1, act='relu', name='f_attn')
+
+    def _cal_fweight(self, f, shape):
+        """Compute the attention-weighted features along one axis.
+
+        Args:
+            f: ``[f_theta, f_phi, f_g]`` feature maps, channel axis at index 1.
+            shape: ``[d0, d1, d2]``; after moving channels last the maps are
+                flattened to ``[d0 * d1, d2, C]`` and attention runs over the
+                ``d2`` axis.
+
+        Returns:
+            Tensor reshaped to ``[d0, d1, d2, C]`` where ``C`` is
+            ``self.in_channels``.
+        """
+        f_theta, f_phi, f_g = f
+        channels = self.in_channels
+        flat_shape = [shape[0] * shape[1], shape[2], channels]
+        #flatten
+        f_theta = paddle.transpose(f_theta, [0, 2, 3, 1])
+        f_theta = paddle.reshape(f_theta, flat_shape)
+        f_phi = paddle.transpose(f_phi, [0, 2, 3, 1])
+        f_phi = paddle.reshape(f_phi, flat_shape)
+        f_g = paddle.transpose(f_g, [0, 2, 3, 1])
+        f_g = paddle.reshape(f_g, flat_shape)
+        #correlation
+        f_attn = paddle.matmul(f_theta, paddle.transpose(f_phi, [0, 2, 1]))
+        #scale by sqrt(channels)
+        f_attn = f_attn / (channels**0.5)
+        f_attn = F.softmax(f_attn)
+        #weighted sum
+        f_weight = paddle.matmul(f_attn, f_g)
+        f_weight = paddle.reshape(
+            f_weight, [shape[0], shape[1], shape[2], channels])
+        return f_weight
+
+    def forward(self, f_common):
+        """Return the attention-enhanced version of ``f_common``."""
+        f_shape = paddle.shape(f_common)
+
+        f_theta = self.theta_conv(f_common)
+        f_phi = self.phi_conv(f_common)
+        f_g = self.g_conv(f_common)
+
+        ######## horizon ########
+        fh_weight = self._cal_fweight([f_theta, f_phi, f_g],
+                                        [f_shape[0], f_shape[2], f_shape[3]])
+        # move channels back to axis 1
+        fh_weight = paddle.transpose(fh_weight, [0, 3, 1, 2])
+        fh_weight = self.fh_weight_conv(fh_weight)
+        #short cut
+        fh_sc = self.fh_sc_conv(f_common)
+        f_h = F.relu(fh_weight + fh_sc)
+
+        ######## vertical ########
+        # swap the two spatial axes so attention runs over the other one
+        fv_theta = paddle.transpose(f_theta, [0, 1, 3, 2])
+        fv_phi = paddle.transpose(f_phi, [0, 1, 3, 2])
+        fv_g = paddle.transpose(f_g, [0, 1, 3, 2])
+        fv_weight = self._cal_fweight([fv_theta, fv_phi, fv_g],
+                                        [f_shape[0], f_shape[3], f_shape[2]])
+        # undo the spatial swap and move channels back to axis 1
+        fv_weight = paddle.transpose(fv_weight, [0, 3, 2, 1])
+        fv_weight = self.fv_weight_conv(fv_weight)
+        #short cut
+        fv_sc = self.fv_sc_conv(f_common)
+        f_v = F.relu(fv_weight + fv_sc)
+
+        ######## merge ########
+        f_attn = paddle.concat([f_h, f_v], axis=1)
+        f_attn = self.f_attn_conv(f_attn)
+        return f_attn
+
+
+class SASTFPN(nn.Layer):
+    """Neck that sums the down-fusion and up-fusion feature maps.
+
+    Args:
+        in_channels: per-level channel counts, forwarded to both fusion
+            sub-modules.
+        with_cab: when true, the fused map is additionally passed through
+            the ``Cross_Attention`` block.
+        **kwargs: accepted and ignored.
+    """
+
+    def __init__(self, in_channels, with_cab=False, **kwargs):
+        super(SASTFPN, self).__init__()
+        self.in_channels = in_channels
+        self.with_cab = with_cab
+        self.FPN_Down_Fusion = FPN_Down_Fusion(self.in_channels)
+        self.FPN_Up_Fusion = FPN_Up_Fusion(self.in_channels)
+        self.out_channels = 128
+        # Always constructed, even when with_cab is false.
+        self.cross_attention = Cross_Attention(self.out_channels)
+
+    def forward(self, x):
+        """Return ``relu(down(x) + up(x))``, optionally refined by attention."""
+        down_feat = self.FPN_Down_Fusion(x)
+        up_feat = self.FPN_Up_Fusion(x)
+
+        fused = F.relu(paddle.add(x=down_feat, y=up_feat))
+        if not self.with_cab:
+            return fused
+        return self.cross_attention(fused)
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -24,11 +24,13 @@ __all__ = ['build_post_process']

 def build_post_process(config, global_config=None):
    from .db_postprocess import DBPostProcess
+    from .east_postprocess import EASTPostProcess
+    from .sast_postprocess import SASTPostProcess
    from .rec_postprocess import CTCLabelDecode, AttnLabelDecode
    from .cls_postprocess import ClsPostProcess

    support_dict = [
-        'DBPostProcess', 'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess'
+        'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess'
    ]

    config = copy.deepcopy(config)

--- a/ppocr/postprocess/east_postprocess.py
+++ b/ppocr/postprocess/east_postprocess.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from .locality_aware_nms import nms_locality
+import cv2
+
+import os
+import sys
+# __dir__ = os.path.dirname(os.path.abspath(__file__))
+# sys.path.append(__dir__)
+# sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
+
+
+class EASTPostProcess(object):
+    """
+    The post process for EAST.
+    """
+    def __init__(self,
+                 score_thresh=0.8,
+                 cover_thresh=0.1,
+                 nms_thresh=0.2,
+                 **kwargs):
+        """Store the thresholds and record whether we run on Python 3.5.
+
+        Extra keyword arguments are accepted and ignored.
+        """
+        self.score_thresh = score_thresh
+        self.cover_thresh = cover_thresh
+        self.nms_thresh = nms_thresh
+
+        # c++ la-nms is faster, but only support python 3.5
+        self.is_python35 = tuple(sys.version_info[:2]) == (3, 5)
+
+    def restore_rectangle_quad(self, origin, geometry):
+        """Turn per-pixel offsets into quadrangle corner coordinates.
+
+        Args:
+            origin: (n, 2) array of reference points.
+            geometry: (n, 8) array of offsets, four 2-D offsets per point.
+
+        Returns:
+            (n, 4, 2) array equal to the reference point minus each offset.
+        """
+        # Repeat every reference point four times: (n, 2) -> (n, 8).
+        repeated_origin = np.tile(origin, (1, 4))
+        return (repeated_origin - geometry).reshape((-1, 4, 2))
+
+    def detect(self,
+               score_map,
+               geo_map,
+               score_thresh=0.8,
+               cover_thresh=0.1,
+               nms_thresh=0.2):
+        """Restore text boxes from a score map and a geometry map.
+
+        Args:
+            score_map: array whose first entry ``score_map[0]`` is the 2-D
+                score map.
+            geo_map: geometry map with the channel axis first; it is moved
+                to the last axis before use.
+            score_thresh: pixels must score above this to seed a box.
+            cover_thresh: boxes whose mean covered score is not above this
+                are dropped.
+            nms_thresh: threshold handed to the NMS routine.
+
+        Returns:
+            ``[]`` when nothing survives, otherwise an (m, 9) array of eight
+            coordinates followed by a score.
+        """
+        score_map = score_map[0]
+        # channel-first -> channel-last
+        geo_map = np.swapaxes(np.swapaxes(geo_map, 1, 0), 1, 2)
+
+        # keep only pixels above the score threshold
+        yx_text = np.argwhere(score_map > score_thresh)
+        if len(yx_text) == 0:
+            return []
+        # order the pixels by their row index
+        yx_text = yx_text[np.argsort(yx_text[:, 0])]
+        rows, cols = yx_text[:, 0], yx_text[:, 1]
+
+        # restore quad proposals; (x, y) pixel positions are scaled by 4
+        quads = self.restore_rectangle_quad(yx_text[:, ::-1] * 4,
+                                            geo_map[rows, cols, :])
+        boxes = np.zeros((quads.shape[0], 9), dtype=np.float32)
+        boxes[:, :8] = quads.reshape((-1, 8))
+        boxes[:, 8] = score_map[rows, cols]
+
+        if self.is_python35:
+            import lanms
+            boxes = lanms.merge_quadrangle_n9(boxes, nms_thresh)
+        else:
+            boxes = nms_locality(boxes.astype(np.float64), nms_thresh)
+        if boxes.shape[0] == 0:
+            return []
+
+        # Re-score every box with the mean of the score map under it and
+        # drop low-scoring ones; this differs from the original paper.
+        for idx, box in enumerate(boxes):
+            mask = np.zeros_like(score_map, dtype=np.uint8)
+            quad_on_map = box[:8].reshape((-1, 4, 2)).astype(np.int32) // 4
+            cv2.fillPoly(mask, quad_on_map, 1)
+            boxes[idx, 8] = cv2.mean(score_map, mask)[0]
+        return boxes[boxes[:, 8] > cover_thresh]
+
+    def sort_poly(self, p):
+        """Reorder the four points of a quadrangle.
+
+        The point with the smallest ``x + y`` becomes the first one. If the
+        first edge is wider than it is tall the cyclic order is kept,
+        otherwise the traversal direction is reversed.
+        """
+        start = np.argmin(np.sum(p, axis=1))
+        # cyclic rotation so that `start` comes first
+        p = p[(start + np.arange(4)) % 4]
+        dx = abs(p[0, 0] - p[1, 0])
+        dy = abs(p[0, 1] - p[1, 1])
+        if dx > dy:
+            return p
+        return p[[0, 3, 2, 1]]
+
+    def __call__(self, outs_dict, shape_list):
+        """Decode network outputs into detected boxes, one result per image.
+
+        Args:
+            outs_dict: mapping with ``'f_score'`` and ``'f_geo'`` entries;
+                each is indexable per image and every item exposes
+                ``.numpy()``.
+            shape_list: per-image ``(src_h, src_w, ratio_h, ratio_w)``; only
+                the two ratios are used, to undo the resize.
+
+        Returns:
+            A list with one ``{'points': ndarray}`` dict per image. Boxes
+            whose first or last edge is shorter than 5 are dropped.
+        """
+        score_list = outs_dict['f_score']
+        geo_list = outs_dict['f_geo']
+        dt_boxes_list = []
+        for ino in range(len(shape_list)):
+            boxes = self.detect(
+                score_map=score_list[ino].numpy(),
+                geo_map=geo_list[ino].numpy(),
+                score_thresh=self.score_thresh,
+                cover_thresh=self.cover_thresh,
+                nms_thresh=self.nms_thresh)
+            boxes_norm = []
+            if len(boxes) > 0:
+                _, _, ratio_h, ratio_w = shape_list[ino]
+                boxes = boxes[:, :8].reshape((-1, 4, 2))
+                # map coordinates back to the source image scale
+                boxes[:, :, 0] /= ratio_w
+                boxes[:, :, 1] /= ratio_h
+                for box in boxes:
+                    box = self.sort_poly(box.astype(np.int32))
+                    # skip degenerate boxes with a very short side
+                    if np.linalg.norm(box[0] - box[1]) < 5 \
+                        or np.linalg.norm(box[3] - box[0]) < 5:
+                        continue
+                    boxes_norm.append(box)
+            dt_boxes_list.append({'points': np.array(boxes_norm)})
+        return dt_boxes_list
\ No newline at end of file
--- a/ppocr/postprocess/locality_aware_nms.py
+++ b/ppocr/postprocess/locality_aware_nms.py
+"""
+Locality aware nms.
+"""
+
+import numpy as np
+from shapely.geometry import Polygon
+
+
+def intersection(g, p):
+    """Return the intersection-over-union of two quadrangles.
+
+    Args:
+        g: array whose first eight values are the quadrangle's x/y pairs.
+        p: second quadrangle in the same layout.
+
+    Returns:
+        ``inter / union``, or ``0`` when either geometry is invalid or the
+        union area is zero.
+    """
+    g = Polygon(g[:8].reshape((4, 2)))
+    p = Polygon(p[:8].reshape((4, 2)))
+    # buffer(0) is applied before the validity check to repair the rings.
+    g = g.buffer(0)
+    p = p.buffer(0)
+    if not g.is_valid or not p.is_valid:
+        return 0
+    # Intersect the repaired geometries directly. Re-wrapping them in
+    # Polygon(...) is redundant and is not safe if buffer(0) handed back a
+    # geometry that is no longer a single Polygon.
+    inter = g.intersection(p).area
+    union = g.area + p.area - inter
+    if union == 0:
+        return 0
+    return inter / union
+
+
+def intersection_iog(g, p):
+    """Return the intersection area of ``g`` and ``p`` divided by the area of ``p``.
+
+    Both inputs carry the quadrangle's x/y pairs in their first eight
+    values. ``0`` is returned when either polygon is invalid or ``p`` has
+    zero area (a message is printed in the latter case).
+    """
+    poly_g = Polygon(g[:8].reshape((4, 2)))
+    poly_p = Polygon(p[:8].reshape((4, 2)))
+    if not (poly_g.is_valid and poly_p.is_valid):
+        return 0
+    overlap_area = poly_g.intersection(poly_p).area
+    # the denominator is the area of p only, not the union
+    p_area = poly_p.area
+    if p_area == 0:
+        print("p_area is very small")
+        return 0
+    return overlap_area / p_area
+
+
+def weighted_merge(g, p):
+    """Merge ``p`` into ``g`` in place and return ``g``.
+
+    The eight coordinates become the score-weighted average of both boxes
+    and the score (index 8) becomes the sum of both scores.
+    """
+    score_g, score_p = g[8], p[8]
+    g[:8] = (score_g * g[:8] + score_p * p[:8]) / (score_g + score_p)
+    g[8] = score_g + score_p
+    return g
+
+
+def standard_nms(S, thres):
+    """Greedy NMS on an (N, 9) array; returns the kept rows.
+
+    Rows are visited by descending score (column 8). A row is discarded
+    when its overlap with an already kept row, as reported by
+    ``intersection``, is greater than ``thres``.
+    """
+    remaining = np.argsort(S[:, 8])[::-1]
+    kept = []
+    while remaining.size > 0:
+        best, rest = remaining[0], remaining[1:]
+        kept.append(best)
+        overlaps = np.array([intersection(S[best], S[idx]) for idx in rest])
+        remaining = rest[np.where(overlaps <= thres)[0]]
+
+    return S[kept]
+
+
+def standard_nms_inds(S, thres):
+    """Greedy NMS on an (N, 9) array; returns the indices of the kept rows.
+
+    Rows are visited by descending score (column 8). A row is discarded
+    when its overlap with an already kept row, as reported by
+    ``intersection``, is greater than ``thres``.
+    """
+    candidates = np.argsort(S[:, 8])[::-1]
+    keep = []
+    while candidates.size > 0:
+        top = candidates[0]
+        others = candidates[1:]
+        keep.append(top)
+        ious = np.array([intersection(S[top], S[other]) for other in others])
+        candidates = others[ious <= thres]
+
+    return keep
+
+
+def nms(S, thres):
+    """Greedy NMS on an (N, 9) array; returns the indices of the kept rows.
+
+    Kept as an alias of ``standard_nms_inds``: the two implementations were
+    line-for-line identical, so this one now delegates.
+    """
+    return standard_nms_inds(S, thres)
+
+
+def soft_nms(boxes_in, Nt_thres=0.3, threshold=0.8, sigma=0.5, method=2):
+    """
+    Soft NMS: decay the scores of overlapping boxes instead of dropping
+    them outright, then discard boxes whose score falls below `threshold`.
+
+    :param boxes_in: N x 9 array (8 coordinates + score); it is copied, the
+        caller's array is not modified
+    :param Nt_thres: IoU threshold used by the linear (1) and hard (other)
+        methods
+    :param threshold: a box is discarded once its decayed score is below
+        this value
+    :param sigma: divisor of the squared IoU in the gaussian weight
+    :param method: 1 = linear decay, 2 = gaussian decay, anything else =
+        hard suppression (weight 0 above Nt_thres)
+    :return: the surviving rows of the working copy, or an empty array when
+        there is no input box
+    """
+    boxes = boxes_in.copy()
+    N = boxes.shape[0]
+    if N is None or N < 1:
+        return np.array([])
+    pos, maxpos = 0, 0
+    weight = 0.0
+    # inds tracks the original index of every row while rows are swapped
+    inds = np.arange(N)
+    tbox, sbox = boxes[0].copy(), boxes[0].copy()
+    # NOTE: N shrinks inside the loop while range(N) was fixed up front;
+    # for i >= N the inner loops simply do not run.
+    for i in range(N):
+        maxscore = boxes[i, 8]
+        maxpos = i
+        tbox = boxes[i].copy()
+        ti = inds[i]
+        pos = i + 1
+        # find the highest-scoring box among rows i .. N-1
+        while pos < N:
+            if maxscore < boxes[pos, 8]:
+                maxscore = boxes[pos, 8]
+                maxpos = pos
+            pos = pos + 1
+        # move the max box to row i (it becomes a detection)
+        boxes[i, :] = boxes[maxpos, :]
+        inds[i] = inds[maxpos]
+        # complete the swap: the old row i goes to where the max was
+        boxes[maxpos, :] = tbox
+        inds[maxpos] = ti
+        tbox = boxes[i].copy()
+        pos = i + 1
+        # decay the scores of the remaining boxes that overlap row i
+        while pos < N:
+            sbox = boxes[pos].copy()
+            ts_iou_val = intersection(tbox, sbox)
+            if ts_iou_val > 0:
+                if method == 1:
+                    # linear: scale by (1 - IoU) above the IoU threshold
+                    if ts_iou_val > Nt_thres:
+                        weight = 1 - ts_iou_val
+                    else:
+                        weight = 1
+                elif method == 2:
+                    # gaussian: exp(-IoU^2 / sigma)
+                    weight = np.exp(-1.0 * ts_iou_val**2 / sigma)
+                else:
+                    # hard suppression above the IoU threshold
+                    if ts_iou_val > Nt_thres:
+                        weight = 0
+                    else:
+                        weight = 1
+                boxes[pos, 8] = weight * boxes[pos, 8]
+                # if the box score falls below threshold, discard the box by
+                # overwriting it with the last box and shrinking N; pos is
+                # stepped back so the moved-in box is examined next
+                if boxes[pos, 8] < threshold:
+                    boxes[pos, :] = boxes[N - 1, :]
+                    inds[pos] = inds[N - 1]
+                    N = N - 1
+                    pos = pos - 1
+            pos = pos + 1
+
+    return boxes[:N]
+
+
+def nms_locality(polys, thres=0.3):
+    """
+    Locality-aware NMS of EAST.
+
+    Consecutive rows that overlap by more than `thres` are merged with
+    `weighted_merge` (which writes into the rows of `polys`); the merged
+    rows then go through `standard_nms`.
+
+    :param polys: a N*9 numpy array. first 8 coordinates, then prob
+    :param thres: overlap threshold for both the merge and the final NMS
+    :return: boxes after nms, or an empty array when `polys` has no rows
+    """
+    merged = []
+    current = None
+    for candidate in polys:
+        if current is None:
+            current = candidate
+        elif intersection(candidate, current) > thres:
+            current = weighted_merge(candidate, current)
+        else:
+            merged.append(current)
+            current = candidate
+
+    # current stays None only when polys was empty
+    if current is None:
+        return np.array([])
+    merged.append(current)
+    return standard_nms(np.array(merged), thres)
+
+
+if __name__ == '__main__':
+    # Manual smoke check: print the area of one sample quadrangle
+    # (343,350,448,135,474,143,369,359).
+    sample_quad = np.array([[343, 350], [448, 135], [474, 143], [369, 359]])
+    print(Polygon(sample_quad).area)
\ No newline at end of file
--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -27,7 +27,7 @@ class BaseRecLabelDecode(object):
            'ch', 'en', 'en_sensitive', 'french', 'german', 'japan', 'korean'
        ]
        assert character_type in support_character_type, "Only {} are supported now but get {}".format(
-            support_character_type, self.character_str)
+            support_character_type, character_type)

        if character_type == "en":
            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"

--- a/ppocr/postprocess/sast_postprocess.py
+++ b/ppocr/postprocess/sast_postprocess.py
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+import sys
+__dir__ = os.path.dirname(__file__)
+sys.path.append(__dir__)
+sys.path.append(os.path.join(__dir__, '..'))
+
+import numpy as np
+from .locality_aware_nms import nms_locality
+# import lanms
+import cv2
+import time
+
+
+class SASTPostProcess(object):
+    """
+    The post process for SAST.
+    """
+
+    def __init__(self,
+                 score_thresh=0.5,
+                 nms_thresh=0.2,
+                 sample_pts_num=2,
+                 shrink_ratio_of_width=0.3,
+                 expand_scale=1.0,
+                 tcl_map_thresh=0.5,
+                 **kwargs):
+        """Store the post-processing options and detect Python 3.5.
+
+        Extra keyword arguments are accepted and ignored.
+        """
+        self.score_thresh = score_thresh
+        self.nms_thresh = nms_thresh
+        self.sample_pts_num = sample_pts_num
+        self.shrink_ratio_of_width = shrink_ratio_of_width
+        self.expand_scale = expand_scale
+        self.tcl_map_thresh = tcl_map_thresh
+
+        # c++ la-nms is faster, but only support python 3.5
+        self.is_python35 = tuple(sys.version_info[:2]) == (3, 5)
+            
+    def point_pair2poly(self, point_pair_list):
+        """
+        Build a closed polygon from a list of point pairs.
+
+        The first point of every pair is listed in order, followed by the
+        second point of every pair in reverse order. Returns a (2n, 2) array.
+        """
+        first_side = [pair[0] for pair in point_pair_list]
+        second_side = [pair[1] for pair in point_pair_list]
+        outline = first_side + second_side[::-1]
+        return np.array(outline).reshape(-1, 2)
+    
+    def shrink_quad_along_width(self, quad, begin_width_ratio=0., end_width_ratio=1.):
+        """
+        Cut (or extend) a quad along its 0->1 and 3->2 edges.
+
+        Both edges are sampled at `begin_width_ratio` and `end_width_ratio`;
+        ratios outside [0, 1] extrapolate beyond the original corners.
+        Returns the four new corners in the same order as `quad`.
+        """
+        ratios = np.array([[begin_width_ratio], [end_width_ratio]], dtype=np.float32)
+        edge01_begin, edge01_end = quad[0] + (quad[1] - quad[0]) * ratios
+        edge32_begin, edge32_end = quad[3] + (quad[2] - quad[3]) * ratios
+        return np.array([edge01_begin, edge01_end, edge32_end, edge32_begin])
+    
+    def expand_poly_along_width(self, poly, shrink_ratio_of_width=0.3):
+        """
+        Push both ends of a polygon outwards, in place, and return it.
+
+        The quad at each end is extended along its width by
+        `shrink_ratio_of_width` times the ratio of that quad's 0-3 edge
+        length to its 0-1 edge length. Only the four end points of `poly`
+        are overwritten.
+        """
+        half = poly.shape[0] // 2
+
+        # quad at the start of the polygon, extended backwards
+        head = np.array([poly[0], poly[1], poly[-2], poly[-1]], dtype=np.float32)
+        head_ratio = -shrink_ratio_of_width * np.linalg.norm(head[0] - head[3]) / \
+                    (np.linalg.norm(head[0] - head[1]) + 1e-6)
+        head_expanded = self.shrink_quad_along_width(head, head_ratio, 1.0)
+
+        # quad at the far end of the polygon, extended forwards
+        tail = np.array([poly[half - 2], poly[half - 1],
+                         poly[half], poly[half + 1]], dtype=np.float32)
+        tail_ratio = 1.0 + \
+                    shrink_ratio_of_width * np.linalg.norm(tail[0] - tail[3]) / \
+                    (np.linalg.norm(tail[0] - tail[1]) + 1e-6)
+        tail_expanded = self.shrink_quad_along_width(tail, 0.0, tail_ratio)
+
+        poly[0] = head_expanded[0]
+        poly[-1] = head_expanded[-1]
+        poly[half - 1] = tail_expanded[1]
+        poly[half] = tail_expanded[2]
+        return poly
+
+    def restore_quad(self, tcl_map, tcl_map_thresh, tvo_map):
+        """Restore one quad per pixel whose score exceeds the threshold.
+
+        Returns a tuple ``(scores, quads, xy_text)``: an (n, 1) score column,
+        an (n, 8) array of quad coordinates (pixel position minus the
+        offsets stored in ``tvo_map``), and the (n, 2) pixel positions in
+        (x, y) order.
+        """
+        # argwhere yields (row, col); flip to (x, y)
+        xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)[:, ::-1]
+
+        # Sort the text pixels via the y axis
+        xy_text = xy_text[np.argsort(xy_text[:, 1])]
+        xs, ys = xy_text[:, 0], xy_text[:, 1]
+
+        scores = tcl_map[ys, xs, 0][:, np.newaxis]
+
+        # Restore
+        point_num = int(tvo_map.shape[-1] / 2)
+        assert point_num == 4
+        offsets = tvo_map[ys, xs, :]
+        # (n, 2) -> (n, point_num * 2)
+        quads = np.tile(xy_text, (1, point_num)) - offsets
+
+        return scores, quads, xy_text
+
+    def quad_area(self, quad):
+        """
+        Signed area of a quad, summed edge by edge.
+
+        The sign depends on the vertex order; the unit square
+        (0,0),(1,0),(1,1),(0,1) yields -1.
+        """
+        edge_terms = [
+            (quad[(i + 1) % 4][0] - quad[i][0]) * (quad[(i + 1) % 4][1] + quad[i][1])
+            for i in range(4)
+        ]
+        return np.sum(edge_terms) / 2.
+        
+    def nms(self, dets):
+        """Run NMS on ``dets`` with ``self.nms_thresh``.
+
+        Uses the ``lanms`` extension on Python 3.5 and the pure-Python
+        ``nms_locality`` everywhere else.
+        """
+        if not self.is_python35:
+            return nms_locality(dets, self.nms_thresh)
+        import lanms
+        return lanms.merge_quadrangle_n9(dets, self.nms_thresh)
+
+    def cluster_by_quads_tco(self, tcl_map, tcl_map_thresh, quads, tco_map):
+        """
+        Assign every above-threshold pixel of tcl_map to its nearest quad.
+
+        A pixel's predicted centre is its (x, y) position minus the offset
+        stored in tco_map; it is labelled with the 1-based index of the quad
+        whose mean corner is closest. Returns (instance_count, label_map)
+        where instance_count includes the background label 0.
+        """
+        instance_count = quads.shape[0] + 1 # contain background
+        instance_label_map = np.zeros(tcl_map.shape[:2], dtype=np.int32)
+        if instance_count == 1:
+            return instance_count, instance_label_map
+
+        # predicted text centre of every text pixel, (n, 2) in (x, y) order
+        xy_text = np.argwhere(tcl_map[:, :, 0] > tcl_map_thresh)[:, ::-1]
+        xs, ys = xy_text[:, 0], xy_text[:, 1]
+        pred_centers = xy_text - tco_map[ys, xs, :]
+
+        # centre of every quad, (m, 2)
+        quad_centers = np.mean(quads, axis=1)
+
+        # pairwise distances via broadcasting, (n, m)
+        diffs = pred_centers[:, np.newaxis, :] - quad_centers[np.newaxis, :, :]
+        dist_mat = np.linalg.norm(diffs, axis=2)
+
+        instance_label_map[ys, xs] = np.argmin(dist_mat, axis=1) + 1
+        return instance_count, instance_label_map
+
+    def estimate_sample_pts_num(self, quad, xy_text):
+        """
+        Estimate how many points to sample along a text centre line.
+
+        The arc length of the (ordered) pixels in xy_text is approximated
+        with a dense polyline and divided by the quad's mean height; the
+        result is never smaller than 2.
+        """
+        def _edge_len(a, b):
+            return np.linalg.norm(quad[a] - quad[b])
+
+        mean_height = (_edge_len(0, 3) + _edge_len(1, 2)) / 2.0
+        mean_width = (_edge_len(0, 1) + _edge_len(2, 3)) / 2.0
+
+        dense_num = max(2, int(mean_width))
+        picks = np.linspace(0, xy_text.shape[0] - 1, dense_num,
+                            endpoint=True, dtype=np.float32).astype(np.int32)
+        dense_line = xy_text[picks]
+
+        steps = dense_line[1:] - dense_line[:-1]
+        arc_len = np.sum(np.linalg.norm(steps, axis=1))
+
+        return max(2, int(arc_len / mean_height))
+
+    def detect_sast(self, tcl_map, tvo_map, tbo_map, tco_map, ratio_w, ratio_h, src_w, src_h, 
+                shrink_ratio_of_width=0.3, tcl_map_thresh=0.5, offset_expand=1.0, out_strid=4.0):
+        """
+        Restore text polygons from the four SAST output maps.
+
+        Steps: restore one quad per text pixel and run NMS on them, assign
+        every text pixel to its nearest surviving quad, then for each quad
+        sample points along its pixels and use the tbo_map offsets at those
+        points to build the polygon.
+
+        tcl_map / tvo_map / tbo_map / tco_map are indexed as [y, x, channel].
+        ratio_w / ratio_h divide the output coordinates, out_strid multiplies
+        them, and src_w / src_h are the clipping bounds.
+
+        Returns a list of (2 * sample_pts_num, 2) arrays, or [] when no quad
+        survives NMS.
+        """
+        # restore quad
+        scores, quads, xy_text = self.restore_quad(tcl_map, tcl_map_thresh, tvo_map)
+        # 8 coordinates + 1 score per row
+        dets = np.hstack((quads, scores)).astype(np.float32, copy=False)
+        dets = self.nms(dets)
+        if dets.shape[0] == 0:
+            return []
+        quads = dets[:, :-1].reshape(-1, 4, 2)
+
+        # Compute quad area
+        # NOTE(review): the sign is flipped here, presumably because
+        # quad_area is negative for the vertex order produced above -- confirm.
+        quad_areas = []
+        for quad in quads:
+            quad_areas.append(-self.quad_area(quad))
+
+        # instance segmentation
+        # instance_count, instance_label_map = cv2.connectedComponents(tcl_map.astype(np.uint8), connectivity=8)
+        instance_count, instance_label_map = self.cluster_by_quads_tco(tcl_map, tcl_map_thresh, quads, tco_map)
+
+        # restore single poly with tcl instance.
+        poly_list = []
+        for instance_idx in range(1, instance_count):
+            # pixels of this instance in (x, y) order
+            xy_text = np.argwhere(instance_label_map == instance_idx)[:, ::-1]
+            quad = quads[instance_idx - 1]
+            q_area = quad_areas[instance_idx - 1]
+            # skip quads whose (negated) area is below 5
+            if q_area < 5:
+                continue
+            
+            # skip quads whose 0-1 or 1-2 edge is shorter than 3
+            len1 = float(np.linalg.norm(quad[0] -quad[1]))
+            len2 = float(np.linalg.norm(quad[1] -quad[2]))
+            min_len = min(len1, len2)
+            if min_len < 3:
+                continue
+
+            # filter small CC
+            if xy_text.shape[0] <= 0:
+                continue
+
+            # filter low confidence instance: summed pixel score relative to quad area
+            xy_text_scores = tcl_map[xy_text[:, 1], xy_text[:, 0], 0] 
+            if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.1:
+            # if np.sum(xy_text_scores) / quad_areas[instance_idx - 1] < 0.05:
+                continue
+
+            # sort xy_text by its projection onto the axis that joins the
+            # midpoint of edge 0-3 to the midpoint of edge 1-2
+            left_center_pt = np.array([[(quad[0, 0] + quad[-1, 0]) / 2.0,
+                                        (quad[0, 1] + quad[-1, 1]) / 2.0]]) # (1, 2)
+            right_center_pt = np.array([[(quad[1, 0] + quad[2, 0]) / 2.0,
+                                        (quad[1, 1] + quad[2, 1]) / 2.0]]) # (1, 2)
+            proj_unit_vec = (right_center_pt - left_center_pt) / \
+                            (np.linalg.norm(right_center_pt - left_center_pt) + 1e-6)
+            proj_value = np.sum(xy_text * proj_unit_vec, axis=1)
+            xy_text = xy_text[np.argsort(proj_value)]
+
+            # Sample pts in tcl map; sample_pts_num == 0 means "estimate it"
+            if self.sample_pts_num == 0:
+                sample_pts_num = self.estimate_sample_pts_num(quad, xy_text)
+            else:
+                sample_pts_num = self.sample_pts_num
+            xy_center_line = xy_text[np.linspace(0, xy_text.shape[0] - 1, sample_pts_num,
+                                                endpoint=True, dtype=np.float32).astype(np.int32)]
+
+            point_pair_list = []
+            for x, y in xy_center_line:
+                # get corresponding offset: two 2-D offsets per pixel
+                # (presumably stored in (y, x) order, matching ori_yx below)
+                offset = tbo_map[y, x, :].reshape(2, 2)
+                if offset_expand != 1.0:
+                    # lengthen each offset by a clipped amount (0.5 .. 3.0)
+                    offset_length = np.linalg.norm(offset, axis=1, keepdims=True)
+                    expand_length = np.clip(offset_length * (offset_expand - 1), a_min=0.5, a_max=3.0)
+                    offset_detal = offset / offset_length * expand_length
+                    offset = offset + offset_detal                
+                # original point
+                ori_yx = np.array([y, x], dtype=np.float32)
+                # flip to (x, y), scale by out_strid and undo the resize ratios
+                point_pair = (ori_yx +  offset)[:, ::-1]* out_strid / np.array([ratio_w, ratio_h]).reshape(-1, 2) 
+                point_pair_list.append(point_pair)
+
+            # ndarray: (x, 2), expand poly along width
+            detected_poly = self.point_pair2poly(point_pair_list)
+            detected_poly = self.expand_poly_along_width(detected_poly, shrink_ratio_of_width)
+            # clip to the source image bounds
+            detected_poly[:, 0] = np.clip(detected_poly[:, 0], a_min=0, a_max=src_w)
+            detected_poly[:, 1] = np.clip(detected_poly[:, 1], a_min=0, a_max=src_h)
+            poly_list.append(detected_poly)
+
+        return poly_list
+
+    def __call__(self, outs_dict, shape_list):
+        """Decode network outputs into text polygons, one result per image.
+
+        Args:
+            outs_dict: mapping with ``'f_score'``, ``'f_border'``,
+                ``'f_tvo'`` and ``'f_tco'`` entries, each indexable per image.
+            shape_list: per-image ``(src_h, src_w, ratio_h, ratio_w)``.
+
+        Returns:
+            A list with one ``{'points': ndarray}`` dict per image.
+        """
+        def _channels_last(tensor):
+            # channel-first tensor -> channel-last numpy array
+            return tensor.transpose((1,2,0)).numpy()
+
+        score_list = outs_dict['f_score']
+        border_list = outs_dict['f_border']
+        tvo_list = outs_dict['f_tvo']
+        tco_list = outs_dict['f_tco']
+
+        poly_lists = []
+        for ino in range(len(shape_list)):
+            p_score = _channels_last(score_list[ino])
+            p_border = _channels_last(border_list[ino])
+            p_tvo = _channels_last(tvo_list[ino])
+            p_tco = _channels_last(tco_list[ino])
+            src_h, src_w, ratio_h, ratio_w = shape_list[ino]
+
+            polys = self.detect_sast(
+                p_score, p_tvo, p_border, p_tco, ratio_w, ratio_h, src_w, src_h,
+                shrink_ratio_of_width=self.shrink_ratio_of_width,
+                tcl_map_thresh=self.tcl_map_thresh,
+                offset_expand=self.expand_scale)
+            poly_lists.append({'points': np.array(polys)})
+
+        return poly_lists
+
--- a/ppocr/utils/character.py
+++ b/ppocr/utils/character.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import numpy as np
-import string
-import re
-from .check import check_config_params
-import sys
-
-
-class CharacterOps(object):
-    """ Convert between text-label and text-index """
-
-    def __init__(self, config):
-        self.character_type = config['character_type']
-        self.loss_type = config['loss_type']
-        self.max_text_len = config['max_text_length']
-        if self.character_type == "en":
-            self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz"
-            dict_character = list(self.character_str)
-        elif self.character_type == "ch":
-            character_dict_path = config['character_dict_path']
-            add_space = False
-            if 'use_space_char' in config:
-                add_space = config['use_space_char']
-            self.character_str = ""
-            with open(character_dict_path, "rb") as fin:
-                lines = fin.readlines()
-                for line in lines:
-                    line = line.decode('utf-8').strip("\n").strip("\r\n")
-                    self.character_str += line
-            if add_space:
-                self.character_str += " "
-            dict_character = list(self.character_str)
-        elif self.character_type == "en_sensitive":
-            # same with ASTER setting (use 94 char).
-            self.character_str = string.printable[:-6]
-            dict_character = list(self.character_str)
-        else:
-            self.character_str = None
-        assert self.character_str is not None, \
-            "Nonsupport type of the character: {}".format(self.character_str)
-        self.beg_str = "sos"
-        self.end_str = "eos"
-        if self.loss_type == "attention":
-            dict_character = [self.beg_str, self.end_str] + dict_character
-        elif self.loss_type == "srn":
-            dict_character = dict_character + [self.beg_str, self.end_str]
-        self.dict = {}
-        for i, char in enumerate(dict_character):
-            self.dict[char] = i
-        self.character = dict_character
-
-    def encode(self, text):
-        """convert text-label into text-index.
-        input:
-            text: text labels of each image. [batch_size]
-
-        output:
-            text: concatenated text index for CTCLoss.
-                    [sum(text_lengths)] = [text_index_0 + text_index_1 + ... + text_index_(n - 1)]
-            length: length of each text. [batch_size]
-        """
-        if self.character_type == "en":
-            text = text.lower()
-
-        text_list = []
-        for char in text:
-            if char not in self.dict:
-                continue
-            text_list.append(self.dict[char])
-        text = np.array(text_list)
-        return text
-
-    def decode(self, text_index, is_remove_duplicate=False):
-        """ convert text-index into text-label. """
-        char_list = []
-        char_num = self.get_char_num()
-
-        if self.loss_type == "attention":
-            beg_idx = self.get_beg_end_flag_idx("beg")
-            end_idx = self.get_beg_end_flag_idx("end")
-            ignored_tokens = [beg_idx, end_idx]
-        else:
-            ignored_tokens = [char_num]
-
-        for idx in range(len(text_index)):
-            if text_index[idx] in ignored_tokens:
-                continue
-            if is_remove_duplicate:
-                if idx > 0 and text_index[idx - 1] == text_index[idx]:
-                    continue
-            char_list.append(self.character[int(text_index[idx])])
-        text = ''.join(char_list)
-        return text
-
-    def get_char_num(self):
-        return len(self.character)
-
-    def get_beg_end_flag_idx(self, beg_or_end):
-        if self.loss_type == "attention":
-            if beg_or_end == "beg":
-                idx = np.array(self.dict[self.beg_str])
-            elif beg_or_end == "end":
-                idx = np.array(self.dict[self.end_str])
-            else:
-                assert False, "Unsupport type %s in get_beg_end_flag_idx"\
-                    % beg_or_end
-            return idx
-        else:
-            err = "error in get_beg_end_flag_idx when using the loss %s"\
-                % (self.loss_type)
-            assert False, err
-
-
-def cal_predicts_accuracy(char_ops,
-                          preds,
-                          preds_lod,
-                          labels,
-                          labels_lod,
-                          is_remove_duplicate=False):
-    acc_num = 0
-    img_num = 0
-    for ino in range(len(labels_lod) - 1):
-        beg_no = preds_lod[ino]
-        end_no = preds_lod[ino + 1]
-        preds_text = preds[beg_no:end_no].reshape(-1)
-        preds_text = char_ops.decode(preds_text, is_remove_duplicate)
-
-        beg_no = labels_lod[ino]
-        end_no = labels_lod[ino + 1]
-        labels_text = labels[beg_no:end_no].reshape(-1)
-        labels_text = char_ops.decode(labels_text, is_remove_duplicate)
-        img_num += 1
-
-        if preds_text == labels_text:
-            acc_num += 1
-    acc = acc_num * 1.0 / img_num
-    return acc, acc_num, img_num
-
-
-def cal_predicts_accuracy_srn(char_ops,
-                              preds,
-                              labels,
-                              max_text_len,
-                              is_debug=False):
-    acc_num = 0
-    img_num = 0
-
-    char_num = char_ops.get_char_num()
-
-    total_len = preds.shape[0]
-    img_num = int(total_len / max_text_len)
-    for i in range(img_num):
-        cur_label = []
-        cur_pred = []
-        for j in range(max_text_len):
-            if labels[j + i * max_text_len] != int(char_num-1):  #0
-                cur_label.append(labels[j + i * max_text_len][0])
-            else:
-                break
-
-        for j in range(max_text_len + 1):
-            if j < len(cur_label) and preds[j + i * max_text_len][
-                    0] != cur_label[j]:
-                break
-            elif j == len(cur_label) and j == max_text_len:
-                acc_num += 1
-                break
-            elif j == len(cur_label) and preds[j + i * max_text_len][0] == int(char_num-1):
-                acc_num += 1
-                break
-    acc = acc_num * 1.0 / img_num
-    return acc, acc_num, img_num
-
-
-def convert_rec_attention_infer_res(preds):
-    img_num = preds.shape[0]
-    target_lod = [0]
-    convert_ids = []
-    for ino in range(img_num):
-        end_pos = np.where(preds[ino, :] == 1)[0]
-        if len(end_pos) <= 1:
-            text_list = preds[ino, 1:]
-        else:
-            text_list = preds[ino, 1:end_pos[1]]
-        target_lod.append(target_lod[ino] + len(text_list))
-        convert_ids = convert_ids + list(text_list)
-    convert_ids = np.array(convert_ids)
-    convert_ids = convert_ids.reshape((-1, 1))
-    return convert_ids, target_lod
-
-
-def convert_rec_label_to_lod(ori_labels):
-    img_num = len(ori_labels)
-    target_lod = [0]
-    convert_ids = []
-    for ino in range(img_num):
-        target_lod.append(target_lod[ino] + len(ori_labels[ino]))
-        convert_ids = convert_ids + list(ori_labels[ino])
-    convert_ids = np.array(convert_ids)
-    convert_ids = convert_ids.reshape((-1, 1))
-    return convert_ids, target_lod
--- a/ppocr/utils/check.py
+++ b/ppocr/utils/check.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import sys
-
-import logging
-logger = logging.getLogger(__name__)
-
-
-def check_config_params(config, config_name, params):
-    for param in params:
-        if param not in config:
-            err = "param %s didn't find in %s!" % (param, config_name)
-            assert False, err
-    return
--- a/tools/infer/utility.py
+++ b/tools/infer/utility.py
@@ -230,10 +230,10 @@ def draw_ocr_box_txt(image,
                box[2][1], box[3][0], box[3][1]
            ],
            outline=color)
-        box_height = math.sqrt((box[0][0] - box[3][0]) ** 2 + (box[0][1] - box[3][
-            1]) ** 2)
-        box_width = math.sqrt((box[0][0] - box[1][0]) ** 2 + (box[0][1] - box[1][
-            1]) ** 2)
+        box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][
+            1])**2)
+        box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][
+            1])**2)
        if box_height > 2 * box_width:
            font_size = max(int(box_width * 0.9), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
@@ -260,7 +260,6 @@ def str_count(s):
    Count the number of Chinese characters,
    a single English character and a single number
    equal to half the length of Chinese characters.
-
    args:
        s(string): the input of string
    return(int):
@@ -295,7 +294,6 @@ def text_visual(texts,
        img_w(int): the width of blank img
        font_path: the path of font which is used to draw text
    return(array):
-
    """
    if scores is not None:
        assert len(texts) == len(