Merge remote-tracking branch 'Evezerest/dygraph' into dygraph

71d37bab · Leif · 8e32ef41 · fbb68c38 · 71d37bab · 71d37bab
Commit 71d37bab authored Mar 23, 2022 by Leif
20 changed files
--- a/ppocr/modeling/backbones/rec_efficientb3_pren.py
+++ b/ppocr/modeling/backbones/rec_efficientb3_pren.py
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Code is referenced from:
+https://github.com/RuijieJ/pren/blob/main/Nets/EfficientNet.py
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+from collections import namedtuple
+import paddle
+import paddle.nn as nn
+import paddle.nn.functional as F
+
+__all__ = ['EfficientNetb3']
+
+
+class EffB3Params:
+    @staticmethod
+    def get_global_params():
+        """
+        The fllowing are efficientnetb3's arch superparams, but to fit for scene 
+        text recognition task, the resolution(image_size) here is changed 
+        from 300 to 64.
+        """
+        GlobalParams = namedtuple('GlobalParams', [
+            'drop_connect_rate', 'width_coefficient', 'depth_coefficient',
+            'depth_divisor', 'image_size'
+        ])
+        global_params = GlobalParams(
+            drop_connect_rate=0.3,
+            width_coefficient=1.2,
+            depth_coefficient=1.4,
+            depth_divisor=8,
+            image_size=64)
+        return global_params
+
+    @staticmethod
+    def get_block_params():
+        BlockParams = namedtuple('BlockParams', [
+            'kernel_size', 'num_repeat', 'input_filters', 'output_filters',
+            'expand_ratio', 'id_skip', 'se_ratio', 'stride'
+        ])
+        block_params = [
+            BlockParams(3, 1, 32, 16, 1, True, 0.25, 1),
+            BlockParams(3, 2, 16, 24, 6, True, 0.25, 2),
+            BlockParams(5, 2, 24, 40, 6, True, 0.25, 2),
+            BlockParams(3, 3, 40, 80, 6, True, 0.25, 2),
+            BlockParams(5, 3, 80, 112, 6, True, 0.25, 1),
+            BlockParams(5, 4, 112, 192, 6, True, 0.25, 2),
+            BlockParams(3, 1, 192, 320, 6, True, 0.25, 1)
+        ]
+        return block_params
+
+
+class EffUtils:
+    @staticmethod
+    def round_filters(filters, global_params):
+        """Calculate and round number of filters based on depth multiplier."""
+        multiplier = global_params.width_coefficient
+        if not multiplier:
+            return filters
+        divisor = global_params.depth_divisor
+        filters *= multiplier
+        new_filters = int(filters + divisor / 2) // divisor * divisor
+        if new_filters < 0.9 * filters:
+            new_filters += divisor
+        return int(new_filters)
+
+    @staticmethod
+    def round_repeats(repeats, global_params):
+        """Round number of filters based on depth multiplier."""
+        multiplier = global_params.depth_coefficient
+        if not multiplier:
+            return repeats
+        return int(math.ceil(multiplier * repeats))
+
+
+class ConvBlock(nn.Layer):
+    """
+    Single EfficientNet building block.
+
+    Chains an optional 1x1 expansion conv, a depthwise conv, an optional
+    squeeze-and-excitation gate and a 1x1 projection conv. The input is
+    added back when the skip conditions checked in forward() hold.
+
+    Args:
+        block_params: object exposing kernel_size, stride, input_filters,
+            output_filters, expand_ratio, id_skip and se_ratio.
+    """
+
+    def __init__(self, block_params):
+        super(ConvBlock, self).__init__()
+        self.block_args = block_params
+        # squeeze-and-excitation is only built for a ratio in (0, 1]
+        self.has_se = (self.block_args.se_ratio is not None) and \
+            (0 < self.block_args.se_ratio <= 1)
+        self.id_skip = block_params.id_skip
+
+        # expansion phase
+        self.input_filters = self.block_args.input_filters
+        # NOTE: from here on `output_filters` is the expanded (inner) width,
+        # not block_args.output_filters; the latter is self.final_oup below.
+        output_filters = \
+            self.block_args.input_filters * self.block_args.expand_ratio
+        if self.block_args.expand_ratio != 1:
+            self.expand_conv = nn.Conv2D(
+                self.input_filters, output_filters, 1, bias_attr=False)
+            self.bn0 = nn.BatchNorm(output_filters)
+
+        # depthwise conv phase
+        k = self.block_args.kernel_size
+        s = self.block_args.stride
+        # groups == channels, so every channel is convolved on its own
+        self.depthwise_conv = nn.Conv2D(
+            output_filters,
+            output_filters,
+            groups=output_filters,
+            kernel_size=k,
+            stride=s,
+            padding='same',
+            bias_attr=False)
+        self.bn1 = nn.BatchNorm(output_filters)
+
+        # squeeze and excitation layer, if desired
+        if self.has_se:
+            # the squeezed width is derived from the block's input filters,
+            # not from the expanded width
+            num_squeezed_channels = max(1,
+                                        int(self.block_args.input_filters *
+                                            self.block_args.se_ratio))
+            self.se_reduce = nn.Conv2D(output_filters, num_squeezed_channels, 1)
+            self.se_expand = nn.Conv2D(num_squeezed_channels, output_filters, 1)
+
+        # output phase
+        self.final_oup = self.block_args.output_filters
+        self.project_conv = nn.Conv2D(
+            output_filters, self.final_oup, 1, bias_attr=False)
+        self.bn2 = nn.BatchNorm(self.final_oup)
+        self.swish = nn.Swish()
+
+    def drop_connect(self, inputs, p, training):
+        """
+        Randomly drop whole samples of `inputs` during training.
+
+        Args:
+            inputs: tensor whose first dimension is the batch.
+            p: drop probability; each sample is kept with probability 1 - p.
+            training: when falsy, `inputs` is returned untouched.
+
+        Returns:
+            `inputs` divided by the keep probability and multiplied by a
+            per-sample 0/1 mask of shape [batch_size, 1, 1, 1].
+        """
+        if not training:
+            return inputs
+
+        batch_size = inputs.shape[0]
+        keep_prob = 1 - p
+        # keep_prob + rand: floor() below yields 1 where the sum reaches 1
+        # and 0 otherwise
+        random_tensor = keep_prob
+        random_tensor += paddle.rand([batch_size, 1, 1, 1], dtype=inputs.dtype)
+        # NOTE(review): random_tensor is already a Tensor at this point, so
+        # this re-wrap looks redundant - confirm before removing.
+        random_tensor = paddle.to_tensor(random_tensor, place=inputs.place)
+        binary_tensor = paddle.floor(random_tensor)
+        output = inputs / keep_prob * binary_tensor
+        return output
+
+    def forward(self, inputs, drop_connect_rate=None):
+        """
+        Run the block on `inputs`.
+
+        Args:
+            inputs: input feature tensor.
+            drop_connect_rate: optional drop probability applied to the
+                block output before the skip addition; a falsy value
+                disables it.
+
+        Returns:
+            The block output, with `inputs` added when id_skip is set, the
+            stride is 1 and input/output filter counts match.
+        """
+        # expansion and depthwise conv
+        x = inputs
+        if self.block_args.expand_ratio != 1:
+            x = self.swish(self.bn0(self.expand_conv(inputs)))
+        x = self.swish(self.bn1(self.depthwise_conv(x)))
+
+        # squeeze and excitation
+        if self.has_se:
+            # global average pool to 1x1, then gate x with the sigmoid of the
+            # reduce -> swish -> expand result
+            x_squeezed = F.adaptive_avg_pool2d(x, 1)
+            x_squeezed = self.se_expand(self.swish(self.se_reduce(x_squeezed)))
+            x = F.sigmoid(x_squeezed) * x
+        # projection has no activation after its batch norm
+        x = self.bn2(self.project_conv(x))
+
+        # skip connection and drop connect
+        if self.id_skip and self.block_args.stride == 1 and \
+            self.input_filters == self.final_oup:
+            if drop_connect_rate:
+                x = self.drop_connect(
+                    x, p=drop_connect_rate, training=self.training)
+            x = x + inputs
+        return x
+
+
+class EfficientNetb3_PREN(nn.Layer):
+    """
+    EfficientNet-B3 backbone that returns three intermediate feature maps.
+
+    Args:
+        in_channels: number of channels of the input image tensor.
+
+    Attributes set here:
+        out_channels: list of channel counts recorded for the blocks listed
+            in concerned_block_idxes.
+    """
+
+    def __init__(self, in_channels):
+        super(EfficientNetb3_PREN, self).__init__()
+        self.blocks_params = EffB3Params.get_block_params()
+        self.global_params = EffB3Params.get_global_params()
+        self.out_channels = []
+        # stem
+        stem_channels = EffUtils.round_filters(32, self.global_params)
+        self.conv_stem = nn.Conv2D(
+            in_channels, stem_channels, 3, 2, padding='same', bias_attr=False)
+        self.bn0 = nn.BatchNorm(stem_channels)
+
+        # plain Python list; each block is registered via add_sublayer below
+        self.blocks = []
+        # to extract three feature maps for fpn based on efficientnetb3 backbone
+        self.concerned_block_idxes = [7, 17, 25]
+        # NOTE(review): concerned_idx is incremented before it is tested, so
+        # it counts blocks from 1 here, while forward() tests the 0-based
+        # enumerate index against the same list. Confirm both pick blocks
+        # with the same channel count.
+        concerned_idx = 0
+        for i, block_params in enumerate(self.blocks_params):
+            # scale the stage's widths and repeat count by the global
+            # width/depth coefficients
+            block_params = block_params._replace(
+                input_filters=EffUtils.round_filters(block_params.input_filters,
+                                                     self.global_params),
+                output_filters=EffUtils.round_filters(
+                    block_params.output_filters, self.global_params),
+                num_repeat=EffUtils.round_repeats(block_params.num_repeat,
+                                                  self.global_params))
+            # first block of the stage keeps the stage's stride and input width
+            self.blocks.append(
+                self.add_sublayer("{}-0".format(i), ConvBlock(block_params)))
+            concerned_idx += 1
+            if concerned_idx in self.concerned_block_idxes:
+                self.out_channels.append(block_params.output_filters)
+            # remaining blocks of the stage take the stage output width as
+            # input and use stride 1
+            if block_params.num_repeat > 1:
+                block_params = block_params._replace(
+                    input_filters=block_params.output_filters, stride=1)
+            for j in range(block_params.num_repeat - 1):
+                self.blocks.append(
+                    self.add_sublayer('{}-{}'.format(i, j + 1),
+                                      ConvBlock(block_params)))
+                concerned_idx += 1
+                if concerned_idx in self.concerned_block_idxes:
+                    self.out_channels.append(block_params.output_filters)
+
+        self.swish = nn.Swish()
+
+    def forward(self, inputs):
+        """
+        Run the stem and all blocks, collecting selected block outputs.
+
+        Args:
+            inputs: input image tensor.
+
+        Returns:
+            list of the outputs of the blocks whose 0-based position in
+            self.blocks appears in concerned_block_idxes.
+        """
+        outs = []
+        
+        x = self.swish(self.bn0(self.conv_stem(inputs)))
+        for idx, block in enumerate(self.blocks):
+            # the drop-connect rate grows linearly with the block position
+            drop_connect_rate = self.global_params.drop_connect_rate
+            if drop_connect_rate:
+                drop_connect_rate *= float(idx) / len(self.blocks)
+            x = block(x, drop_connect_rate=drop_connect_rate)
+            if idx in self.concerned_block_idxes:
+                outs.append(x)
+        return outs
--- a/ppocr/modeling/heads/__init__.py
+++ b/ppocr/modeling/heads/__init__.py
@@ -21,6 +21,7 @@ def build_head(config):
    from .det_east_head import EASTHead
    from .det_sast_head import SASTHead
    from .det_pse_head import PSEHead
+    from .det_fce_head import FCEHead
    from .e2e_pg_head import PGHead

    # rec head
@@ -30,6 +31,7 @@ def build_head(config):
    from .rec_nrtr_head import Transformer
    from .rec_sar_head import SARHead
    from .rec_aster_head import AsterHead
+    from .rec_pren_head import PRENHead

    # cls head
    from .cls_head import ClsHead
@@ -40,9 +42,9 @@ def build_head(config):
    from .table_att_head import TableAttentionHead

    support_dict = [
-        'DBHead', 'PSEHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead',
-        'AttentionHead', 'SRNHead', 'PGHead', 'Transformer',
-        'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead'
+        'DBHead', 'PSEHead', 'FCEHead', 'EASTHead', 'SASTHead', 'CTCHead',
+        'ClsHead', 'AttentionHead', 'SRNHead', 'PGHead', 'Transformer',
+        'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead', 'PRENHead'
    ]

    #table head

--- a/ppocr/modeling/heads/det_fce_head.py
+++ b/ppocr/modeling/heads/det_fce_head.py
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is referenced from:
+https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/dense_heads/fce_head.py
+"""
+
+from paddle import nn
+from paddle import ParamAttr
+import paddle.nn.functional as F
+from paddle.nn.initializer import Normal
+import paddle
+from functools import partial
+
+
+def multi_apply(func, *args, **kwargs):
+    pfunc = partial(func, **kwargs) if kwargs else func
+    map_results = map(pfunc, *args)
+    return tuple(map(list, zip(*map_results)))
+
+
+class FCEHead(nn.Layer):
+    """The class for implementing FCENet head.
+    FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped Text
+    Detection.
+
+    [https://arxiv.org/abs/2104.10442]
+
+    Args:
+        in_channels (int): The number of input channels.
+        fourier_degree (int) : The maximum Fourier transform degree k. The
+            regression branch outputs (2 * k + 1) * 2 channels.
+    """
+
+    def __init__(self, in_channels, fourier_degree=5):
+        super().__init__()
+        assert isinstance(in_channels, int)
+
+        self.downsample_ratio = 1.0
+        self.in_channels = in_channels
+        self.fourier_degree = fourier_degree
+        # 4 classification channels: forward() softmaxes channels 0:2 and 2:
+        # as two separate pairs
+        self.out_channels_cls = 4
+        self.out_channels_reg = (2 * self.fourier_degree + 1) * 2
+
+        # both branches are 3x3 convs whose weights are drawn from
+        # Normal(0, 0.01); the same conv pair is applied to every level
+        self.out_conv_cls = nn.Conv2D(
+            in_channels=self.in_channels,
+            out_channels=self.out_channels_cls,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            weight_attr=ParamAttr(
+                name='cls_weights',
+                initializer=Normal(
+                    mean=paddle.to_tensor(0.), std=paddle.to_tensor(0.01))),
+            bias_attr=True)
+        self.out_conv_reg = nn.Conv2D(
+            in_channels=self.in_channels,
+            out_channels=self.out_channels_reg,
+            kernel_size=3,
+            stride=1,
+            padding=1,
+            groups=1,
+            weight_attr=ParamAttr(
+                name='reg_weights',
+                initializer=Normal(
+                    mean=paddle.to_tensor(0.), std=paddle.to_tensor(0.01))),
+            bias_attr=True)
+
+    def forward(self, feats, targets=None):
+        """
+        Run the head on every feature level.
+
+        Args:
+            feats: iterable of feature tensors, one per level.
+            targets: accepted but not used by this method.
+
+        Returns:
+            dict. In eval mode it holds one 'level_{i}' entry per level: the
+            channel-wise concatenation of softmax(cls[:, 0:2]),
+            softmax(cls[:, 2:]) and the raw regression map. In training mode
+            it holds a single 'levels' entry: a list of
+            [cls_predict, reg_predict] pairs.
+        """
+        cls_res, reg_res = multi_apply(self.forward_single, feats)
+        level_num = len(cls_res)
+        outs = {}
+        if not self.training:
+            for i in range(level_num):
+                tr_pred = F.softmax(cls_res[i][:, 0:2, :, :], axis=1)
+                tcl_pred = F.softmax(cls_res[i][:, 2:, :, :], axis=1)
+                outs['level_{}'.format(i)] = paddle.concat(
+                    [tr_pred, tcl_pred, reg_res[i]], axis=1)
+        else:
+            preds = [[cls_res[i], reg_res[i]] for i in range(level_num)]
+            outs['levels'] = preds
+        return outs
+
+    def forward_single(self, x):
+        """Return (cls_predict, reg_predict) for one feature level `x`."""
+        cls_predict = self.out_conv_cls(x)
+        reg_predict = self.out_conv_reg(x)
+        return cls_predict, reg_predict
--- a/ppocr/modeling/heads/rec_pren_head.py
+++ b/ppocr/modeling/heads/rec_pren_head.py
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from paddle import nn
+from paddle.nn import functional as F
+
+
+class PRENHead(nn.Layer):
+    def __init__(self, in_channels, out_channels, **kwargs):
+        super(PRENHead, self).__init__()
+        self.linear = nn.Linear(in_channels, out_channels)
+
+    def forward(self, x, targets=None):
+        predicts = self.linear(x)
+
+        if not self.training:
+            predicts = F.softmax(predicts, axis=2)
+
+        return predicts
--- a/ppocr/modeling/necks/__init__.py
+++ b/ppocr/modeling/necks/__init__.py
@@ -23,7 +23,12 @@ def build_neck(config):
    from .pg_fpn import PGFPN
    from .table_fpn import TableFPN
    from .fpn import FPN
-    support_dict = ['FPN','DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder', 'PGFPN', 'TableFPN']
+    from .fce_fpn import FCEFPN
+    from .pren_fpn import PRENFPN
+    support_dict = [
+        'FPN', 'FCEFPN', 'DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder',
+        'PGFPN', 'TableFPN', 'PRENFPN'
+    ]

    module_name = config.pop('name')
    assert module_name in support_dict, Exception('neck only support {}'.format(

--- a/ppocr/modeling/necks/fce_fpn.py
+++ b/ppocr/modeling/necks/fce_fpn.py
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is referenced from:
+https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.3/ppdet/modeling/necks/fpn.py
+"""
+
+import paddle.nn as nn
+import paddle.nn.functional as F
+from paddle import ParamAttr
+from paddle.nn.initializer import XavierUniform
+from paddle.nn.initializer import Normal
+from paddle.regularizer import L2Decay
+
+__all__ = ['FCEFPN']
+
+
+class ConvNormLayer(nn.Layer):
+    """
+    A bias-free Conv2D followed by a normalization layer.
+
+    Args:
+        ch_in: input channels of the conv.
+        ch_out: output channels of the conv and size of the norm layer.
+        filter_size: conv kernel size; padding is (filter_size - 1) // 2.
+        stride: conv stride.
+        groups: conv groups.
+        norm_type: one of 'bn', 'sync_bn', 'gn'.
+        norm_decay: L2 decay applied to the norm weight and bias; None
+            disables the regularizer.
+        norm_groups: number of groups, used only when norm_type is 'gn'.
+        lr_scale: accepted but not used in this constructor.
+        freeze_norm: when True the norm parameters get learning rate 0.
+        initializer: weight initializer for the conv. The default instance
+            is created once, when the function is defined.
+    """
+
+    def __init__(self,
+                 ch_in,
+                 ch_out,
+                 filter_size,
+                 stride,
+                 groups=1,
+                 norm_type='bn',
+                 norm_decay=0.,
+                 norm_groups=32,
+                 lr_scale=1.,
+                 freeze_norm=False,
+                 initializer=Normal(
+                     mean=0., std=0.01)):
+        super(ConvNormLayer, self).__init__()
+        assert norm_type in ['bn', 'sync_bn', 'gn']
+
+        # the conv itself carries no bias; the name is reused further down
+        # for the norm layer's bias attribute
+        bias_attr = False
+
+        self.conv = nn.Conv2D(
+            in_channels=ch_in,
+            out_channels=ch_out,
+            kernel_size=filter_size,
+            stride=stride,
+            padding=(filter_size - 1) // 2,
+            groups=groups,
+            weight_attr=ParamAttr(
+                initializer=initializer, learning_rate=1.),
+            bias_attr=bias_attr)
+
+        # learning rate 0 for the norm parameters when freeze_norm is set
+        norm_lr = 0. if freeze_norm else 1.
+        param_attr = ParamAttr(
+            learning_rate=norm_lr,
+            regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
+        bias_attr = ParamAttr(
+            learning_rate=norm_lr,
+            regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
+        if norm_type == 'bn':
+            self.norm = nn.BatchNorm2D(
+                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
+        elif norm_type == 'sync_bn':
+            self.norm = nn.SyncBatchNorm(
+                ch_out, weight_attr=param_attr, bias_attr=bias_attr)
+        elif norm_type == 'gn':
+            self.norm = nn.GroupNorm(
+                num_groups=norm_groups,
+                num_channels=ch_out,
+                weight_attr=param_attr,
+                bias_attr=bias_attr)
+
+    def forward(self, inputs):
+        """Apply the conv and then the norm layer to `inputs`."""
+        out = self.conv(inputs)
+        out = self.norm(out)
+        return out
+
+
+class FCEFPN(nn.Layer):
+    """
+    Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
+    Args:
+        in_channels (list[int]): input channels of each level which can be
+            derived from the output shape of backbone by from_config
+        out_channels (int): output channels shared by every level; it is
+            passed directly as each conv's out_channels
+        spatial_scales (list[float]): the spatial scales between input feature
+            maps and original input image which can be derived from the output
+            shape of backbone by from_config. One halved scale is appended
+            per extra stage.
+        has_extra_convs (bool): whether to add extra conv to the last level.
+            default False
+        extra_stage (int): the number of extra stages added to the last level.
+            default 1
+        use_c5 (bool): Whether to use c5 as the input of extra stage,
+            otherwise p5 is used. default True
+        norm_type (string|None): The normalization type in FPN module. If
+            norm_type is None, norm will not be used after conv and if
+            norm_type is string, bn, gn, sync_bn are available. default None
+        norm_decay (float): weight decay for normalization layer weights.
+            default 0.
+        freeze_norm (bool): whether to freeze normalization layer.
+            default False
+        relu_before_extra_convs (bool): whether to add relu before extra convs.
+            default True
+
+    """
+
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
+                 has_extra_convs=False,
+                 extra_stage=1,
+                 use_c5=True,
+                 norm_type=None,
+                 norm_decay=0.,
+                 freeze_norm=False,
+                 relu_before_extra_convs=True):
+        super(FCEFPN, self).__init__()
+        self.out_channels = out_channels
+        # `+` builds a new list each time, so the shared default list is
+        # never mutated
+        for s in range(extra_stage):
+            spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
+        self.spatial_scales = spatial_scales
+        self.has_extra_convs = has_extra_convs
+        self.extra_stage = extra_stage
+        self.use_c5 = use_c5
+        self.relu_before_extra_convs = relu_before_extra_convs
+        self.norm_type = norm_type
+        self.norm_decay = norm_decay
+        self.freeze_norm = freeze_norm
+
+        # plain Python lists; every conv is registered through add_sublayer
+        self.lateral_convs = []
+        self.fpn_convs = []
+        # fan_out handed to XavierUniform for the 3x3 convs
+        fan = out_channels * 3 * 3
+
+        # stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
+        # 0 <= st_stage < ed_stage <= 3
+        st_stage = 4 - len(in_channels)
+        ed_stage = st_stage + len(in_channels) - 1
+        # 1x1 lateral convs, one per input level
+        for i in range(st_stage, ed_stage + 1):
+            if i == 3:
+                lateral_name = 'fpn_inner_res5_sum'
+            else:
+                lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
+            in_c = in_channels[i - st_stage]
+            if self.norm_type is not None:
+                lateral = self.add_sublayer(
+                    lateral_name,
+                    ConvNormLayer(
+                        ch_in=in_c,
+                        ch_out=out_channels,
+                        filter_size=1,
+                        stride=1,
+                        norm_type=self.norm_type,
+                        norm_decay=self.norm_decay,
+                        freeze_norm=self.freeze_norm,
+                        initializer=XavierUniform(fan_out=in_c)))
+            else:
+                lateral = self.add_sublayer(
+                    lateral_name,
+                    nn.Conv2D(
+                        in_channels=in_c,
+                        out_channels=out_channels,
+                        kernel_size=1,
+                        weight_attr=ParamAttr(
+                            initializer=XavierUniform(fan_out=in_c))))
+            self.lateral_convs.append(lateral)
+
+        # 3x3 output convs, one per input level
+        for i in range(st_stage, ed_stage + 1):
+            fpn_name = 'fpn_res{}_sum'.format(i + 2)
+            if self.norm_type is not None:
+                fpn_conv = self.add_sublayer(
+                    fpn_name,
+                    ConvNormLayer(
+                        ch_in=out_channels,
+                        ch_out=out_channels,
+                        filter_size=3,
+                        stride=1,
+                        norm_type=self.norm_type,
+                        norm_decay=self.norm_decay,
+                        freeze_norm=self.freeze_norm,
+                        initializer=XavierUniform(fan_out=fan)))
+            else:
+                fpn_conv = self.add_sublayer(
+                    fpn_name,
+                    nn.Conv2D(
+                        in_channels=out_channels,
+                        out_channels=out_channels,
+                        kernel_size=3,
+                        padding=1,
+                        weight_attr=ParamAttr(
+                            initializer=XavierUniform(fan_out=fan))))
+            self.fpn_convs.append(fpn_conv)
+
+        # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
+        if self.has_extra_convs:
+            for i in range(self.extra_stage):
+                lvl = ed_stage + 1 + i
+                # only the first extra conv may read the raw last backbone
+                # level; later ones always read an FPN output
+                if i == 0 and self.use_c5:
+                    in_c = in_channels[-1]
+                else:
+                    in_c = out_channels
+                extra_fpn_name = 'fpn_{}'.format(lvl + 2)
+                if self.norm_type is not None:
+                    extra_fpn_conv = self.add_sublayer(
+                        extra_fpn_name,
+                        ConvNormLayer(
+                            ch_in=in_c,
+                            ch_out=out_channels,
+                            filter_size=3,
+                            stride=2,
+                            norm_type=self.norm_type,
+                            norm_decay=self.norm_decay,
+                            freeze_norm=self.freeze_norm,
+                            initializer=XavierUniform(fan_out=fan)))
+                else:
+                    extra_fpn_conv = self.add_sublayer(
+                        extra_fpn_name,
+                        nn.Conv2D(
+                            in_channels=in_c,
+                            out_channels=out_channels,
+                            kernel_size=3,
+                            stride=2,
+                            padding=1,
+                            weight_attr=ParamAttr(
+                                initializer=XavierUniform(fan_out=fan))))
+                # extra convs are stored after the per-level convs, so
+                # forward() indexes them from num_levels onwards
+                self.fpn_convs.append(extra_fpn_conv)
+
+    @classmethod
+    def from_config(cls, cfg, input_shape):
+        """
+        Derive constructor kwargs from backbone output descriptors.
+
+        Each item of `input_shape` must expose `channels` and `stride`;
+        `cfg` is not used.
+        """
+        return {
+            'in_channels': [i.channels for i in input_shape],
+            'spatial_scales': [1.0 / i.stride for i in input_shape],
+        }
+
+    def forward(self, body_feats):
+        """
+        Build the feature pyramid from the backbone features.
+
+        Args:
+            body_feats: list of backbone feature maps, indexed in the same
+                order as the lateral convs.
+
+        Returns:
+            list of FPN outputs: one per input level, followed by the extra
+            levels when extra_stage > 0.
+        """
+        laterals = []
+        num_levels = len(body_feats)
+
+        for i in range(num_levels):
+            laterals.append(self.lateral_convs[i](body_feats[i]))
+
+        # top-down pathway: walk from the last level to the first, adding
+        # the 2x nearest-neighbour upsampled map into the level before it
+        for i in range(1, num_levels):
+            lvl = num_levels - i
+            upsample = F.interpolate(
+                laterals[lvl],
+                scale_factor=2.,
+                mode='nearest', )
+            laterals[lvl - 1] += upsample
+
+        fpn_output = []
+        for lvl in range(num_levels):
+            fpn_output.append(self.fpn_convs[lvl](laterals[lvl]))
+
+        if self.extra_stage > 0:
+            # use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
+            if not self.has_extra_convs:
+                assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has not extra convs'
+                # kernel size 1 with stride 2: plain spatial subsampling
+                fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
+            # add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
+            else:
+                if self.use_c5:
+                    extra_source = body_feats[-1]
+                else:
+                    extra_source = fpn_output[-1]
+                fpn_output.append(self.fpn_convs[num_levels](extra_source))
+
+                # every further extra level is computed from the previous one
+                for i in range(1, self.extra_stage):
+                    if self.relu_before_extra_convs:
+                        fpn_output.append(self.fpn_convs[num_levels + i](F.relu(
+                            fpn_output[-1])))
+                    else:
+                        fpn_output.append(self.fpn_convs[num_levels + i](
+                            fpn_output[-1]))
+        return fpn_output
--- a/ppocr/modeling/necks/pren_fpn.py
+++ b/ppocr/modeling/necks/pren_fpn.py
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Code is referenced from:
+https://github.com/RuijieJ/pren/blob/main/Nets/Aggregation.py
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import paddle
+from paddle import nn
+import paddle.nn.functional as F
+
+
+class PoolAggregate(nn.Layer):
+    """
+    Produce n_r pooled feature vectors from one feature map.
+
+    Each of the n_r branches runs conv-bn-swish-conv-bn and is then globally
+    average-pooled to a single d_out vector.
+
+    Args:
+        n_r: number of parallel branches (and of output vectors).
+        d_in: input channels.
+        d_middle: channels between the two convs; falls back to d_in when
+            falsy.
+        d_out: output channels; falls back to d_in when falsy.
+    """
+
+    def __init__(self, n_r, d_in, d_middle=None, d_out=None):
+        super(PoolAggregate, self).__init__()
+        if not d_middle:
+            d_middle = d_in
+        if not d_out:
+            d_out = d_in
+
+        self.d_in = d_in
+        self.d_middle = d_middle
+        self.d_out = d_out
+        # one Swish instance is reused inside every branch
+        self.act = nn.Swish()
+
+        self.n_r = n_r
+        self.aggs = self._build_aggs()
+
+    def _build_aggs(self):
+        """Create and register the n_r branches; returns them as a list."""
+        aggs = []
+        for i in range(self.n_r):
+            # positional Conv2D args are (in, out, kernel=3, stride=2,
+            # padding=1); sublayers are registered under the names
+            # '0' .. str(n_r - 1)
+            aggs.append(
+                self.add_sublayer(
+                    '{}'.format(i),
+                    nn.Sequential(
+                        ('conv1', nn.Conv2D(
+                            self.d_in, self.d_middle, 3, 2, 1, bias_attr=False)
+                         ), ('bn1', nn.BatchNorm(self.d_middle)),
+                        ('act', self.act), ('conv2', nn.Conv2D(
+                            self.d_middle, self.d_out, 3, 2, 1, bias_attr=False
+                        )), ('bn2', nn.BatchNorm(self.d_out)))))
+        return aggs
+
+    def forward(self, x):
+        """
+        Args:
+            x: feature map whose first dimension is the batch.
+
+        Returns:
+            tensor of shape (b, n_r, d_out): one pooled vector per branch,
+            concatenated along axis 1.
+        """
+        b = x.shape[0]
+        outs = []
+        for agg in self.aggs:
+            y = agg(x)
+            # global average pool to 1x1, then flatten to (b, 1, d_out)
+            p = F.adaptive_avg_pool2d(y, 1)
+            outs.append(p.reshape((b, 1, self.d_out)))
+        out = paddle.concat(outs, 1)
+        return out
+
+
+class WeightAggregate(nn.Layer):
+    """
+    Aggregate a feature map into n_r vectors using learned spatial weights.
+
+    One conv branch (conv_n) predicts n_r sigmoid-activated maps and another
+    (conv_d) predicts d_out-channel features. forward() multiplies the two
+    over the flattened spatial positions.
+
+    Args:
+        n_r: number of weight maps (and of output vectors).
+        d_in: input channels.
+        d_middle: hidden channels of conv_d; falls back to d_in when falsy.
+        d_out: output channels of conv_d; falls back to d_in when falsy.
+    """
+
+    def __init__(self, n_r, d_in, d_middle=None, d_out=None):
+        super(WeightAggregate, self).__init__()
+        if not d_middle:
+            d_middle = d_in
+        if not d_out:
+            d_out = d_in
+
+        self.n_r = n_r
+        self.d_out = d_out
+        # the same Swish instance is reused in both branches
+        self.act = nn.Swish()
+
+        # 3x3 conv (stride 1, padding 1) then 1x1 conv down to n_r maps,
+        # finished with a sigmoid
+        self.conv_n = nn.Sequential(
+            ('conv1', nn.Conv2D(
+                d_in, d_in, 3, 1, 1,
+                bias_attr=False)), ('bn1', nn.BatchNorm(d_in)),
+            ('act1', self.act), ('conv2', nn.Conv2D(
+                d_in, n_r, 1, bias_attr=False)), ('bn2', nn.BatchNorm(n_r)),
+            ('act2', nn.Sigmoid()))
+        # 3x3 conv (stride 1, padding 1) then 1x1 conv to d_out channels,
+        # with no final activation
+        self.conv_d = nn.Sequential(
+            ('conv1', nn.Conv2D(
+                d_in, d_middle, 3, 1, 1,
+                bias_attr=False)), ('bn1', nn.BatchNorm(d_middle)),
+            ('act1', self.act), ('conv2', nn.Conv2D(
+                d_middle, d_out, 1,
+                bias_attr=False)), ('bn2', nn.BatchNorm(d_out)))
+
+    def forward(self, x):
+        """
+        Args:
+            x: 4-D feature map, unpacked as (b, _, h, w).
+
+        Returns:
+            tensor of shape (b, n_r, d_out): the batched product of the
+            (b, n_r, h*w) weight maps and the (b, h*w, d_out) features.
+        """
+        b, _, h, w = x.shape
+
+        hmaps = self.conv_n(x)
+        fmaps = self.conv_d(x)
+        r = paddle.bmm(
+            hmaps.reshape((b, self.n_r, h * w)),
+            fmaps.reshape((b, self.d_out, h * w)).transpose((0, 2, 1)))
+        return r
+
+
+class GCN(nn.Layer):
+    """
+    A kernel-size-1 Conv1D over axis 1, followed by a Linear layer on the
+    last axis, dropout and Swish.
+
+    Args:
+        d_in: input size of the linear layer.
+        n_in: input channels of the Conv1D.
+        d_out: output size of the linear layer; falls back to d_in when falsy.
+        n_out: output channels of the Conv1D; falls back to d_in when falsy.
+        dropout: dropout probability.
+    """
+
+    def __init__(self, d_in, n_in, d_out=None, n_out=None, dropout=0.1):
+        super(GCN, self).__init__()
+        if not d_out:
+            d_out = d_in
+        # NOTE(review): n_out falls back to d_in rather than n_in. Confirm
+        # this is intended.
+        if not n_out:
+            n_out = d_in
+
+        self.conv_n = nn.Conv1D(n_in, n_out, 1)
+        self.linear = nn.Linear(d_in, d_out)
+        self.dropout = nn.Dropout(dropout)
+        self.act = nn.Swish()
+
+    def forward(self, x):
+        """
+        Apply conv -> linear -> dropout -> swish.
+
+        assumes x is (batch, n_in, d_in) - TODO confirm against the callers.
+        """
+        x = self.conv_n(x)
+        x = self.dropout(self.linear(x))
+        return self.act(x)
+
+
+class PRENFPN(nn.Layer):
+    def __init__(self, in_channels, n_r, d_model, max_len, dropout):
+        super(PRENFPN, self).__init__()
+        assert len(in_channels) == 3, "in_channels' length must be 3."
+        c1, c2, c3 = in_channels  # the depths are from big to small
+        # build fpn
+        assert d_model % 3 == 0, "{} can't be divided by 3.".format(d_model)
+        self.agg_p1 = PoolAggregate(n_r, c1, d_out=d_model // 3)
+        self.agg_p2 = PoolAggregate(n_r, c2, d_out=d_model // 3)
+        self.agg_p3 = PoolAggregate(n_r, c3, d_out=d_model // 3)
+
+        self.agg_w1 = WeightAggregate(n_r, c1, 4 * c1, d_model // 3)
+        self.agg_w2 = WeightAggregate(n_r, c2, 4 * c2, d_model // 3)
+        self.agg_w3 = WeightAggregate(n_r, c3, 4 * c3, d_model // 3)
+
+        self.gcn_pool = GCN(d_model, n_r, d_model, max_len, dropout)
+        self.gcn_weight = GCN(d_model, n_r, d_model, max_len, dropout)
+
+        self.out_channels = d_model
+
+    def forward(self, inputs):
+        f3, f5, f7 = inputs
+
+        rp1 = self.agg_p1(f3)
+        rp2 = self.agg_p2(f5)
+        rp3 = self.agg_p3(f7)
+        rp = paddle.concat([rp1, rp2, rp3], 2)  # [b,nr,d]
+
+        rw1 = self.agg_w1(f3)
+        rw2 = self.agg_w2(f5)
+        rw3 = self.agg_w3(f7)
+        rw = paddle.concat([rw1, rw2, rw3], 2)  # [b,nr,d]
+
+        y1 = self.gcn_pool(rp)
+        y2 = self.gcn_weight(rw)
+        y = 0.5 * (y1 + y2)
+        return y  # [b,max_len,d]
--- a/ppocr/postprocess/__init__.py
+++ b/ppocr/postprocess/__init__.py
@@ -24,8 +24,10 @@ __all__ = ['build_post_process']
 from .db_postprocess import DBPostProcess, DistillationDBPostProcess
 from .east_postprocess import EASTPostProcess
 from .sast_postprocess import SASTPostProcess
-from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, DistillationCTCLabelDecode, \
-    TableLabelDecode, NRTRLabelDecode, SARLabelDecode, SEEDLabelDecode
+from .fce_postprocess import FCEPostProcess
+from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, \
+    DistillationCTCLabelDecode, TableLabelDecode, NRTRLabelDecode, SARLabelDecode, \
+    SEEDLabelDecode, PRENLabelDecode
 from .cls_postprocess import ClsPostProcess
 from .pg_postprocess import PGPostProcess
 from .vqa_token_ser_layoutlm_postprocess import VQASerTokenLayoutLMPostProcess
@@ -34,12 +36,12 @@ from .vqa_token_re_layoutlm_postprocess import VQAReTokenLayoutLMPostProcess

 def build_post_process(config, global_config=None):
    support_dict = [
-        'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'CTCLabelDecode',
-        'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', 'PGPostProcess',
-        'DistillationCTCLabelDecode', 'TableLabelDecode',
+        'DBPostProcess', 'EASTPostProcess', 'SASTPostProcess', 'FCEPostProcess',
+        'CTCLabelDecode', 'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode',
+        'PGPostProcess', 'DistillationCTCLabelDecode', 'TableLabelDecode',
        'DistillationDBPostProcess', 'NRTRLabelDecode', 'SARLabelDecode',
        'SEEDLabelDecode', 'VQASerTokenLayoutLMPostProcess',
-        'VQAReTokenLayoutLMPostProcess'
+        'VQAReTokenLayoutLMPostProcess', 'PRENLabelDecode'
    ]

    if config['name'] == 'PSEPostProcess':

--- a/ppocr/postprocess/fce_postprocess.py
+++ b/ppocr/postprocess/fce_postprocess.py
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is adapted from:
+https://github.com/open-mmlab/mmocr/blob/v0.3.0/mmocr/models/textdet/postprocess/wrapper.py
+"""
+
+import cv2
+import paddle
+import numpy as np
+from numpy.fft import ifft
+from ppocr.utils.poly_nms import poly_nms, valid_boundary
+
+
+def fill_hole(input_mask):
+    """Fill the enclosed holes of a binary mask.
+
+    The mask is padded by one pixel on every side and flood-filled from
+    the padded corner (0, 0). Pixels the fill cannot reach are treated as
+    enclosed and are switched on in the result.
+
+    Args:
+        input_mask (ndarray): 2-D boolean mask.
+
+    Returns:
+        ndarray: boolean mask with the same shape as ``input_mask``.
+    """
+    h, w = input_mask.shape
+    canvas = np.zeros((h + 2, w + 2), np.uint8)
+    canvas[1:h + 1, 1:w + 1] = input_mask.copy()
+
+    # cv2.floodFill requires a mask 2 pixels larger than the image.
+    mask = np.zeros((h + 4, w + 4), np.uint8)
+
+    cv2.floodFill(canvas, mask, (0, 0), 1)
+    # Builtin bool replaces the np.bool alias, which NumPy >= 1.24 removed.
+    canvas = canvas[1:h + 1, 1:w + 1].astype(bool)
+
+    return ~canvas | input_mask
+
+
+def fourier2poly(fourier_coeff, num_reconstr_points=50):
+    """Reconstruct polygons from Fourier coefficients with an inverse FFT.
+
+    Args:
+        fourier_coeff (ndarray): complex coefficients shaped (n, 2k+1),
+            n being the number of candidates and k the Fourier degree.
+        num_reconstr_points (int): number of points sampled per polygon.
+
+    Returns:
+        ndarray: int32 array shaped (n, 2 * num_reconstr_points) holding
+        interleaved x, y coordinates.
+    """
+    count = len(fourier_coeff)
+    spectrum = np.zeros((count, num_reconstr_points), dtype='complex')
+    degree = (len(fourier_coeff[0]) - 1) // 2
+
+    # Non-negative frequencies go to the front of the spectrum, negative
+    # frequencies to the tail.
+    spectrum[:, :degree + 1] = fourier_coeff[:, degree:]
+    spectrum[:, -degree:] = fourier_coeff[:, :degree]
+
+    contour = ifft(spectrum) * num_reconstr_points
+    coords = np.zeros((count, num_reconstr_points, 2))
+    coords[..., 0] = contour.real
+    coords[..., 1] = contour.imag
+    return coords.astype('int32').reshape((count, -1))
+
+
+class FCEPostProcess(object):
+    """
+    The post process for FCENet.
+
+    Decodes every prediction level into polygon (or quadrilateral)
+    boundaries, merges the levels with polygon NMS and rescales the
+    result with the ratios found in ``shape_list``.
+
+    Args:
+        scales (list): down-sample scale of each prediction level, in the
+            iteration order of the ``preds`` dict.
+        fourier_degree (int): maximum Fourier degree k.
+        num_reconstr_points (int): number of points per reconstructed
+            polygon.
+        decoding_type (str): stored on the instance; not read by this
+            class.
+        score_thr (float): threshold on the combined score map.
+        nms_thr (float): IoU threshold of the polygon NMS.
+        alpha (float): exponent of the text-region score.
+        beta (float): exponent of the text-center-region score.
+        box_type (str): 'poly' or 'quad'.
+        **kwargs: accepted and ignored.
+    """
+
+    def __init__(self,
+                 scales,
+                 fourier_degree=5,
+                 num_reconstr_points=50,
+                 decoding_type='fcenet',
+                 score_thr=0.3,
+                 nms_thr=0.1,
+                 alpha=1.0,
+                 beta=1.0,
+                 box_type='poly',
+                 **kwargs):
+
+        self.scales = scales
+        self.fourier_degree = fourier_degree
+        self.num_reconstr_points = num_reconstr_points
+        self.decoding_type = decoding_type
+        self.score_thr = score_thr
+        self.nms_thr = nms_thr
+        self.alpha = alpha
+        self.beta = beta
+        self.box_type = box_type
+
+    def __call__(self, preds, shape_list):
+        """Split every level into classification / regression maps and decode.
+
+        Args:
+            preds (dict): one entry per prediction level; each value is a
+                paddle.Tensor or ndarray indexed as [batch, channel, h, w].
+            shape_list (ndarray): per-image shape info; columns 2: of the
+                first row are used as resize ratios.
+
+        Returns:
+            list[dict]: a single dict with ``points`` and ``scores``.
+        """
+        score_maps = []
+        for value in preds.values():
+            if isinstance(value, paddle.Tensor):
+                value = value.numpy()
+            # The first 4 channels are the classification maps, the rest
+            # are the Fourier regression maps.
+            cls_res = value[:, :4, :, :]
+            reg_res = value[:, 4:, :, :]
+            score_maps.append([cls_res, reg_res])
+
+        return self.get_boundary(score_maps, shape_list)
+
+    def resize_boundary(self, boundaries, scale_factor):
+        """Rescale boundaries via scale_factor.
+
+        Args:
+            boundaries (list[list[float]]): The boundary list. Each boundary
+                holds an even number of coordinates followed by a score.
+            scale_factor (sequence): only the first two entries are read;
+                they multiply the first and second coordinate of every
+                point respectively.
+
+        Returns:
+            tuple: (boxes, scores) where boxes is a float32 ndarray of
+            points shaped [-1, 2] per boundary and scores is a list.
+        """
+        boxes = []
+        scores = []
+        factor = np.asarray(scale_factor[:2])
+        for b in boundaries:
+            # NOTE(review): the result of this validity check is discarded,
+            # so an invalid boundary is not rejected here.
+            valid_boundary(b, True)
+            scores.append(b[-1])
+            # Drop the trailing score, view the rest as (x, y) pairs and
+            # scale each column by its own factor.
+            boxes.append(np.array(b[:-1]).reshape([-1, 2]) * factor)
+
+        return np.array(boxes, dtype=np.float32), scores
+
+    def get_boundary(self, score_maps, shape_list):
+        """Decode all levels, merge them with NMS and rescale the result.
+
+        Args:
+            score_maps (list): one [cls_res, reg_res] pair per level.
+            shape_list (ndarray): see ``__call__``.
+
+        Returns:
+            list[dict]: a single dict with ``points`` and ``scores``.
+        """
+        assert len(score_maps) == len(self.scales)
+        boundaries = []
+        for score_map, scale in zip(score_maps, self.scales):
+            boundaries.extend(self._get_boundary_single(score_map, scale))
+
+        # nms
+        boundaries = poly_nms(boundaries, self.nms_thr)
+        # The two ratios of the first image are inverted and reversed
+        # before being applied to the point coordinates.
+        boundaries, scores = self.resize_boundary(
+            boundaries, (1 / shape_list[0, 2:]).tolist()[::-1])
+
+        boxes_batch = [dict(points=boundaries, scores=scores)]
+        return boxes_batch
+
+    def _get_boundary_single(self, score_map, scale):
+        """Decode one prediction level with the instance's settings."""
+        assert len(score_map) == 2
+        assert score_map[1].shape[1] == 4 * self.fourier_degree + 2
+
+        return self.fcenet_decode(
+            preds=score_map,
+            fourier_degree=self.fourier_degree,
+            num_reconstr_points=self.num_reconstr_points,
+            scale=scale,
+            alpha=self.alpha,
+            beta=self.beta,
+            box_type=self.box_type,
+            score_thr=self.score_thr,
+            nms_thr=self.nms_thr)
+
+    def fcenet_decode(self,
+                      preds,
+                      fourier_degree,
+                      num_reconstr_points,
+                      scale,
+                      alpha=1.0,
+                      beta=2.0,
+                      box_type='poly',
+                      score_thr=0.3,
+                      nms_thr=0.1):
+        """Decoding predictions of FCENet to instances.
+
+        Only the first image of the batch is decoded.
+
+        Args:
+            preds (list): [cls_res, reg_res] arrays of one level.
+            fourier_degree (int): The maximum Fourier transform degree k.
+            num_reconstr_points (int): The points number of the polygon
+                reconstructed from predicted Fourier coefficients.
+            scale (int): The down-sample scale of the prediction.
+            alpha (float) : The parameter to calculate final scores. Score_{final}
+                    = (Score_{text region} ^ alpha)
+                    * (Score_{text center region}^ beta)
+            beta (float) : The parameter to calculate final score.
+            box_type (str):  Boundary encoding type 'poly' or 'quad'.
+            score_thr (float) : The threshold used to filter out the final
+                candidates.
+            nms_thr (float) :  The threshold of nms.
+
+        Returns:
+            boundaries (list[list[float]]): The instance boundary and confidence
+                list.
+        """
+        assert isinstance(preds, list)
+        assert len(preds) == 2
+        assert box_type in ['poly', 'quad']
+
+        cls_pred = preds[0][0]
+        tr_pred = cls_pred[0:2]
+        tcl_pred = cls_pred[2:]
+
+        reg_pred = preds[1][0].transpose([1, 2, 0])
+        x_pred = reg_pred[:, :, :2 * fourier_degree + 1]
+        y_pred = reg_pred[:, :, 2 * fourier_degree + 1:]
+
+        score_pred = (tr_pred[1]**alpha) * (tcl_pred[1]**beta)
+        tr_pred_mask = (score_pred) > score_thr
+        tr_mask = fill_hole(tr_pred_mask)
+
+        # OpenCV 4 returns (contours, hierarchy) while OpenCV 3 returns
+        # (image, contours, hierarchy); [-2] is the contour list in both.
+        tr_contours = cv2.findContours(
+            tr_mask.astype(np.uint8), cv2.RETR_TREE,
+            cv2.CHAIN_APPROX_SIMPLE)[-2]
+
+        mask = np.zeros_like(tr_mask)
+        boundaries = []
+        for cont in tr_contours:
+            deal_map = mask.copy().astype(np.int8)
+            cv2.drawContours(deal_map, [cont], -1, 1, -1)
+
+            score_map = score_pred * deal_map
+            score_mask = score_map > 0
+            xy_text = np.argwhere(score_mask)
+            dxy = xy_text[:, 1] + xy_text[:, 0] * 1j
+
+            x, y = x_pred[score_mask], y_pred[score_mask]
+            c = x + y * 1j
+            # The degree-0 coefficient is offset by the pixel position.
+            c[:, fourier_degree] = c[:, fourier_degree] + dxy
+            c *= scale
+
+            polygons = fourier2poly(c, num_reconstr_points)
+            score = score_map[score_mask].reshape(-1, 1)
+            polygons = poly_nms(np.hstack((polygons, score)).tolist(), nms_thr)
+
+            boundaries.extend(polygons)
+
+        boundaries = poly_nms(boundaries, nms_thr)
+
+        if box_type == 'quad':
+            new_boundaries = []
+            for boundary in boundaries:
+                poly = np.array(boundary[:-1]).reshape(-1, 2).astype(np.float32)
+                score = boundary[-1]
+                points = cv2.boxPoints(cv2.minAreaRect(poly))
+                # np.int0 was removed in NumPy 2.0; it was an alias of
+                # np.intp, so this cast is equivalent.
+                points = points.astype(np.intp)
+                new_boundaries.append(points.reshape(-1).tolist() + [score])
+            # Rebind once, after every boundary has been converted.
+            boundaries = new_boundaries
+
+        return boundaries
--- a/ppocr/postprocess/pse_postprocess/pse_postprocess.py
+++ b/ppocr/postprocess/pse_postprocess/pse_postprocess.py
@@ -37,10 +37,10 @@ class PSEPostProcess(object):
                 thresh=0.5,
                 box_thresh=0.85,
                 min_area=16,
-                 box_type='box',
+                 box_type='quad',
                 scale=4,
                 **kwargs):
-        assert box_type in ['box', 'poly'], 'Only box and poly is supported'
+        assert box_type in ['quad', 'poly'], 'Only quad and poly is supported'
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.min_area = min_area
@@ -95,7 +95,7 @@ class PSEPostProcess(object):
                label[ind] = 0
                continue

-            if self.box_type == 'box':
+            if self.box_type == 'quad':
                rect = cv2.minAreaRect(points)
                bbox = cv2.boxPoints(rect)
            elif self.box_type == 'poly':

--- a/ppocr/postprocess/rec_postprocess.py
+++ b/ppocr/postprocess/rec_postprocess.py
@@ -11,8 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import numpy as np
-import string
 import paddle
 from paddle.nn import functional as F
 import re
@@ -652,3 +652,63 @@ class SARLabelDecode(BaseRecLabelDecode):

    def get_ignored_tokens(self):
        return [self.padding_idx]
+
+
+class PRENLabelDecode(BaseRecLabelDecode):
+    """ Convert between text-label and text-index """
+
+    def __init__(self, character_dict_path=None, use_space_char=False,
+                 **kwargs):
+        super(PRENLabelDecode, self).__init__(character_dict_path,
+                                              use_space_char)
+
+    def add_special_char(self, dict_character):
+        """Prepend the padding, end and unknown tokens to the dictionary.
+
+        Args:
+            dict_character (list): the character list to extend.
+
+        Returns:
+            list: the special tokens followed by ``dict_character``.
+        """
+        padding_str = '<PAD>'  # 0 
+        end_str = '<EOS>'  # 1
+        unknown_str = '<UNK>'  # 2
+
+        dict_character = [padding_str, end_str, unknown_str] + dict_character
+        self.padding_idx = 0
+        self.end_idx = 1
+        self.unknown_idx = 2
+
+        return dict_character
+
+    def decode(self, text_index, text_prob=None):
+        """ convert text-index into text-label.
+
+        Decoding of a sample stops at the first end token; padding and
+        unknown tokens are skipped.
+
+        Args:
+            text_index: indexable batch of index sequences.
+            text_prob: optional per-position confidences indexed like
+                ``text_index``; every kept character counts as 1 when
+                omitted.
+
+        Returns:
+            list[tuple]: one (text, mean confidence) pair per sample; an
+            empty text is reported with confidence 1.
+        """
+        result_list = []
+        ignored = (self.padding_idx, self.unknown_idx)
+
+        for batch_idx in range(len(text_index)):
+            indices = text_index[batch_idx]
+            char_list = []
+            conf_list = []
+            for idx in range(len(indices)):
+                token = indices[idx]
+                if token == self.end_idx:
+                    break
+                if token in ignored:
+                    continue
+                char_list.append(self.character[int(token)])
+                if text_prob is not None:
+                    conf_list.append(text_prob[batch_idx][idx])
+                else:
+                    conf_list.append(1)
+
+            text = ''.join(char_list)
+            if len(text) > 0:
+                result_list.append((text, np.mean(conf_list)))
+            else:
+                # here confidence of empty recog result is 1
+                result_list.append(('', 1))
+        return result_list
+
+    def __call__(self, preds, label=None, *args, **kwargs):
+        """Decode predictions and, when given, labels.
+
+        Args:
+            preds: paddle.Tensor or ndarray whose axis 2 is the class
+                axis.
+            label: optional batch of index sequences to decode as well.
+
+        Returns:
+            The decoded predictions, or a (predictions, labels) tuple when
+            ``label`` is given.
+        """
+        # Accept ndarray input too: an ndarray has no .numpy() method.
+        if isinstance(preds, paddle.Tensor):
+            preds = preds.numpy()
+        preds_idx = preds.argmax(axis=2)
+        preds_prob = preds.max(axis=2)
+        text = self.decode(preds_idx, preds_prob)
+        if label is None:
+            return text
+        label = self.decode(label)
+        return text, label
--- a/ppocr/utils/poly_nms.py
+++ b/ppocr/utils/poly_nms.py
+# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import numpy as np
+from shapely.geometry import Polygon
+
+
+def points2polygon(points):
+    """Build a shapely Polygon from a flat coordinate sequence.
+
+    Args:
+        points (ndarray or list): flat sequence of 2k values describing
+            k points; at least 8 values are required.
+
+    Returns:
+        polygon (Polygon): the polygon built from the k points.
+    """
+    coords = np.array(points) if isinstance(points, list) else points
+
+    assert isinstance(coords, np.ndarray)
+    assert (coords.size % 2 == 0) and (coords.size >= 8)
+
+    return Polygon(coords.reshape([-1, 2]))
+
+
+def poly_intersection(poly_det, poly_gt, buffer=0.0001):
+    """Intersect two polygons, optionally buffering both first.
+
+    Args:
+        poly_det (Polygon): A polygon predicted by detector.
+        poly_gt (Polygon): A gt polygon.
+        buffer (float): distance passed to ``Polygon.buffer`` for both
+            polygons before intersecting; 0 skips the buffering.
+
+    Returns:
+        tuple: (intersection area, intersection geometry).
+    """
+    assert isinstance(poly_det, Polygon)
+    assert isinstance(poly_gt, Polygon)
+
+    if buffer != 0:
+        det_geom = poly_det.buffer(buffer)
+        gt_geom = poly_gt.buffer(buffer)
+    else:
+        det_geom, gt_geom = poly_det, poly_gt
+
+    overlap = det_geom & gt_geom
+    return overlap.area, overlap
+
+
+def poly_union(poly_det, poly_gt):
+    """Compute the union area of two polygons by inclusion-exclusion.
+
+    Args:
+        poly_det (Polygon): A polygon predicted by detector.
+        poly_gt (Polygon): A gt polygon.
+
+    Returns:
+        union_area (float): sum of both areas minus the intersection area.
+    """
+    assert isinstance(poly_det, Polygon)
+    assert isinstance(poly_gt, Polygon)
+
+    total_area = poly_det.area + poly_gt.area
+    overlap_area = poly_intersection(poly_det, poly_gt)[0]
+    return total_area - overlap_area
+
+
+def valid_boundary(x, with_score=True):
+    """Check the length of a flat boundary.
+
+    Args:
+        x: sized sequence of coordinates, optionally ending with a score.
+        with_score: whether a trailing score is expected.
+
+    Returns:
+        bool: True when ``x`` has at least 8 entries and its length is odd
+        with a score, or even without one.
+    """
+    length = len(x)
+    if length < 8:
+        return False
+    # An odd length means one extra entry beyond the coordinate pairs.
+    has_extra = length % 2 == 1
+    return has_extra == bool(with_score)
+
+
+def boundary_iou(src, target):
+    """Compute the IoU of two score-less boundaries.
+
+    Args:
+       src (list): Source boundary as flat coordinates.
+       target (list): Target boundary as flat coordinates.
+
+    Returns:
+       iou (float): The iou between two boundaries.
+    """
+    assert valid_boundary(src, False)
+    assert valid_boundary(target, False)
+
+    return poly_iou(points2polygon(src), points2polygon(target))
+
+
+def poly_iou(poly_det, poly_gt):
+    """Compute intersection-over-union of two polygons.
+
+    Args:
+        poly_det (Polygon): A polygon predicted by detector.
+        poly_gt (Polygon): A gt polygon.
+
+    Returns:
+        iou (float): The IOU between two polygons; 0.0 when the union
+        area is zero.
+    """
+    assert isinstance(poly_det, Polygon)
+    assert isinstance(poly_gt, Polygon)
+    overlap_area = poly_intersection(poly_det, poly_gt)[0]
+    union_area = poly_union(poly_det, poly_gt)
+    # Guard against dividing by a zero union area.
+    return 0.0 if union_area == 0 else overlap_area / union_area
+
+
+def poly_nms(polygons, threshold):
+    """Greedy non-maximum suppression over scored polygons.
+
+    Args:
+        polygons (list): boundaries given as flat coordinates followed by
+            a score in the last position.
+        threshold (float): a remaining polygon is dropped when its IoU
+            with the polygon just kept exceeds this value.
+
+    Returns:
+        list: the kept polygons as lists, highest score first.
+    """
+    assert isinstance(polygons, list)
+
+    # Ascending by score, so the best remaining candidate is always last.
+    ranked = np.array(sorted(polygons, key=lambda item: item[-1]))
+
+    kept = []
+    remaining = list(range(ranked.shape[0]))
+
+    while remaining:
+        best = remaining.pop()
+        kept.append(ranked[best].tolist())
+        reference = ranked[best][:-1]
+        remaining = [
+            i for i in remaining
+            if not boundary_iou(reference, ranked[i][:-1]) > threshold
+        ]
+
+    return kept
--- a/ppstructure/README.md
+++ b/ppstructure/README.md
@@ -98,9 +98,9 @@ PP-Structure Series Model List (Updating)

 ### 7.1 Layout analysis model

-|model name|description|download|
-| --- | --- | --- |
-| ppyolov2_r50vd_dcn_365e_publaynet | The layout analysis model trained on the PubLayNet dataset can divide image into 5 types of areas **text, title, table, picture, and list** | [PubLayNet](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) |
+|model name|description|download|label_map|
+| --- | --- | --- |--- |
+| ppyolov2_r50vd_dcn_365e_publaynet | The layout analysis model trained on the PubLayNet dataset can divide image into 5 types of areas **text, title, table, picture, and list** | [PubLayNet](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) | {0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"}|

 ### 7.2 OCR and table recognition model


--- a/ppstructure/README_ch.md
+++ b/ppstructure/README_ch.md
@@ -96,9 +96,9 @@ PP-Structure系列模型列表（更新中）

 ### 7.1 版面分析模型

-|模型名称|模型简介|下载地址|
-| --- | --- | --- |
-| ppyolov2_r50vd_dcn_365e_publaynet | PubLayNet 数据集训练的版面分析模型，可以划分**文字、标题、表格、图片以及列表**5类区域 | [PubLayNet](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) |
+|模型名称|模型简介|下载地址| label_map|
+| --- | --- | --- | --- |
+| ppyolov2_r50vd_dcn_365e_publaynet | PubLayNet 数据集训练的版面分析模型，可以划分**文字、标题、表格、图片以及列表**5类区域 | [PubLayNet](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) | {0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"}|

 ### 7.2 OCR和表格识别模型


--- a/ppstructure/docs/models_list.md
+++ b/ppstructure/docs/models_list.md
@@ -11,11 +11,11 @@

 ## 1. LayoutParser 模型

-|模型名称|模型简介|下载地址|
-| --- | --- | --- |
-| ppyolov2_r50vd_dcn_365e_publaynet | PubLayNet 数据集训练的版面分析模型，可以划分**文字、标题、表格、图片以及列表**5类区域 | [PubLayNet](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) |
-| ppyolov2_r50vd_dcn_365e_tableBank_word | TableBank Word 数据集训练的版面分析模型，只能检测表格 | [TableBank Word](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_word.tar) |
-| ppyolov2_r50vd_dcn_365e_tableBank_latex | TableBank Latex 数据集训练的版面分析模型，只能检测表格 | [TableBank Latex](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_latex.tar) |
+|模型名称|模型简介|下载地址|label_map|
+| --- | --- | --- | --- |
+| ppyolov2_r50vd_dcn_365e_publaynet | PubLayNet 数据集训练的版面分析模型，可以划分**文字、标题、表格、图片以及列表**5类区域 | [推理模型](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet.tar) / [训练模型](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_publaynet_pretrained.pdparams) |{0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"}|
+| ppyolov2_r50vd_dcn_365e_tableBank_word | TableBank Word 数据集训练的版面分析模型，只能检测表格 | [推理模型](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_word.tar) | {0:"Table"}|
+| ppyolov2_r50vd_dcn_365e_tableBank_latex | TableBank Latex 数据集训练的版面分析模型，只能检测表格 | [推理模型](https://paddle-model-ecology.bj.bcebos.com/model/layout-parser/ppyolov2_r50vd_dcn_365e_tableBank_latex.tar) | {0:"Table"}|

 ## 2. OCR和表格识别模型


--- a/ppstructure/docs/quickstart.md
+++ b/ppstructure/docs/quickstart.md
@@ -100,7 +100,9 @@ dict 里各个字段说明如下
 | output          | excel和识别结果保存的地址                | ./output/table                              |
 | table_max_len   | 表格结构模型预测时，图像的长边resize尺度 | 488                                         |
 | table_model_dir | 表格结构模型 inference 模型地址          | None                                        |
-| table_char_type | 表格结构模型所用字典地址                 | ../ppocr/utils/dict/table_structure_dict.txt |
+| table_char_dict_path | 表格结构模型所用字典地址                 | ../ppocr/utils/dict/table_structure_dict.txt |
+| layout_path_model | 版面分析模型地址，可以为在线地址或者本地地址，当为本地地址时，需要指定 layout_label_map, 命令行模式下可通过--layout_label_map='{0: "Text", 1: "Title", 2: "List", 3:"Table", 4:"Figure"}' 指定              | lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config |
+| layout_label_map | 版面分析模型label映射字典                 | None |
 | model_name_or_path | VQA SER模型地址                | None |
 | max_seq_length | VQA SER模型最大支持token长度              | 512 |
 | label_map_path | VQA SER 标签文件地址              | ./vqa/labels/labels_ser.txt |

--- a/ppstructure/layout/README.md
+++ b/ppstructure/layout/README.md
@@ -52,7 +52,7 @@ The following figure shows the result, with different colored detection boxes re
 |   threshold    |              threshold of prediction score               |     0.5     |                              \                               |
 |  input_shape   |                 picture size of reshape                  | [3,640,640] |                              \                               |
 |   batch_size   |                    testing batch size                    |      1      |                              \                               |
+|   label_map    |                  category mapping table                  |    None     | When config_path is set, this can be None and the label_map is obtained automatically from the dataset name; when model_path is set, it must be specified manually |
+|   label_map    |                  category mapping table                  |    None     | Setting config_ path, it can be none, and the label is automatically obtained according to the dataset name_ map, You need to specify it manually when setting model_path |
 |  enforce_cpu   |                    whether to use CPU                    |    False    |      False to use GPU, and True to force the use of CPU      |
 | enforce_mkldnn | whether mkldnn acceleration is enabled in CPU prediction |    True     |                              \                               |
 |   thread_num   |                the number of CPU threads                 |     10      |                              \                               |

--- a/ppstructure/layout/README_ch.md
+++ b/ppstructure/layout/README_ch.md
@@ -52,7 +52,7 @@ show_img.show()
 |   threshold    |       预测得分的阈值        |     0.5     |                              \                               |
 |  input_shape   |     reshape之后图片尺寸     | [3,640,640] |                              \                               |
 |   batch_size   |       测试batch size        |      1      |                              \                               |
-|   label_map    |         类别映射表          |    None     | 设置config_path时，可以为None，根据数据集名称自动获取label_map |
+|   label_map    |         类别映射表          |    None     | 设置config_path时，可以为None，根据数据集名称自动获取label_map,设置model_path时需要手动指定 |
 |  enforce_cpu   |     代码是否使用CPU运行     |    False    |         设置为False表示使用GPU，True表示强制使用CPU          |
 | enforce_mkldnn | CPU预测中是否开启MKLDNN加速 |    True     |                              \                               |
 |   thread_num   |        设置CPU线程数        |     10      |                              \                               |

--- a/ppstructure/predict_system.py
+++ b/ppstructure/predict_system.py
@@ -58,6 +58,7 @@ class OCRSystem(object):
            self.table_layout = lp.PaddleDetectionLayoutModel(
                config_path=config_path,
                model_path=model_path,
+                label_map=args.layout_label_map,
                threshold=0.5,
                enable_mkldnn=args.enable_mkldnn,
                enforce_cpu=not args.use_gpu,

--- a/ppstructure/utility.py
+++ b/ppstructure/utility.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import ast
 from PIL import Image
 import numpy as np
 from tools.infer.utility import draw_ocr_box_txt, init_args as infer_args
@@ -34,7 +35,11 @@ def init_args():
        "--layout_path_model",
        type=str,
        default="lp://PubLayNet/ppyolov2_r50vd_dcn_365e_publaynet/config")
-
+    parser.add_argument(
+        "--layout_label_map",
+        type=ast.literal_eval,
+        default=None,
+        help='label map according to ppstructure/layout/README_ch.md')
    # params for ser
    parser.add_argument("--model_name_or_path", type=str)
    parser.add_argument("--max_seq_length", type=int, default=512)