wangsen / paddle_dbnet · Commits

Commit aa59fca5 — authored Apr 28, 2022 by Leif
Merge remote-tracking branch 'origin/dygraph' into dygraph
Parents: 12d15752, f01f24c7

Changes: 208 files in the commit; this page shows 20 changed files with 1163 additions and 35 deletions (+1163 −35).
Changed files on this page:

- ppocr/data/imaug/ssl_img_aug.py (+60 −0)
- ppocr/data/simple_dataset.py (+7 −3)
- ppocr/losses/__init__.py (+2 −1)
- ppocr/losses/basic_loss.py (+2 −2)
- ppocr/losses/combined_loss.py (+2 −0)
- ppocr/losses/distillation_loss.py (+55 −3)
- ppocr/losses/rec_multi_loss.py (+58 −0)
- ppocr/losses/rec_sar_loss.py (+2 −1)
- ppocr/metrics/rec_metric.py (+9 −3)
- ppocr/modeling/architectures/base_model.py (+5 −1)
- ppocr/modeling/architectures/distillation_model.py (+2 −2)
- ppocr/modeling/backbones/__init__.py (+3 −1)
- ppocr/modeling/backbones/rec_mv1_enhance.py (+11 −4)
- ppocr/modeling/backbones/rec_svtrnet.py (+597 −0)
- ppocr/modeling/heads/__init__.py (+3 −1)
- ppocr/modeling/heads/det_db_head.py (+8 −7)
- ppocr/modeling/heads/rec_multi_head.py (+73 −0)
- ppocr/modeling/heads/rec_sar_head.py (+11 −3)
- ppocr/modeling/necks/__init__.py (+3 −3)
- ppocr/modeling/necks/db_fpn.py (+250 −0)
ppocr/data/imaug/ssl_img_aug.py (new file, +60 −0)

```python
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import math
import cv2
import numpy as np
import random
from PIL import Image

from .rec_img_aug import resize_norm_img


class SSLRotateResize(object):
    def __init__(self,
                 image_shape,
                 padding=False,
                 select_all=True,
                 mode="train",
                 **kwargs):
        self.image_shape = image_shape
        self.padding = padding
        self.select_all = select_all
        self.mode = mode

    def __call__(self, data):
        img = data["image"]

        data["image_r90"] = cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)
        data["image_r180"] = cv2.rotate(data["image_r90"],
                                        cv2.ROTATE_90_CLOCKWISE)
        data["image_r270"] = cv2.rotate(data["image_r180"],
                                        cv2.ROTATE_90_CLOCKWISE)

        images = []
        for key in ["image", "image_r90", "image_r180", "image_r270"]:
            images.append(
                resize_norm_img(
                    data.pop(key),
                    image_shape=self.image_shape,
                    padding=self.padding)[0])
        data["image"] = np.stack(images, axis=0)
        data["label"] = np.array(list(range(4)))
        if not self.select_all:
            data["image"] = data["image"][0::2]  # just choose 0 and 180
            data["label"] = data["label"][0:2]  # label needs to be continuous
        if self.mode == "test":
            data["image"] = data["image"][0]
            data["label"] = data["label"][0]
        return data
```
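To make the new transform's contract concrete, here is a minimal smoke-test sketch; the `image_shape` value and the random input image are illustrative assumptions, not values from this commit, and it assumes the PaddleOCR source tree is importable.

```python
# Minimal sketch (assumptions: PaddleOCR on sys.path; image_shape is illustrative).
import numpy as np
from ppocr.data.imaug.ssl_img_aug import SSLRotateResize

transform = SSLRotateResize(
    image_shape=[3, 48, 320], select_all=True, mode="train")
data = {"image": (np.random.rand(48, 320, 3) * 255).astype(np.uint8)}
data = transform(data)

# Train mode stacks the 0/90/180/270 rotations along a new leading axis and
# labels them 0..3, turning rotation prediction into a 4-way SSL task.
print(data["image"].shape)  # (4, 3, 48, 320)
print(data["label"])        # [0 1 2 3]
```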
ppocr/data/simple_dataset.py (+7 −3)

```diff
@@ -49,7 +49,8 @@ class SimpleDataSet(Dataset):
         if self.mode == "train" and self.do_shuffle:
             self.shuffle_data_random()
         self.ops = create_operators(dataset_config['transforms'], global_config)
+        self.ext_op_transform_idx = dataset_config.get("ext_op_transform_idx", 2)
         self.need_reset = True in [x < 1 for x in ratio_list]

     def get_image_info_list(self, file_list, ratio_list):
@@ -87,7 +88,7 @@ class SimpleDataSet(Dataset):
             if hasattr(op, 'ext_data_num'):
                 ext_data_num = getattr(op, 'ext_data_num')
                 break
-        load_data_ops = self.ops[:2]
+        load_data_ops = self.ops[:self.ext_op_transform_idx]
         ext_data = []

         while len(ext_data) < ext_data_num:
@@ -108,8 +109,11 @@ class SimpleDataSet(Dataset):
             data['image'] = img
             data = transform(data, load_data_ops)

-            if data is None or data['polys'].shape[1] != 4:
+            if data is None:
                 continue
+            if 'polys' in data.keys():
+                if data['polys'].shape[1] != 4:
+                    continue
             ext_data.append(data)
         return ext_data
```
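For context, the new `ext_op_transform_idx` key tells the dataset how many leading transforms count as pure data-loading ops when collecting extra samples (`ext_data`). A hedged sketch of where it sits in the dataset config; all keys other than `ext_op_transform_idx` are illustrative:

```python
# Illustrative dataset_config fragment (dict form of the YAML config).
# Only "ext_op_transform_idx" is the key introduced by this commit; with the
# default of 2, ext_data samples are produced by self.ops[:2] as before.
dataset_config = {
    "transforms": [
        {"DecodeImage": {"img_mode": "BGR", "channel_first": False}},
        {"DetLabelEncode": None},
        # ...augmentation ops that may request ext_data follow here...
    ],
    "ext_op_transform_idx": 2,
}
```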
ppocr/losses/__init__.py (+2 −1)

```diff
@@ -34,6 +34,7 @@ from .rec_nrtr_loss import NRTRLoss
 from .rec_sar_loss import SARLoss
 from .rec_aster_loss import AsterLoss
 from .rec_pren_loss import PRENLoss
+from .rec_multi_loss import MultiLoss

 # cls loss
 from .cls_loss import ClsLoss
@@ -60,7 +61,7 @@ def build_loss(config):
         'DBLoss', 'PSELoss', 'EASTLoss', 'SASTLoss', 'FCELoss', 'CTCLoss',
         'ClsLoss', 'AttentionLoss', 'SRNLoss', 'PGLoss', 'CombinedLoss',
         'NRTRLoss', 'TableAttentionLoss', 'SARLoss', 'AsterLoss', 'SDMGRLoss',
-        'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss'
+        'VQASerTokenLayoutLMLoss', 'LossFromOutput', 'PRENLoss', 'MultiLoss'
     ]
     config = copy.deepcopy(config)
     module_name = config.pop('name')
```
ppocr/losses/basic_loss.py (+2 −2)

```diff
@@ -106,8 +106,8 @@ class DMLLoss(nn.Layer):

     def forward(self, out1, out2):
         if self.act is not None:
-            out1 = self.act(out1)
-            out2 = self.act(out2)
+            out1 = self.act(out1) + 1e-10
+            out2 = self.act(out2) + 1e-10
         if self.use_log:
             # for recognition distillation, log is needed for feature map
             log_out1 = paddle.log(out1)
```
ppocr/losses/combined_loss.py (+2 −0)

```diff
@@ -18,8 +18,10 @@ import paddle.nn as nn
 from .rec_ctc_loss import CTCLoss
 from .center_loss import CenterLoss
 from .ace_loss import ACELoss
+from .rec_sar_loss import SARLoss

 from .distillation_loss import DistillationCTCLoss
+from .distillation_loss import DistillationSARLoss
 from .distillation_loss import DistillationDMLLoss
 from .distillation_loss import DistillationDistanceLoss, DistillationDBLoss, DistillationDilaDBLoss
```
ppocr/losses/distillation_loss.py (+55 −3)

```diff
@@ -18,6 +18,7 @@ import numpy as np
 import cv2

 from .rec_ctc_loss import CTCLoss
+from .rec_sar_loss import SARLoss
 from .basic_loss import DMLLoss
 from .basic_loss import DistanceLoss
 from .det_db_loss import DBLoss
@@ -46,11 +47,15 @@ class DistillationDMLLoss(DMLLoss):
                  act=None,
                  use_log=False,
                  key=None,
+                 multi_head=False,
+                 dis_head='ctc',
                  maps_name=None,
                  name="dml"):
         super().__init__(act=act, use_log=use_log)
         assert isinstance(model_name_pairs, list)
         self.key = key
+        self.multi_head = multi_head
+        self.dis_head = dis_head
         self.model_name_pairs = self._check_model_name_pairs(model_name_pairs)
         self.name = name
         self.maps_name = self._check_maps_name(maps_name)
@@ -97,7 +102,11 @@ class DistillationDMLLoss(DMLLoss):
                 out2 = out2[self.key]

             if self.maps_name is None:
-                loss = super().forward(out1, out2)
+                if self.multi_head:
+                    loss = super().forward(out1[self.dis_head],
+                                           out2[self.dis_head])
+                else:
+                    loss = super().forward(out1, out2)
                 if isinstance(loss, dict):
                     for key in loss:
                         loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1],
@@ -123,11 +132,16 @@ class DistillationDMLLoss(DMLLoss):


 class DistillationCTCLoss(CTCLoss):
-    def __init__(self, model_name_list=[], key=None, name="loss_ctc"):
+    def __init__(self,
+                 model_name_list=[],
+                 key=None,
+                 multi_head=False,
+                 name="loss_ctc"):
         super().__init__()
         self.model_name_list = model_name_list
         self.key = key
         self.name = name
+        self.multi_head = multi_head

     def forward(self, predicts, batch):
         loss_dict = dict()
@@ -135,7 +149,45 @@ class DistillationCTCLoss(CTCLoss):
             out = predicts[model_name]
             if self.key is not None:
                 out = out[self.key]
-            loss = super().forward(out, batch)
+            if self.multi_head:
+                assert 'ctc' in out, 'multi head has multi out'
+                loss = super().forward(out['ctc'], batch[:2] + batch[3:])
+            else:
+                loss = super().forward(out, batch)
             if isinstance(loss, dict):
                 for key in loss:
                     loss_dict["{}_{}_{}".format(self.name, model_name,
                                                 idx)] = loss[key]
             else:
                 loss_dict["{}_{}".format(self.name, model_name)] = loss
         return loss_dict


+class DistillationSARLoss(SARLoss):
+    def __init__(self,
+                 model_name_list=[],
+                 key=None,
+                 multi_head=False,
+                 name="loss_sar",
+                 **kwargs):
+        ignore_index = kwargs.get('ignore_index', 92)
+        super().__init__(ignore_index=ignore_index)
+        self.model_name_list = model_name_list
+        self.key = key
+        self.name = name
+        self.multi_head = multi_head
+
+    def forward(self, predicts, batch):
+        loss_dict = dict()
+        for idx, model_name in enumerate(self.model_name_list):
+            out = predicts[model_name]
+            if self.key is not None:
+                out = out[self.key]
+            if self.multi_head:
+                assert 'sar' in out, 'multi head has multi out'
+                loss = super().forward(out['sar'], batch[:1] + batch[2:])
+            else:
+                loss = super().forward(out, batch)
+            if isinstance(loss, dict):
+                for key in loss:
+                    loss_dict["{}_{}_{}".format(self.name, model_name,
```

(The hunk is truncated here on the page.)
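The batch re-slicing in the two losses above is easy to misread, so here is a small self-contained illustration of what `batch[:2] + batch[3:]` and `batch[:1] + batch[2:]` select, given the batch layout documented in rec_multi_loss.py:

```python
# The multi-head batch layout is [image, label_ctc, label_sar, length, valid_ratio].
batch = ["image", "label_ctc", "label_sar", "length", "valid_ratio"]

ctc_batch = batch[:2] + batch[3:]  # drops label_sar for the CTC branch
sar_batch = batch[:1] + batch[2:]  # drops label_ctc for the SAR branch
print(ctc_batch)  # ['image', 'label_ctc', 'length', 'valid_ratio']
print(sar_batch)  # ['image', 'label_sar', 'length', 'valid_ratio']
```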
ppocr/losses/rec_multi_loss.py (new file, +58 −0)

```python
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import paddle
from paddle import nn

from .rec_ctc_loss import CTCLoss
from .rec_sar_loss import SARLoss


class MultiLoss(nn.Layer):
    def __init__(self, **kwargs):
        super().__init__()
        self.loss_funcs = {}
        self.loss_list = kwargs.pop('loss_config_list')
        self.weight_1 = kwargs.get('weight_1', 1.0)
        self.weight_2 = kwargs.get('weight_2', 1.0)
        self.gtc_loss = kwargs.get('gtc_loss', 'sar')
        for loss_info in self.loss_list:
            for name, param in loss_info.items():
                if param is not None:
                    kwargs.update(param)
                loss = eval(name)(**kwargs)
                self.loss_funcs[name] = loss

    def forward(self, predicts, batch):
        self.total_loss = {}
        total_loss = 0.0
        # batch [image, label_ctc, label_sar, length, valid_ratio]
        for name, loss_func in self.loss_funcs.items():
            if name == 'CTCLoss':
                loss = loss_func(predicts['ctc'],
                                 batch[:2] + batch[3:])['loss'] * self.weight_1
            elif name == 'SARLoss':
                loss = loss_func(predicts['sar'],
                                 batch[:1] + batch[2:])['loss'] * self.weight_2
            else:
                raise NotImplementedError(
                    '{} is not supported in MultiLoss yet'.format(name))
            self.total_loss[name] = loss
            total_loss += loss
        self.total_loss['loss'] = total_loss
        return self.total_loss
```
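A hedged usage sketch for the new loss; the config shape mirrors `loss_config_list` as consumed above, but the exact values are illustrative:

```python
# Sketch (assumes PaddleOCR importable). MultiLoss instantiates each entry of
# loss_config_list via eval(name)(**kwargs), so only CTCLoss/SARLoss are valid.
from ppocr.losses.rec_multi_loss import MultiLoss

loss_fn = MultiLoss(
    loss_config_list=[{"CTCLoss": None}, {"SARLoss": None}],
    weight_1=1.0,  # scale of the CTC term
    weight_2=1.0)  # scale of the SAR term
# loss_fn(predicts, batch) expects predicts['ctc'] and predicts['sar'] plus a
# batch of [image, label_ctc, label_sar, length, valid_ratio]; it returns a
# dict whose 'loss' entry is the weighted sum.
```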
ppocr/losses/rec_sar_loss.py (+2 −1)

```diff
@@ -9,8 +9,9 @@ from paddle import nn
 class SARLoss(nn.Layer):
     def __init__(self, **kwargs):
         super(SARLoss, self).__init__()
+        ignore_index = kwargs.get('ignore_index', 92)  # 6626
         self.loss_func = paddle.nn.loss.CrossEntropyLoss(
-            reduction="mean", ignore_index=92)
+            reduction="mean", ignore_index=ignore_index)

     def forward(self, predicts, batch):
         predict = predicts[:, :
```
ppocr/metrics/rec_metric.py (+9 −3)

```diff
@@ -17,9 +17,14 @@ import string

 class RecMetric(object):
-    def __init__(self, main_indicator='acc', is_filter=False, **kwargs):
+    def __init__(self,
+                 main_indicator='acc',
+                 is_filter=False,
+                 ignore_space=True,
+                 **kwargs):
         self.main_indicator = main_indicator
         self.is_filter = is_filter
+        self.ignore_space = ignore_space
         self.eps = 1e-5
         self.reset()
@@ -34,8 +39,9 @@ class RecMetric(object):
         all_num = 0
         norm_edit_dis = 0.0
         for (pred, pred_conf), (target, _) in zip(preds, labels):
-            pred = pred.replace(" ", "")
-            target = target.replace(" ", "")
+            if self.ignore_space:
+                pred = pred.replace(" ", "")
+                target = target.replace(" ", "")
             if self.is_filter:
                 pred = self._normalize_text(pred)
                 target = self._normalize_text(target)
```
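The effect of the new flag in plain Python — with `ignore_space=True` (the default, matching the old behavior) spaces are stripped before comparison, and turning it off makes them count:

```python
pred, target = "a b", "ab"

for ignore_space in (True, False):
    p, t = pred, target
    if ignore_space:
        p = p.replace(" ", "")
        t = t.replace(" ", "")
    print(ignore_space, p == t)
# True True   -> old default behavior, spaces ignored
# False False -> spaces now affect accuracy and edit distance
```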
ppocr/modeling/architectures/base_model.py (+5 −1)

```diff
@@ -83,7 +83,11 @@ class BaseModel(nn.Layer):
             y["neck_out"] = x
         if self.use_head:
             x = self.head(x, targets=data)
-        if isinstance(x, dict):
+        # for multi head, save ctc neck out for udml
+        if isinstance(x, dict) and 'ctc_neck' in x.keys():
+            y["neck_out"] = x["ctc_neck"]
+            y["head_out"] = x
+        elif isinstance(x, dict):
             y.update(x)
         else:
             y["head_out"] = x
```
ppocr/modeling/architectures/distillation_model.py (+2 −2)

```diff
@@ -53,8 +53,8 @@ class DistillationModel(nn.Layer):
             self.model_list.append(self.add_sublayer(key, model))
             self.model_name_list.append(key)

-    def forward(self, x):
+    def forward(self, x, data=None):
         result_dict = dict()
         for idx, model_name in enumerate(self.model_name_list):
-            result_dict[model_name] = self.model_list[idx](x)
+            result_dict[model_name] = self.model_list[idx](x, data)
         return result_dict
```
ppocr/modeling/backbones/__init__.py (+3 −1)

```diff
@@ -31,9 +31,11 @@ def build_backbone(config, model_type):
         from .rec_resnet_aster import ResNet_ASTER
         from .rec_micronet import MicroNet
         from .rec_efficientb3_pren import EfficientNetb3_PREN
+        from .rec_svtrnet import SVTRNet
         support_dict = [
             'MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB',
-            "ResNet31", "ResNet_ASTER", 'MicroNet', 'EfficientNetb3_PREN'
+            "ResNet31", "ResNet_ASTER", 'MicroNet', 'EfficientNetb3_PREN',
+            'SVTRNet'
         ]
     elif model_type == "e2e":
         from .e2e_resnet_vd_pg import ResNet
```
ppocr/modeling/backbones/rec_mv1_enhance.py (+11 −4)

```diff
@@ -103,7 +103,12 @@ class DepthwiseSeparable(nn.Layer):

 class MobileNetV1Enhance(nn.Layer):
-    def __init__(self, in_channels=3, scale=0.5, **kwargs):
+    def __init__(self,
+                 in_channels=3,
+                 scale=0.5,
+                 last_conv_stride=1,
+                 last_pool_type='max',
+                 **kwargs):
         super().__init__()
         self.scale = scale
         self.block_list = []
@@ -200,7 +205,7 @@ class MobileNetV1Enhance(nn.Layer):
             num_filters1=1024,
             num_filters2=1024,
             num_groups=1024,
-            stride=1,
+            stride=last_conv_stride,
             dw_size=5,
             padding=2,
             use_se=True,
@@ -208,8 +213,10 @@ class MobileNetV1Enhance(nn.Layer):
         self.block_list.append(conv6)
         self.block_list = nn.Sequential(*self.block_list)

-        self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
+        if last_pool_type == 'avg':
+            self.pool = nn.AvgPool2D(kernel_size=2, stride=2, padding=0)
+        else:
+            self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
         self.out_channels = int(1024 * scale)

     def forward(self, inputs):
```
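A hedged instantiation sketch showing the two new knobs; the argument values below are illustrative (e.g. reducing the last stride keeps a wider feature map for recognition), not values taken from this diff:

```python
# Sketch (assumes paddle + PaddleOCR importable; values are assumptions).
import paddle
from ppocr.modeling.backbones.rec_mv1_enhance import MobileNetV1Enhance

backbone = MobileNetV1Enhance(
    in_channels=3,
    scale=0.5,
    last_conv_stride=1,    # keep resolution in the final depthwise block
    last_pool_type='avg')  # average pooling instead of the old fixed max pool
feats = backbone(paddle.randn([1, 3, 32, 320]))
print(feats.shape)
```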
ppocr/modeling/backbones/rec_svtrnet.py (new file, +597 −0)

```python
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from collections import Callable
from paddle import ParamAttr
from paddle.nn.initializer import KaimingNormal
import numpy as np
import paddle
import paddle.nn as nn
from paddle.nn.initializer import TruncatedNormal, Constant, Normal

trunc_normal_ = TruncatedNormal(std=.02)
normal_ = Normal
zeros_ = Constant(value=0.)
ones_ = Constant(value=1.)


def drop_path(x, drop_prob=0., training=False):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
    See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = paddle.to_tensor(1 - drop_prob)
    shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
    random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
    random_tensor = paddle.floor(random_tensor)  # binarize
    output = x.divide(keep_prob) * random_tensor
    return output


class ConvBNLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 stride=1,
                 padding=0,
                 bias_attr=False,
                 groups=1,
                 act=nn.GELU):
        super().__init__()
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            weight_attr=paddle.ParamAttr(
                initializer=nn.initializer.KaimingUniform()),
            bias_attr=bias_attr)
        self.norm = nn.BatchNorm2D(out_channels)
        self.act = act()

    def forward(self, inputs):
        out = self.conv(inputs)
        out = self.norm(out)
        out = self.act(out)
        return out


class DropPath(nn.Layer):
    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
    """

    def __init__(self, drop_prob=None):
        super(DropPath, self).__init__()
        self.drop_prob = drop_prob

    def forward(self, x):
        return drop_path(x, self.drop_prob, self.training)


class Identity(nn.Layer):
    def __init__(self):
        super(Identity, self).__init__()

    def forward(self, input):
        return input


class Mlp(nn.Layer):
    def __init__(self,
                 in_features,
                 hidden_features=None,
                 out_features=None,
                 act_layer=nn.GELU,
                 drop=0.):
        super().__init__()
        out_features = out_features or in_features
        hidden_features = hidden_features or in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)
        return x


class ConvMixer(nn.Layer):
    def __init__(
            self,
            dim,
            num_heads=8,
            HW=[8, 25],
            local_k=[3, 3], ):
        super().__init__()
        self.HW = HW
        self.dim = dim
        self.local_mixer = nn.Conv2D(
            dim,
            dim,
            local_k,
            1, [local_k[0] // 2, local_k[1] // 2],
            groups=num_heads,
            weight_attr=ParamAttr(initializer=KaimingNormal()))

    def forward(self, x):
        h = self.HW[0]
        w = self.HW[1]
        x = x.transpose([0, 2, 1]).reshape([0, self.dim, h, w])
        x = self.local_mixer(x)
        x = x.flatten(2).transpose([0, 2, 1])
        return x


class Attention(nn.Layer):
    def __init__(self,
                 dim,
                 num_heads=8,
                 mixer='Global',
                 HW=[8, 25],
                 local_k=[7, 11],
                 qkv_bias=False,
                 qk_scale=None,
                 attn_drop=0.,
                 proj_drop=0.):
        super().__init__()
        self.num_heads = num_heads
        head_dim = dim // num_heads
        self.scale = qk_scale or head_dim**-0.5

        self.qkv = nn.Linear(dim, dim * 3, bias_attr=qkv_bias)
        self.attn_drop = nn.Dropout(attn_drop)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_drop)
        self.HW = HW
        if HW is not None:
            H = HW[0]
            W = HW[1]
            self.N = H * W
            self.C = dim
        if mixer == 'Local' and HW is not None:
            hk = local_k[0]
            wk = local_k[1]
            mask = np.ones([H * W, H * W])
            for h in range(H):
                for w in range(W):
                    for kh in range(-(hk // 2), (hk // 2) + 1):
                        for kw in range(-(wk // 2), (wk // 2) + 1):
                            if H > (h + kh) >= 0 and W > (w + kw) >= 0:
                                mask[h * W + w][(h + kh) * W + (w + kw)] = 0
            mask_paddle = paddle.to_tensor(mask, dtype='float32')
            mask_inf = paddle.full([H * W, H * W], '-inf', dtype='float32')
            mask = paddle.where(mask_paddle < 1, mask_paddle, mask_inf)
            self.mask = mask.unsqueeze([0, 1])
        self.mixer = mixer

    def forward(self, x):
        if self.HW is not None:
            N = self.N
            C = self.C
        else:
            _, N, C = x.shape
        qkv = self.qkv(x).reshape((0, N, 3, self.num_heads, C //
                                   self.num_heads)).transpose((2, 0, 3, 1, 4))
        q, k, v = qkv[0] * self.scale, qkv[1], qkv[2]

        attn = (q.matmul(k.transpose((0, 1, 3, 2))))
        if self.mixer == 'Local':
            attn += self.mask
        attn = nn.functional.softmax(attn, axis=-1)
        attn = self.attn_drop(attn)

        x = (attn.matmul(v)).transpose((0, 2, 1, 3)).reshape((0, N, C))
        x = self.proj(x)
        x = self.proj_drop(x)
        return x


class Block(nn.Layer):
    def __init__(self,
                 dim,
                 num_heads,
                 mixer='Global',
                 local_mixer=[7, 11],
                 HW=[8, 25],
                 mlp_ratio=4.,
                 qkv_bias=False,
                 qk_scale=None,
                 drop=0.,
                 attn_drop=0.,
                 drop_path=0.,
                 act_layer=nn.GELU,
                 norm_layer='nn.LayerNorm',
                 epsilon=1e-6,
                 prenorm=True):
        super().__init__()
        if isinstance(norm_layer, str):
            self.norm1 = eval(norm_layer)(dim, epsilon=epsilon)
        elif isinstance(norm_layer, Callable):
            self.norm1 = norm_layer(dim)
        else:
            raise TypeError(
                "The norm_layer must be str or paddle.nn.layer.Layer class")
        if mixer == 'Global' or mixer == 'Local':
            self.mixer = Attention(
                dim,
                num_heads=num_heads,
                mixer=mixer,
                HW=HW,
                local_k=local_mixer,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                attn_drop=attn_drop,
                proj_drop=drop)
        elif mixer == 'Conv':
            self.mixer = ConvMixer(
                dim, num_heads=num_heads, HW=HW, local_k=local_mixer)
        else:
            raise TypeError("The mixer must be one of [Global, Local, Conv]")
        # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here
        self.drop_path = DropPath(drop_path) if drop_path > 0. else Identity()
        if isinstance(norm_layer, str):
            self.norm2 = eval(norm_layer)(dim, epsilon=epsilon)
        elif isinstance(norm_layer, Callable):
            self.norm2 = norm_layer(dim)
        else:
            raise TypeError(
                "The norm_layer must be str or paddle.nn.layer.Layer class")
        mlp_hidden_dim = int(dim * mlp_ratio)
        self.mlp_ratio = mlp_ratio
        self.mlp = Mlp(in_features=dim,
                       hidden_features=mlp_hidden_dim,
                       act_layer=act_layer,
                       drop=drop)
        self.prenorm = prenorm

    def forward(self, x):
        if self.prenorm:
            x = self.norm1(x + self.drop_path(self.mixer(x)))
            x = self.norm2(x + self.drop_path(self.mlp(x)))
        else:
            x = x + self.drop_path(self.mixer(self.norm1(x)))
            x = x + self.drop_path(self.mlp(self.norm2(x)))
        return x


class PatchEmbed(nn.Layer):
    """ Image to Patch Embedding
    """

    def __init__(self,
                 img_size=[32, 100],
                 in_channels=3,
                 embed_dim=768,
                 sub_num=2):
        super().__init__()
        num_patches = (img_size[1] // (2 ** sub_num)) * \
                      (img_size[0] // (2 ** sub_num))
        self.img_size = img_size
        self.num_patches = num_patches
        self.embed_dim = embed_dim
        self.norm = None
        if sub_num == 2:
            self.proj = nn.Sequential(
                ConvBNLayer(
                    in_channels=in_channels,
                    out_channels=embed_dim // 2,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    act=nn.GELU,
                    bias_attr=None),
                ConvBNLayer(
                    in_channels=embed_dim // 2,
                    out_channels=embed_dim,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    act=nn.GELU,
                    bias_attr=None))
        if sub_num == 3:
            self.proj = nn.Sequential(
                ConvBNLayer(
                    in_channels=in_channels,
                    out_channels=embed_dim // 4,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    act=nn.GELU,
                    bias_attr=None),
                ConvBNLayer(
                    in_channels=embed_dim // 4,
                    out_channels=embed_dim // 2,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    act=nn.GELU,
                    bias_attr=None),
                ConvBNLayer(
                    in_channels=embed_dim // 2,
                    out_channels=embed_dim,
                    kernel_size=3,
                    stride=2,
                    padding=1,
                    act=nn.GELU,
                    bias_attr=None))

    def forward(self, x):
        B, C, H, W = x.shape
        assert H == self.img_size[0] and W == self.img_size[1], \
            f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
        x = self.proj(x).flatten(2).transpose((0, 2, 1))
        return x


class SubSample(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 types='Pool',
                 stride=[2, 1],
                 sub_norm='nn.LayerNorm',
                 act=None):
        super().__init__()
        self.types = types
        if types == 'Pool':
            self.avgpool = nn.AvgPool2D(
                kernel_size=[3, 5], stride=stride, padding=[1, 2])
            self.maxpool = nn.MaxPool2D(
                kernel_size=[3, 5], stride=stride, padding=[1, 2])
            self.proj = nn.Linear(in_channels, out_channels)
        else:
            self.conv = nn.Conv2D(
                in_channels,
                out_channels,
                kernel_size=3,
                stride=stride,
                padding=1,
                weight_attr=ParamAttr(initializer=KaimingNormal()))
        self.norm = eval(sub_norm)(out_channels)
        if act is not None:
            self.act = act()
        else:
            self.act = None

    def forward(self, x):
        if self.types == 'Pool':
            x1 = self.avgpool(x)
            x2 = self.maxpool(x)
            x = (x1 + x2) * 0.5
            out = self.proj(x.flatten(2).transpose((0, 2, 1)))
        else:
            x = self.conv(x)
            out = x.flatten(2).transpose((0, 2, 1))
        out = self.norm(out)
        if self.act is not None:
            out = self.act(out)
        return out


class SVTRNet(nn.Layer):
    def __init__(
            self,
            img_size=[32, 100],
            in_channels=3,
            embed_dim=[64, 128, 256],
            depth=[3, 6, 3],
            num_heads=[2, 4, 8],
            mixer=['Local'] * 6 + ['Global'] * 6,  # Local atten, Global atten, Conv
            local_mixer=[[7, 11], [7, 11], [7, 11]],
            patch_merging='Conv',  # Conv, Pool, None
            mlp_ratio=4,
            qkv_bias=True,
            qk_scale=None,
            drop_rate=0.,
            last_drop=0.1,
            attn_drop_rate=0.,
            drop_path_rate=0.1,
            norm_layer='nn.LayerNorm',
            sub_norm='nn.LayerNorm',
            epsilon=1e-6,
            out_channels=192,
            out_char_num=25,
            block_unit='Block',
            act='nn.GELU',
            last_stage=True,
            sub_num=2,
            prenorm=True,
            use_lenhead=False,
            **kwargs):
        super().__init__()
        self.img_size = img_size
        self.embed_dim = embed_dim
        self.out_channels = out_channels
        self.prenorm = prenorm
        patch_merging = None if patch_merging != 'Conv' and patch_merging != 'Pool' else patch_merging
        self.patch_embed = PatchEmbed(
            img_size=img_size,
            in_channels=in_channels,
            embed_dim=embed_dim[0],
            sub_num=sub_num)
        num_patches = self.patch_embed.num_patches
        self.HW = [img_size[0] // (2**sub_num), img_size[1] // (2**sub_num)]
        self.pos_embed = self.create_parameter(
            shape=[1, num_patches, embed_dim[0]], default_initializer=zeros_)
        self.add_parameter("pos_embed", self.pos_embed)
        self.pos_drop = nn.Dropout(p=drop_rate)
        Block_unit = eval(block_unit)

        dpr = np.linspace(0, drop_path_rate, sum(depth))
        self.blocks1 = nn.LayerList([
            Block_unit(
                dim=embed_dim[0],
                num_heads=num_heads[0],
                mixer=mixer[0:depth[0]][i],
                HW=self.HW,
                local_mixer=local_mixer[0],
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                act_layer=eval(act),
                attn_drop=attn_drop_rate,
                drop_path=dpr[0:depth[0]][i],
                norm_layer=norm_layer,
                epsilon=epsilon,
                prenorm=prenorm) for i in range(depth[0])
        ])
        if patch_merging is not None:
            self.sub_sample1 = SubSample(
                embed_dim[0],
                embed_dim[1],
                sub_norm=sub_norm,
                stride=[2, 1],
                types=patch_merging)
            HW = [self.HW[0] // 2, self.HW[1]]
        else:
            HW = self.HW
        self.patch_merging = patch_merging
        self.blocks2 = nn.LayerList([
            Block_unit(
                dim=embed_dim[1],
                num_heads=num_heads[1],
                mixer=mixer[depth[0]:depth[0] + depth[1]][i],
                HW=HW,
                local_mixer=local_mixer[1],
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                act_layer=eval(act),
                attn_drop=attn_drop_rate,
                drop_path=dpr[depth[0]:depth[0] + depth[1]][i],
                norm_layer=norm_layer,
                epsilon=epsilon,
                prenorm=prenorm) for i in range(depth[1])
        ])
        if patch_merging is not None:
            self.sub_sample2 = SubSample(
                embed_dim[1],
                embed_dim[2],
                sub_norm=sub_norm,
                stride=[2, 1],
                types=patch_merging)
            HW = [self.HW[0] // 4, self.HW[1]]
        else:
            HW = self.HW
        self.blocks3 = nn.LayerList([
            Block_unit(
                dim=embed_dim[2],
                num_heads=num_heads[2],
                mixer=mixer[depth[0] + depth[1]:][i],
                HW=HW,
                local_mixer=local_mixer[2],
                mlp_ratio=mlp_ratio,
                qkv_bias=qkv_bias,
                qk_scale=qk_scale,
                drop=drop_rate,
                act_layer=eval(act),
                attn_drop=attn_drop_rate,
                drop_path=dpr[depth[0] + depth[1]:][i],
                norm_layer=norm_layer,
                epsilon=epsilon,
                prenorm=prenorm) for i in range(depth[2])
        ])
        self.last_stage = last_stage
        if last_stage:
            self.avg_pool = nn.AdaptiveAvgPool2D([1, out_char_num])
            self.last_conv = nn.Conv2D(
                in_channels=embed_dim[2],
                out_channels=self.out_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                bias_attr=False)
            self.hardswish = nn.Hardswish()
            self.dropout = nn.Dropout(p=last_drop, mode="downscale_in_infer")
        if not prenorm:
            self.norm = eval(norm_layer)(embed_dim[-1], epsilon=epsilon)
        self.use_lenhead = use_lenhead
        if use_lenhead:
            self.len_conv = nn.Linear(embed_dim[2], self.out_channels)
            self.hardswish_len = nn.Hardswish()
            self.dropout_len = nn.Dropout(
                p=last_drop, mode="downscale_in_infer")

        trunc_normal_(self.pos_embed)
        self.apply(self._init_weights)

    def _init_weights(self, m):
        if isinstance(m, nn.Linear):
            trunc_normal_(m.weight)
            if isinstance(m, nn.Linear) and m.bias is not None:
                zeros_(m.bias)
        elif isinstance(m, nn.LayerNorm):
            zeros_(m.bias)
            ones_(m.weight)

    def forward_features(self, x):
        x = self.patch_embed(x)
        x = x + self.pos_embed
        x = self.pos_drop(x)
        for blk in self.blocks1:
            x = blk(x)
        if self.patch_merging is not None:
            x = self.sub_sample1(
                x.transpose([0, 2, 1]).reshape(
                    [0, self.embed_dim[0], self.HW[0], self.HW[1]]))
        for blk in self.blocks2:
            x = blk(x)
        if self.patch_merging is not None:
            x = self.sub_sample2(
                x.transpose([0, 2, 1]).reshape(
                    [0, self.embed_dim[1], self.HW[0] // 2, self.HW[1]]))
        for blk in self.blocks3:
            x = blk(x)
        if not self.prenorm:
            x = self.norm(x)
        return x

    def forward(self, x):
        x = self.forward_features(x)
        if self.use_lenhead:
            len_x = self.len_conv(x.mean(1))
            len_x = self.dropout_len(self.hardswish_len(len_x))
        if self.last_stage:
            if self.patch_merging is not None:
                h = self.HW[0] // 4
            else:
                h = self.HW[0]
            x = self.avg_pool(
                x.transpose([0, 2, 1]).reshape(
                    [0, self.embed_dim[2], h, self.HW[1]]))
            x = self.last_conv(x)
            x = self.hardswish(x)
            x = self.dropout(x)
        if self.use_lenhead:
            return x, len_x
        return x
```
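A hedged smoke-test for the new backbone; with the default `img_size=[32, 100]`, `out_channels=192`, and `out_char_num=25`, the output shape below follows directly from the code above:

```python
# Sketch (assumes paddle + PaddleOCR importable).
import paddle
from ppocr.modeling.backbones.rec_svtrnet import SVTRNet

model = SVTRNet()  # default config: 3 stages, embed_dim [64, 128, 256]
x = paddle.randn([1, 3, 32, 100])
y = model(x)
print(y.shape)  # [1, 192, 1, 25]: last_stage pools to 1 x out_char_num and
                # projects embed_dim[2] -> out_channels with a 1x1 conv
```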
ppocr/modeling/heads/__init__.py (+3 −1)

```diff
@@ -32,6 +32,7 @@ def build_head(config):
     from .rec_sar_head import SARHead
     from .rec_aster_head import AsterHead
     from .rec_pren_head import PRENHead
+    from .rec_multi_head import MultiHead

     # cls head
     from .cls_head import ClsHead
@@ -44,7 +45,8 @@ def build_head(config):
     support_dict = [
         'DBHead', 'PSEHead', 'FCEHead', 'EASTHead', 'SASTHead', 'CTCHead',
         'ClsHead', 'AttentionHead', 'SRNHead', 'PGHead', 'Transformer',
-        'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead', 'PRENHead'
+        'TableAttentionHead', 'SARHead', 'AsterHead', 'SDMGRHead', 'PRENHead',
+        'MultiHead'
     ]

     #table head
```
ppocr/modeling/heads/det_db_head.py (+8 −7)

```diff
@@ -31,13 +31,14 @@ def get_bias_attr(k):

 class Head(nn.Layer):
-    def __init__(self, in_channels, name_list):
+    def __init__(self, in_channels, name_list, kernel_list=[3, 2, 2], **kwargs):
         super(Head, self).__init__()

         self.conv1 = nn.Conv2D(
             in_channels=in_channels,
             out_channels=in_channels // 4,
-            kernel_size=3,
-            padding=1,
+            kernel_size=kernel_list[0],
+            padding=int(kernel_list[0] // 2),
             weight_attr=ParamAttr(),
             bias_attr=False)
         self.conv_bn1 = nn.BatchNorm(
@@ -50,7 +51,7 @@ class Head(nn.Layer):
         self.conv2 = nn.Conv2DTranspose(
             in_channels=in_channels // 4,
             out_channels=in_channels // 4,
-            kernel_size=2,
+            kernel_size=kernel_list[1],
             stride=2,
             weight_attr=ParamAttr(
                 initializer=paddle.nn.initializer.KaimingUniform()),
@@ -65,7 +66,7 @@ class Head(nn.Layer):
         self.conv3 = nn.Conv2DTranspose(
             in_channels=in_channels // 4,
             out_channels=1,
-            kernel_size=2,
+            kernel_size=kernel_list[2],
             stride=2,
             weight_attr=ParamAttr(
                 initializer=paddle.nn.initializer.KaimingUniform()),
@@ -100,8 +101,8 @@ class DBHead(nn.Layer):
             'conv2d_57', 'batch_norm_49', 'conv2d_transpose_2',
             'batch_norm_50', 'conv2d_transpose_3', 'thresh'
         ]
-        self.binarize = Head(in_channels, binarize_name_list)
-        self.thresh = Head(in_channels, thresh_name_list)
+        self.binarize = Head(in_channels, binarize_name_list, **kwargs)
+        self.thresh = Head(in_channels, thresh_name_list, **kwargs)

     def step_function(self, x, y):
         return paddle.reciprocal(1 + paddle.exp(-self.k * (x - y)))
```
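Since `DBHead` now forwards `**kwargs` into `Head`, a config can override the kernel schedule; a hedged one-liner where the `kernel_list` value is an illustrative assumption:

```python
# Sketch (assumes PaddleOCR importable); kernel_list=[7, 2, 2] enlarges the
# first conv to 7x7 while keeping both transposed convs at 2x2.
from ppocr.modeling.heads.det_db_head import DBHead

head = DBHead(in_channels=96, kernel_list=[7, 2, 2])
```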
ppocr/modeling/heads/rec_multi_head.py (new file, +73 −0)

```python
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F

from ppocr.modeling.necks.rnn import Im2Seq, EncoderWithRNN, EncoderWithFC, SequenceEncoder, EncoderWithSVTR
from .rec_ctc_head import CTCHead
from .rec_sar_head import SARHead


class MultiHead(nn.Layer):
    def __init__(self, in_channels, out_channels_list, **kwargs):
        super().__init__()
        self.head_list = kwargs.pop('head_list')
        self.gtc_head = 'sar'
        assert len(self.head_list) >= 2
        for idx, head_name in enumerate(self.head_list):
            name = list(head_name)[0]
            if name == 'SARHead':
                # sar head
                sar_args = self.head_list[idx][name]
                self.sar_head = eval(name)(in_channels=in_channels, \
                    out_channels=out_channels_list['SARLabelDecode'], **sar_args)
            elif name == 'CTCHead':
                # ctc neck
                self.encoder_reshape = Im2Seq(in_channels)
                neck_args = self.head_list[idx][name]['Neck']
                encoder_type = neck_args.pop('name')
                self.encoder = encoder_type
                self.ctc_encoder = SequenceEncoder(in_channels=in_channels, \
                    encoder_type=encoder_type, **neck_args)
                # ctc head
                head_args = self.head_list[idx][name]['Head']
                self.ctc_head = eval(name)(in_channels=self.ctc_encoder.out_channels, \
                    out_channels=out_channels_list['CTCLabelDecode'], **head_args)
            else:
                raise NotImplementedError(
                    '{} is not supported in MultiHead yet'.format(name))

    def forward(self, x, targets=None):
        ctc_encoder = self.ctc_encoder(x)
        ctc_out = self.ctc_head(ctc_encoder, targets)
        head_out = dict()
        head_out['ctc'] = ctc_out
        head_out['ctc_neck'] = ctc_encoder
        # eval mode
        if not self.training:
            return ctc_out
        if self.gtc_head == 'sar':
            sar_out = self.sar_head(x, targets[1:])
            head_out['sar'] = sar_out
            return head_out
        else:
            return head_out
```
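A hedged sketch of the `head_list` structure MultiHead expects; the neck and head hyper-parameters below are illustrative, and only the overall shape (a CTCHead entry with Neck/Head sub-configs plus a SARHead entry) is dictated by the code above:

```python
# Illustrative head_list; MultiHead(in_channels, out_channels_list, **cfg)
# builds a CTC branch (SequenceEncoder neck + CTCHead) and a SAR branch.
cfg = {
    "head_list": [
        {"CTCHead": {
            "Neck": {"name": "svtr", "dims": 64, "depth": 2, "hidden_dims": 120},
            "Head": {"fc_decay": 1e-5},
        }},
        {"SARHead": {"enc_dim": 512, "max_text_length": 25}},
    ],
}
# out_channels_list must map 'CTCLabelDecode' and 'SARLabelDecode' to the
# respective vocabulary sizes; at inference only the CTC output is returned.
```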
ppocr/modeling/heads/rec_sar_head.py (+11 −3)

```diff
@@ -349,7 +349,10 @@ class ParallelSARDecoder(BaseDecoder):

 class SARHead(nn.Layer):
     def __init__(self,
+                 in_channels,
                  out_channels,
+                 enc_dim=512,
+                 max_text_length=30,
                  enc_bi_rnn=False,
                  enc_drop_rnn=0.1,
                  enc_gru=False,
@@ -358,14 +361,17 @@ class SARHead(nn.Layer):
                  dec_gru=False,
                  d_k=512,
                  pred_dropout=0.1,
-                 max_text_length=30,
                  pred_concat=True,
                  **kwargs):
         super(SARHead, self).__init__()

         # encoder module
         self.encoder = SAREncoder(
-            enc_bi_rnn=enc_bi_rnn, enc_drop_rnn=enc_drop_rnn, enc_gru=enc_gru)
+            enc_bi_rnn=enc_bi_rnn,
+            enc_drop_rnn=enc_drop_rnn,
+            enc_gru=enc_gru,
+            d_model=in_channels,
+            d_enc=enc_dim)

         # decoder module
         self.decoder = ParallelSARDecoder(
@@ -374,6 +380,8 @@ class SARHead(nn.Layer):
             dec_bi_rnn=dec_bi_rnn,
             dec_drop_rnn=dec_drop_rnn,
             dec_gru=dec_gru,
+            d_model=in_channels,
+            d_enc=enc_dim,
             d_k=d_k,
             pred_dropout=pred_dropout,
             max_text_length=max_text_length,
@@ -390,7 +398,7 @@ class SARHead(nn.Layer):
             label = paddle.to_tensor(label, dtype='int64')
             final_out = self.decoder(
                 feat, holistic_feat, label, img_metas=targets)
-        if not self.training:
+        else:
             final_out = self.decoder(
                 feat,
                 holistic_feat,
```
ppocr/modeling/necks/__init__.py (+3 −3)

```diff
@@ -16,7 +16,7 @@ __all__ = ['build_neck']


 def build_neck(config):
-    from .db_fpn import DBFPN
+    from .db_fpn import DBFPN, RSEFPN, LKPAN
     from .east_fpn import EASTFPN
     from .sast_fpn import SASTFPN
     from .rnn import SequenceEncoder
@@ -26,8 +26,8 @@ def build_neck(config):
     from .fce_fpn import FCEFPN
     from .pren_fpn import PRENFPN
     support_dict = [
-        'FPN', 'FCEFPN', 'DBFPN', 'EASTFPN', 'SASTFPN', 'SequenceEncoder',
-        'PGFPN', 'TableFPN', 'PRENFPN'
+        'FPN', 'FCEFPN', 'LKPAN', 'DBFPN', 'RSEFPN', 'EASTFPN', 'SASTFPN',
+        'SequenceEncoder', 'PGFPN', 'TableFPN', 'PRENFPN'
     ]

     module_name = config.pop('name')
```
ppocr/modeling/necks/db_fpn.py (+250 −0)

```diff
@@ -20,6 +20,88 @@ import paddle
 from paddle import nn
 import paddle.nn.functional as F
 from paddle import ParamAttr
+import os
+import sys
+
+__dir__ = os.path.dirname(os.path.abspath(__file__))
+sys.path.append(__dir__)
+sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../../..')))
+
+from ppocr.modeling.backbones.det_mobilenet_v3 import SEModule
+
+
+class DSConv(nn.Layer):
+    def __init__(self,
+                 in_channels,
+                 out_channels,
+                 kernel_size,
+                 padding,
+                 stride=1,
+                 groups=None,
+                 if_act=True,
+                 act="relu",
+                 **kwargs):
+        super(DSConv, self).__init__()
+        if groups == None:
+            groups = in_channels
+        self.if_act = if_act
+        self.act = act
+        self.conv1 = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=in_channels,
+            kernel_size=kernel_size,
+            stride=stride,
+            padding=padding,
+            groups=groups,
+            bias_attr=False)
+
+        self.bn1 = nn.BatchNorm(num_channels=in_channels, act=None)
+
+        self.conv2 = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=int(in_channels * 4),
+            kernel_size=1,
+            stride=1,
+            bias_attr=False)
+
+        self.bn2 = nn.BatchNorm(num_channels=int(in_channels * 4), act=None)
+
+        self.conv3 = nn.Conv2D(
+            in_channels=int(in_channels * 4),
+            out_channels=out_channels,
+            kernel_size=1,
+            stride=1,
+            bias_attr=False)
+        self._c = [in_channels, out_channels]
+        if in_channels != out_channels:
+            self.conv_end = nn.Conv2D(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=1,
+                stride=1,
+                bias_attr=False)
+
+    def forward(self, inputs):
+        x = self.conv1(inputs)
+        x = self.bn1(x)
+
+        x = self.conv2(x)
+        x = self.bn2(x)
+        if self.if_act:
+            if self.act == "relu":
+                x = F.relu(x)
+            elif self.act == "hardswish":
+                x = F.hardswish(x)
+            else:
+                print("The activation function({}) is selected incorrectly.".
+                      format(self.act))
+                exit()
+
+        x = self.conv3(x)
+        if self._c[0] != self._c[1]:
+            x = x + self.conv_end(inputs)
+        return x
+

 class DBFPN(nn.Layer):
@@ -106,3 +188,171 @@ class DBFPN(nn.Layer):
         fuse = paddle.concat([p5, p4, p3, p2], axis=1)
         return fuse
+
+
+class RSELayer(nn.Layer):
+    def __init__(self, in_channels, out_channels, kernel_size, shortcut=True):
+        super(RSELayer, self).__init__()
+        weight_attr = paddle.nn.initializer.KaimingUniform()
+        self.out_channels = out_channels
+        self.in_conv = nn.Conv2D(
+            in_channels=in_channels,
+            out_channels=self.out_channels,
+            kernel_size=kernel_size,
+            padding=int(kernel_size // 2),
+            weight_attr=ParamAttr(initializer=weight_attr),
+            bias_attr=False)
+        self.se_block = SEModule(self.out_channels)
+        self.shortcut = shortcut
+
+    def forward(self, ins):
+        x = self.in_conv(ins)
+        if self.shortcut:
+            out = x + self.se_block(x)
+        else:
+            out = self.se_block(x)
+        return out
+
+
+class RSEFPN(nn.Layer):
+    def __init__(self, in_channels, out_channels, shortcut=True, **kwargs):
+        super(RSEFPN, self).__init__()
+        self.out_channels = out_channels
+        self.ins_conv = nn.LayerList()
+        self.inp_conv = nn.LayerList()
+
+        for i in range(len(in_channels)):
+            self.ins_conv.append(
+                RSELayer(
+                    in_channels[i],
+                    out_channels,
+                    kernel_size=1,
+                    shortcut=shortcut))
+            self.inp_conv.append(
+                RSELayer(
+                    out_channels,
+                    out_channels // 4,
+                    kernel_size=3,
+                    shortcut=shortcut))
+
+    def forward(self, x):
+        c2, c3, c4, c5 = x
+
+        in5 = self.ins_conv[3](c5)
+        in4 = self.ins_conv[2](c4)
+        in3 = self.ins_conv[1](c3)
+        in2 = self.ins_conv[0](c2)
+
+        out4 = in4 + F.upsample(
+            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
+        out3 = in3 + F.upsample(
+            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
+        out2 = in2 + F.upsample(
+            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4
+
+        p5 = self.inp_conv[3](in5)
+        p4 = self.inp_conv[2](out4)
+        p3 = self.inp_conv[1](out3)
+        p2 = self.inp_conv[0](out2)
+
+        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
+        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
+        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)
+
+        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
+        return fuse
+
+
+class LKPAN(nn.Layer):
+    def __init__(self, in_channels, out_channels, mode='large', **kwargs):
+        super(LKPAN, self).__init__()
+        self.out_channels = out_channels
+        weight_attr = paddle.nn.initializer.KaimingUniform()
+
+        self.ins_conv = nn.LayerList()
+        self.inp_conv = nn.LayerList()
+        # pan head
+        self.pan_head_conv = nn.LayerList()
+        self.pan_lat_conv = nn.LayerList()
+
+        if mode.lower() == 'lite':
+            p_layer = DSConv
+        elif mode.lower() == 'large':
+            p_layer = nn.Conv2D
+        else:
+            raise ValueError(
+                "mode can only be one of ['lite', 'large'], but received {}".
+                format(mode))
+
+        for i in range(len(in_channels)):
+            self.ins_conv.append(
+                nn.Conv2D(
+                    in_channels=in_channels[i],
+                    out_channels=self.out_channels,
+                    kernel_size=1,
+                    weight_attr=ParamAttr(initializer=weight_attr),
+                    bias_attr=False))
+
+            self.inp_conv.append(
+                p_layer(
+                    in_channels=self.out_channels,
+                    out_channels=self.out_channels // 4,
+                    kernel_size=9,
+                    padding=4,
+                    weight_attr=ParamAttr(initializer=weight_attr),
+                    bias_attr=False))
+
+            if i > 0:
+                self.pan_head_conv.append(
+                    nn.Conv2D(
+                        in_channels=self.out_channels // 4,
+                        out_channels=self.out_channels // 4,
+                        kernel_size=3,
+                        padding=1,
+                        stride=2,
+                        weight_attr=ParamAttr(initializer=weight_attr),
+                        bias_attr=False))
+            self.pan_lat_conv.append(
+                p_layer(
+                    in_channels=self.out_channels // 4,
+                    out_channels=self.out_channels // 4,
+                    kernel_size=9,
+                    padding=4,
+                    weight_attr=ParamAttr(initializer=weight_attr),
+                    bias_attr=False))
+
+    def forward(self, x):
+        c2, c3, c4, c5 = x
+
+        in5 = self.ins_conv[3](c5)
+        in4 = self.ins_conv[2](c4)
+        in3 = self.ins_conv[1](c3)
+        in2 = self.ins_conv[0](c2)
+
+        out4 = in4 + F.upsample(
+            in5, scale_factor=2, mode="nearest", align_mode=1)  # 1/16
+        out3 = in3 + F.upsample(
+            out4, scale_factor=2, mode="nearest", align_mode=1)  # 1/8
+        out2 = in2 + F.upsample(
+            out3, scale_factor=2, mode="nearest", align_mode=1)  # 1/4
+
+        f5 = self.inp_conv[3](in5)
+        f4 = self.inp_conv[2](out4)
+        f3 = self.inp_conv[1](out3)
+        f2 = self.inp_conv[0](out2)
+
+        pan3 = f3 + self.pan_head_conv[0](f2)
+        pan4 = f4 + self.pan_head_conv[1](pan3)
+        pan5 = f5 + self.pan_head_conv[2](pan4)
+
+        p2 = self.pan_lat_conv[0](f2)
+        p3 = self.pan_lat_conv[1](pan3)
+        p4 = self.pan_lat_conv[2](pan4)
+        p5 = self.pan_lat_conv[3](pan5)
+
+        p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
+        p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
+        p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)
+
+        fuse = paddle.concat([p5, p4, p3, p2], axis=1)
+        return fuse
```
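A hedged wiring sketch for the new necks against a 4-level backbone output; the channel counts are illustrative (roughly MobileNetV3-style detector features), not values from this diff:

```python
# Sketch (assumes paddle + PaddleOCR importable; channel counts are assumptions).
import paddle
from ppocr.modeling.necks.db_fpn import RSEFPN, LKPAN

feats = [
    paddle.randn([1, 16, 160, 160]),  # c2, 1/4 scale
    paddle.randn([1, 24, 80, 80]),    # c3, 1/8
    paddle.randn([1, 56, 40, 40]),    # c4, 1/16
    paddle.randn([1, 480, 20, 20]),   # c5, 1/32
]
neck = RSEFPN(in_channels=[16, 24, 56, 480], out_channels=96, shortcut=True)
fuse = neck(feats)
print(fuse.shape)  # [1, 96, 160, 160]: four 1/4-scale maps of 96 // 4 channels
                   # each, concatenated; LKPAN takes the same arguments plus
                   # mode='large' or mode='lite' (the latter uses DSConv).
```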