"vscode:/vscode.git/clone" did not exist on "a5a892ffd3d38d30a8ec2e7e725efb8ec2daafd0"
Commit f1506916 authored by sugon_cxj's avatar sugon_cxj
Browse files

first commit

parent 55c28ed5
Pipeline #266 canceled with stages
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This code is adapted from: https://github.com/viig99/LS-ACELoss
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
class ACELoss(nn.Layer):
def __init__(self, **kwargs):
super().__init__()
self.loss_func = nn.CrossEntropyLoss(
weight=None,
ignore_index=0,
reduction='none',
soft_label=True,
axis=-1)
def __call__(self, predicts, batch):
if isinstance(predicts, (list, tuple)):
predicts = predicts[-1]
B, N = predicts.shape[:2]
div = paddle.to_tensor([N]).astype('float32')
predicts = nn.functional.softmax(predicts, axis=-1)
aggregation_preds = paddle.sum(predicts, axis=1)
aggregation_preds = paddle.divide(aggregation_preds, div)
length = batch[2].astype("float32")
batch = batch[3].astype("float32")
batch[:, 0] = paddle.subtract(div, length)
batch = paddle.divide(batch, div)
loss = self.loss_func(aggregation_preds, batch)
return {"loss_ace": loss}
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import L1Loss
from paddle.nn import MSELoss as L2Loss
from paddle.nn import SmoothL1Loss
class CELoss(nn.Layer):
def __init__(self, epsilon=None):
super().__init__()
if epsilon is not None and (epsilon <= 0 or epsilon >= 1):
epsilon = None
self.epsilon = epsilon
def _labelsmoothing(self, target, class_num):
if target.shape[-1] != class_num:
one_hot_target = F.one_hot(target, class_num)
else:
one_hot_target = target
soft_target = F.label_smooth(one_hot_target, epsilon=self.epsilon)
soft_target = paddle.reshape(soft_target, shape=[-1, class_num])
return soft_target
def forward(self, x, label):
if self.epsilon is not None:
class_num = x.shape[-1]
label = self._labelsmoothing(label, class_num)
x = -F.log_softmax(x, axis=-1)
loss = paddle.sum(x * label, axis=-1)
else:
if label.shape[-1] == x.shape[-1]:
label = F.softmax(label, axis=-1)
soft_label = True
else:
soft_label = False
loss = F.cross_entropy(x, label=label, soft_label=soft_label)
return loss
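# Illustrative usage sketch (not part of the original module): with a
# non-None epsilon, integer labels are one-hot encoded, smoothed, and
# reduced with a soft cross-entropy. Shapes are hypothetical.
if __name__ == "__main__":
    import paddle
    logits = paddle.randn([4, 10])       # (batch, class_num)
    labels = paddle.randint(0, 10, [4])  # integer class ids
    print(paddle.mean(CELoss(epsilon=0.1)(logits, labels)))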
class KLJSLoss(object):
def __init__(self, mode='kl'):
assert mode in ['kl', 'js', 'KL', 'JS'
], "mode can only be one of ['kl', 'KL', 'js', 'JS']"
self.mode = mode
def __call__(self, p1, p2, reduction="mean", eps=1e-5):
if self.mode.lower() == 'kl':
loss = paddle.multiply(p2,
paddle.log((p2 + eps) / (p1 + eps) + eps))
loss += paddle.multiply(p1,
paddle.log((p1 + eps) / (p2 + eps) + eps))
loss *= 0.5
elif self.mode.lower() == "js":
loss = paddle.multiply(
p2, paddle.log((2 * p2 + eps) / (p1 + p2 + eps) + eps))
loss += paddle.multiply(
p1, paddle.log((2 * p1 + eps) / (p1 + p2 + eps) + eps))
loss *= 0.5
        else:
            raise ValueError(
                "The mode.lower() of KLJSLoss should be one of ['kl', 'js']")
if reduction == "mean":
loss = paddle.mean(loss, axis=[1, 2])
elif reduction == "none" or reduction is None:
return loss
else:
loss = paddle.sum(loss, axis=[1, 2])
return loss
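# Illustrative usage sketch (not part of the original module): the
# "mean"/"sum" reductions reduce over axes [1, 2], so the inputs are
# expected to have at least three dims. Shapes here are hypothetical.
if __name__ == "__main__":
    import paddle
    p1 = paddle.nn.functional.softmax(paddle.randn([2, 8, 8]), axis=-1)
    p2 = paddle.nn.functional.softmax(paddle.randn([2, 8, 8]), axis=-1)
    print(KLJSLoss(mode="js")(p1, p2, reduction="mean").shape)  # [2]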
class DMLLoss(nn.Layer):
"""
DMLLoss
"""
def __init__(self, act=None, use_log=False):
super().__init__()
if act is not None:
assert act in ["softmax", "sigmoid"]
if act == "softmax":
self.act = nn.Softmax(axis=-1)
elif act == "sigmoid":
self.act = nn.Sigmoid()
else:
self.act = None
self.use_log = use_log
self.jskl_loss = KLJSLoss(mode="kl")
def _kldiv(self, x, target):
eps = 1.0e-10
loss = target * (paddle.log(target + eps) - x)
# batch mean loss
loss = paddle.sum(loss) / loss.shape[0]
return loss
def forward(self, out1, out2):
if self.act is not None:
out1 = self.act(out1) + 1e-10
out2 = self.act(out2) + 1e-10
if self.use_log:
# for recognition distillation, log is needed for feature map
log_out1 = paddle.log(out1)
log_out2 = paddle.log(out2)
loss = (
self._kldiv(log_out1, out2) + self._kldiv(log_out2, out1)) / 2.0
else:
# log is not needed for detection
loss = self.jskl_loss(out1, out2)
return loss
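# Illustrative usage sketch (not part of the original module): with
# use_log=True the softmax outputs are compared via a symmetric KL
# divergence; with use_log=False the KLJSLoss above is used instead.
if __name__ == "__main__":
    import paddle
    out1 = paddle.randn([2, 4, 10])  # e.g. (batch, time, classes)
    out2 = paddle.randn([2, 4, 10])
    print(DMLLoss(act="softmax", use_log=True)(out1, out2))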
class DistanceLoss(nn.Layer):
"""
DistanceLoss:
mode: loss mode
"""
def __init__(self, mode="l2", **kargs):
super().__init__()
assert mode in ["l1", "l2", "smooth_l1"]
if mode == "l1":
self.loss_func = nn.L1Loss(**kargs)
elif mode == "l2":
self.loss_func = nn.MSELoss(**kargs)
elif mode == "smooth_l1":
self.loss_func = nn.SmoothL1Loss(**kargs)
def forward(self, x, y):
return self.loss_func(x, y)
class LossFromOutput(nn.Layer):
def __init__(self, key='loss', reduction='none'):
super().__init__()
self.key = key
self.reduction = reduction
def forward(self, predicts, batch):
loss = predicts[self.key]
if self.reduction == 'mean':
loss = paddle.mean(loss)
elif self.reduction == 'sum':
loss = paddle.sum(loss)
return {'loss': loss}
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This code is adapted from: https://github.com/KaiyangZhou/pytorch-center-loss
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import pickle
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
class CenterLoss(nn.Layer):
"""
Reference: Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
"""
def __init__(self, num_classes=6625, feat_dim=96, center_file_path=None):
super().__init__()
self.num_classes = num_classes
self.feat_dim = feat_dim
self.centers = paddle.randn(
shape=[self.num_classes, self.feat_dim]).astype("float64")
if center_file_path is not None:
assert os.path.exists(
center_file_path
), f"center path({center_file_path}) must exist when it is not None."
with open(center_file_path, 'rb') as f:
char_dict = pickle.load(f)
for key in char_dict.keys():
self.centers[key] = paddle.to_tensor(char_dict[key])
def __call__(self, predicts, batch):
assert isinstance(predicts, (list, tuple))
features, predicts = predicts
feats_reshape = paddle.reshape(
features, [-1, features.shape[-1]]).astype("float64")
label = paddle.argmax(predicts, axis=2)
label = paddle.reshape(label, [label.shape[0] * label.shape[1]])
batch_size = feats_reshape.shape[0]
#calc l2 distance between feats and centers
square_feat = paddle.sum(paddle.square(feats_reshape),
axis=1,
keepdim=True)
square_feat = paddle.expand(square_feat, [batch_size, self.num_classes])
square_center = paddle.sum(paddle.square(self.centers),
axis=1,
keepdim=True)
square_center = paddle.expand(
square_center, [self.num_classes, batch_size]).astype("float64")
square_center = paddle.transpose(square_center, [1, 0])
distmat = paddle.add(square_feat, square_center)
feat_dot_center = paddle.matmul(feats_reshape,
paddle.transpose(self.centers, [1, 0]))
distmat = distmat - 2.0 * feat_dot_center
#generate the mask
classes = paddle.arange(self.num_classes).astype("int64")
label = paddle.expand(
paddle.unsqueeze(label, 1), (batch_size, self.num_classes))
mask = paddle.equal(
paddle.expand(classes, [batch_size, self.num_classes]),
label).astype("float64")
dist = paddle.multiply(distmat, mask)
loss = paddle.sum(paddle.clip(dist, min=1e-12, max=1e+12)) / batch_size
return {'loss_center': loss}
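# Illustrative usage sketch (not part of the original module), with
# hypothetical small sizes: `predicts` is a (features, logits) pair and
# the unused `batch` argument may be None.
if __name__ == "__main__":
    import paddle
    B, T = 2, 5
    feats = paddle.randn([B, T, 8])
    logits = paddle.randn([B, T, 20])
    center = CenterLoss(num_classes=20, feat_dim=8)
    print(center([feats, logits], None)["loss_center"])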
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import nn
class ClsLoss(nn.Layer):
def __init__(self, **kwargs):
super(ClsLoss, self).__init__()
self.loss_func = nn.CrossEntropyLoss(reduction='mean')
def forward(self, predicts, batch):
label = batch[1].astype("int64")
loss = self.loss_func(input=predicts, label=label)
return {'loss': loss}
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
from .rec_ctc_loss import CTCLoss
from .center_loss import CenterLoss
from .ace_loss import ACELoss
from .rec_sar_loss import SARLoss
from .distillation_loss import DistillationCTCLoss
from .distillation_loss import DistillationSARLoss
from .distillation_loss import DistillationDMLLoss
from .distillation_loss import DistillationDistanceLoss, DistillationDBLoss, DistillationDilaDBLoss
class CombinedLoss(nn.Layer):
"""
CombinedLoss:
        a combination of loss functions
"""
def __init__(self, loss_config_list=None):
super().__init__()
self.loss_func = []
self.loss_weight = []
assert isinstance(loss_config_list, list), (
'operator config should be a list')
for config in loss_config_list:
assert isinstance(config,
dict) and len(config) == 1, "yaml format error"
name = list(config)[0]
param = config[name]
assert "weight" in param, "weight must be in param, but param just contains {}".format(
param.keys())
self.loss_weight.append(param.pop("weight"))
self.loss_func.append(eval(name)(**param))
def forward(self, input, batch, **kargs):
loss_dict = {}
loss_all = 0.
for idx, loss_func in enumerate(self.loss_func):
loss = loss_func(input, batch, **kargs)
if isinstance(loss, paddle.Tensor):
loss = {"loss_{}_{}".format(str(loss), idx): loss}
weight = self.loss_weight[idx]
loss = {key: loss[key] * weight for key in loss}
if "loss" in loss:
loss_all += loss["loss"]
else:
loss_all += paddle.add_n(list(loss.values()))
loss_dict.update(loss)
loss_dict["loss"] = loss_all
return loss_dict
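# Illustrative config sketch (not part of the original module):
# loss_config_list is a list of single-key dicts; the key names a loss
# class visible in this module, and the value carries "weight" plus that
# loss's own kwargs. Values below are hypothetical.
if __name__ == "__main__":
    import paddle
    combined = CombinedLoss([{
        "CenterLoss": {"weight": 0.05, "num_classes": 20, "feat_dim": 8}
    }])
    feats = paddle.randn([2, 5, 8])
    logits = paddle.randn([2, 5, 20])
    print(combined([feats, logits], None)["loss"])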
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/basic_loss.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import paddle
from paddle import nn
import paddle.nn.functional as F
class BalanceLoss(nn.Layer):
def __init__(self,
balance_loss=True,
main_loss_type='DiceLoss',
negative_ratio=3,
return_origin=False,
eps=1e-6,
**kwargs):
"""
The BalanceLoss for Differentiable Binarization text detection
args:
balance_loss (bool): whether balance loss or not, default is True
main_loss_type (str): can only be one of ['CrossEntropy','DiceLoss',
'Euclidean','BCELoss', 'MaskL1Loss'], default is 'DiceLoss'.
negative_ratio (int|float): float, default is 3.
return_origin (bool): whether return unbalanced loss or not, default is False.
eps (float): default is 1e-6.
"""
super(BalanceLoss, self).__init__()
self.balance_loss = balance_loss
self.main_loss_type = main_loss_type
self.negative_ratio = negative_ratio
self.return_origin = return_origin
self.eps = eps
if self.main_loss_type == "CrossEntropy":
self.loss = nn.CrossEntropyLoss()
elif self.main_loss_type == "Euclidean":
self.loss = nn.MSELoss()
elif self.main_loss_type == "DiceLoss":
self.loss = DiceLoss(self.eps)
elif self.main_loss_type == "BCELoss":
self.loss = BCELoss(reduction='none')
elif self.main_loss_type == "MaskL1Loss":
self.loss = MaskL1Loss(self.eps)
else:
loss_type = [
'CrossEntropy', 'DiceLoss', 'Euclidean', 'BCELoss', 'MaskL1Loss'
]
raise Exception(
"main_loss_type in BalanceLoss() can only be one of {}".format(
loss_type))
def forward(self, pred, gt, mask=None):
"""
The BalanceLoss for Differentiable Binarization text detection
args:
pred (variable): predicted feature maps.
gt (variable): ground truth feature maps.
mask (variable): masked maps.
return: (variable) balanced loss
"""
positive = gt * mask
negative = (1 - gt) * mask
positive_count = int(positive.sum())
negative_count = int(
min(negative.sum(), positive_count * self.negative_ratio))
loss = self.loss(pred, gt, mask=mask)
if not self.balance_loss:
return loss
positive_loss = positive * loss
negative_loss = negative * loss
negative_loss = paddle.reshape(negative_loss, shape=[-1])
if negative_count > 0:
sort_loss = negative_loss.sort(descending=True)
negative_loss = sort_loss[:negative_count]
# negative_loss, _ = paddle.topk(negative_loss, k=negative_count_int)
balance_loss = (positive_loss.sum() + negative_loss.sum()) / (
positive_count + negative_count + self.eps)
else:
balance_loss = positive_loss.sum() / (positive_count + self.eps)
if self.return_origin:
return balance_loss, loss
return balance_loss
class DiceLoss(nn.Layer):
def __init__(self, eps=1e-6):
super(DiceLoss, self).__init__()
self.eps = eps
def forward(self, pred, gt, mask, weights=None):
"""
DiceLoss function.
"""
assert pred.shape == gt.shape
assert pred.shape == mask.shape
if weights is not None:
assert weights.shape == mask.shape
mask = weights * mask
intersection = paddle.sum(pred * gt * mask)
union = paddle.sum(pred * mask) + paddle.sum(gt * mask) + self.eps
loss = 1 - 2.0 * intersection / union
assert loss <= 1
return loss
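# Worked example of the Dice formula above (hypothetical values):
# intersection = 0.9 + 0.8 = 1.7, union = 2.0 + 2.0 + eps, so
# loss = 1 - 2 * 1.7 / 4.0 ≈ 0.15.
if __name__ == "__main__":
    import paddle
    pred = paddle.to_tensor([[0.9, 0.1], [0.8, 0.2]])
    gt = paddle.to_tensor([[1.0, 0.0], [1.0, 0.0]])
    print(DiceLoss()(pred, gt, paddle.ones_like(gt)))  # ≈ 0.15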
class MaskL1Loss(nn.Layer):
def __init__(self, eps=1e-6):
super(MaskL1Loss, self).__init__()
self.eps = eps
def forward(self, pred, gt, mask):
"""
Mask L1 Loss
"""
loss = (paddle.abs(pred - gt) * mask).sum() / (mask.sum() + self.eps)
loss = paddle.mean(loss)
return loss
class BCELoss(nn.Layer):
def __init__(self, reduction='mean'):
super(BCELoss, self).__init__()
self.reduction = reduction
def forward(self, input, label, mask=None, weight=None, name=None):
loss = F.binary_cross_entropy(input, label, reduction=self.reduction)
return loss
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/DB_loss.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import nn
from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss
class DBLoss(nn.Layer):
"""
Differentiable Binarization (DB) Loss Function
args:
        param (dict): the hyper-parameters for DB Loss
"""
def __init__(self,
balance_loss=True,
main_loss_type='DiceLoss',
alpha=5,
beta=10,
ohem_ratio=3,
eps=1e-6,
**kwargs):
super(DBLoss, self).__init__()
self.alpha = alpha
self.beta = beta
self.dice_loss = DiceLoss(eps=eps)
self.l1_loss = MaskL1Loss(eps=eps)
self.bce_loss = BalanceLoss(
balance_loss=balance_loss,
main_loss_type=main_loss_type,
negative_ratio=ohem_ratio)
def forward(self, predicts, labels):
predict_maps = predicts['maps']
label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = labels[
1:]
shrink_maps = predict_maps[:, 0, :, :]
threshold_maps = predict_maps[:, 1, :, :]
binary_maps = predict_maps[:, 2, :, :]
loss_shrink_maps = self.bce_loss(shrink_maps, label_shrink_map,
label_shrink_mask)
loss_threshold_maps = self.l1_loss(threshold_maps, label_threshold_map,
label_threshold_mask)
loss_binary_maps = self.dice_loss(binary_maps, label_shrink_map,
label_shrink_mask)
loss_shrink_maps = self.alpha * loss_shrink_maps
loss_threshold_maps = self.beta * loss_threshold_maps
loss_all = loss_shrink_maps + loss_threshold_maps \
+ loss_binary_maps
losses = {'loss': loss_all, \
"loss_shrink_maps": loss_shrink_maps, \
"loss_threshold_maps": loss_threshold_maps, \
"loss_binary_maps": loss_binary_maps}
return losses
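# Illustrative usage sketch (not part of the original module), with
# hypothetical shapes: `maps` stacks shrink/threshold/binary maps along
# channel axis 1, and `labels[1:]` supplies the four ground-truth maps.
if __name__ == "__main__":
    import paddle
    maps = paddle.nn.functional.sigmoid(paddle.randn([1, 3, 32, 32]))
    labels = [
        None,                                                # images (unused)
        paddle.rand([1, 32, 32]),                            # threshold map
        paddle.ones([1, 32, 32]),                            # threshold mask
        (paddle.rand([1, 32, 32]) > 0.5).astype('float32'),  # shrink map
        paddle.ones([1, 32, 32]),                            # shrink mask
    ]
    print(DBLoss()({'maps': maps}, labels)['loss'])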
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
from .det_basic_loss import DiceLoss
class EASTLoss(nn.Layer):
"""
"""
def __init__(self,
eps=1e-6,
**kwargs):
super(EASTLoss, self).__init__()
self.dice_loss = DiceLoss(eps=eps)
def forward(self, predicts, labels):
l_score, l_geo, l_mask = labels[1:]
f_score = predicts['f_score']
f_geo = predicts['f_geo']
dice_loss = self.dice_loss(f_score, l_score, l_mask)
        # smooth_l1_loss
channels = 8
l_geo_split = paddle.split(
l_geo, num_or_sections=channels + 1, axis=1)
f_geo_split = paddle.split(f_geo, num_or_sections=channels, axis=1)
smooth_l1 = 0
for i in range(0, channels):
geo_diff = l_geo_split[i] - f_geo_split[i]
abs_geo_diff = paddle.abs(geo_diff)
smooth_l1_sign = paddle.less_than(abs_geo_diff, l_score)
smooth_l1_sign = paddle.cast(smooth_l1_sign, dtype='float32')
in_loss = abs_geo_diff * abs_geo_diff * smooth_l1_sign + \
(abs_geo_diff - 0.5) * (1.0 - smooth_l1_sign)
out_loss = l_geo_split[-1] / channels * in_loss * l_score
smooth_l1 += out_loss
smooth_l1_loss = paddle.mean(smooth_l1 * l_score)
dice_loss = dice_loss * 0.01
total_loss = dice_loss + smooth_l1_loss
losses = {"loss":total_loss, \
"dice_loss":dice_loss,\
"smooth_l1_loss":smooth_l1_loss}
return losses
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textdet/losses/fce_loss.py
"""
import numpy as np
from paddle import nn
import paddle
import paddle.nn.functional as F
from functools import partial
def multi_apply(func, *args, **kwargs):
pfunc = partial(func, **kwargs) if kwargs else func
map_results = map(pfunc, *args)
return tuple(map(list, zip(*map_results)))
class FCELoss(nn.Layer):
"""The class for implementing FCENet loss
FCENet(CVPR2021): Fourier Contour Embedding for Arbitrary-shaped
Text Detection
[https://arxiv.org/abs/2104.10442]
Args:
fourier_degree (int) : The maximum Fourier transform degree k.
        num_sample (int) : The number of sampling points for the regression
            loss. If it is too small, FCENet tends to overfit.
ohem_ratio (float): the negative/positive ratio in OHEM.
"""
def __init__(self, fourier_degree, num_sample, ohem_ratio=3.):
super().__init__()
self.fourier_degree = fourier_degree
self.num_sample = num_sample
self.ohem_ratio = ohem_ratio
def forward(self, preds, labels):
assert isinstance(preds, dict)
preds = preds['levels']
p3_maps, p4_maps, p5_maps = labels[1:]
assert p3_maps[0].shape[0] == 4 * self.fourier_degree + 5,\
'fourier degree not equal in FCEhead and FCEtarget'
# to tensor
gts = [p3_maps, p4_maps, p5_maps]
for idx, maps in enumerate(gts):
gts[idx] = paddle.to_tensor(np.stack(maps))
losses = multi_apply(self.forward_single, preds, gts)
loss_tr = paddle.to_tensor(0.).astype('float32')
loss_tcl = paddle.to_tensor(0.).astype('float32')
loss_reg_x = paddle.to_tensor(0.).astype('float32')
loss_reg_y = paddle.to_tensor(0.).astype('float32')
loss_all = paddle.to_tensor(0.).astype('float32')
for idx, loss in enumerate(losses):
loss_all += sum(loss)
if idx == 0:
loss_tr += sum(loss)
elif idx == 1:
loss_tcl += sum(loss)
elif idx == 2:
loss_reg_x += sum(loss)
else:
loss_reg_y += sum(loss)
results = dict(
loss=loss_all,
loss_text=loss_tr,
loss_center=loss_tcl,
loss_reg_x=loss_reg_x,
loss_reg_y=loss_reg_y, )
return results
def forward_single(self, pred, gt):
cls_pred = paddle.transpose(pred[0], (0, 2, 3, 1))
reg_pred = paddle.transpose(pred[1], (0, 2, 3, 1))
gt = paddle.transpose(gt, (0, 2, 3, 1))
k = 2 * self.fourier_degree + 1
tr_pred = paddle.reshape(cls_pred[:, :, :, :2], (-1, 2))
tcl_pred = paddle.reshape(cls_pred[:, :, :, 2:], (-1, 2))
x_pred = paddle.reshape(reg_pred[:, :, :, 0:k], (-1, k))
y_pred = paddle.reshape(reg_pred[:, :, :, k:2 * k], (-1, k))
tr_mask = gt[:, :, :, :1].reshape([-1])
tcl_mask = gt[:, :, :, 1:2].reshape([-1])
train_mask = gt[:, :, :, 2:3].reshape([-1])
x_map = paddle.reshape(gt[:, :, :, 3:3 + k], (-1, k))
y_map = paddle.reshape(gt[:, :, :, 3 + k:], (-1, k))
tr_train_mask = (train_mask * tr_mask).astype('bool')
tr_train_mask2 = paddle.concat(
[tr_train_mask.unsqueeze(1), tr_train_mask.unsqueeze(1)], axis=1)
# tr loss
loss_tr = self.ohem(tr_pred, tr_mask, train_mask)
# tcl loss
loss_tcl = paddle.to_tensor(0.).astype('float32')
tr_neg_mask = tr_train_mask.logical_not()
tr_neg_mask2 = paddle.concat(
[tr_neg_mask.unsqueeze(1), tr_neg_mask.unsqueeze(1)], axis=1)
if tr_train_mask.sum().item() > 0:
loss_tcl_pos = F.cross_entropy(
tcl_pred.masked_select(tr_train_mask2).reshape([-1, 2]),
tcl_mask.masked_select(tr_train_mask).astype('int64'))
loss_tcl_neg = F.cross_entropy(
tcl_pred.masked_select(tr_neg_mask2).reshape([-1, 2]),
tcl_mask.masked_select(tr_neg_mask).astype('int64'))
loss_tcl = loss_tcl_pos + 0.5 * loss_tcl_neg
# regression loss
loss_reg_x = paddle.to_tensor(0.).astype('float32')
loss_reg_y = paddle.to_tensor(0.).astype('float32')
if tr_train_mask.sum().item() > 0:
weight = (tr_mask.masked_select(tr_train_mask.astype('bool'))
.astype('float32') + tcl_mask.masked_select(
tr_train_mask.astype('bool')).astype('float32')) / 2
weight = weight.reshape([-1, 1])
ft_x, ft_y = self.fourier2poly(x_map, y_map)
ft_x_pre, ft_y_pre = self.fourier2poly(x_pred, y_pred)
dim = ft_x.shape[1]
tr_train_mask3 = paddle.concat(
[tr_train_mask.unsqueeze(1) for i in range(dim)], axis=1)
loss_reg_x = paddle.mean(weight * F.smooth_l1_loss(
ft_x_pre.masked_select(tr_train_mask3).reshape([-1, dim]),
ft_x.masked_select(tr_train_mask3).reshape([-1, dim]),
reduction='none'))
loss_reg_y = paddle.mean(weight * F.smooth_l1_loss(
ft_y_pre.masked_select(tr_train_mask3).reshape([-1, dim]),
ft_y.masked_select(tr_train_mask3).reshape([-1, dim]),
reduction='none'))
return loss_tr, loss_tcl, loss_reg_x, loss_reg_y
def ohem(self, predict, target, train_mask):
pos = (target * train_mask).astype('bool')
neg = ((1 - target) * train_mask).astype('bool')
pos2 = paddle.concat([pos.unsqueeze(1), pos.unsqueeze(1)], axis=1)
neg2 = paddle.concat([neg.unsqueeze(1), neg.unsqueeze(1)], axis=1)
n_pos = pos.astype('float32').sum()
if n_pos.item() > 0:
loss_pos = F.cross_entropy(
predict.masked_select(pos2).reshape([-1, 2]),
target.masked_select(pos).astype('int64'),
reduction='sum')
loss_neg = F.cross_entropy(
predict.masked_select(neg2).reshape([-1, 2]),
target.masked_select(neg).astype('int64'),
reduction='none')
n_neg = min(
int(neg.astype('float32').sum().item()),
int(self.ohem_ratio * n_pos.astype('float32')))
else:
loss_pos = paddle.to_tensor(0.)
loss_neg = F.cross_entropy(
predict.masked_select(neg2).reshape([-1, 2]),
target.masked_select(neg).astype('int64'),
reduction='none')
n_neg = 100
if len(loss_neg) > n_neg:
loss_neg, _ = paddle.topk(loss_neg, n_neg)
return (loss_pos + loss_neg.sum()) / (n_pos + n_neg).astype('float32')
def fourier2poly(self, real_maps, imag_maps):
"""Transform Fourier coefficient maps to polygon maps.
Args:
real_maps (tensor): A map composed of the real parts of the
Fourier coefficients, whose shape is (-1, 2k+1)
imag_maps (tensor):A map composed of the imag parts of the
Fourier coefficients, whose shape is (-1, 2k+1)
Returns
x_maps (tensor): A map composed of the x value of the polygon
represented by n sample points (xn, yn), whose shape is (-1, n)
y_maps (tensor): A map composed of the y value of the polygon
represented by n sample points (xn, yn), whose shape is (-1, n)
"""
k_vect = paddle.arange(
-self.fourier_degree, self.fourier_degree + 1,
dtype='float32').reshape([-1, 1])
i_vect = paddle.arange(
0, self.num_sample, dtype='float32').reshape([1, -1])
transform_matrix = 2 * np.pi / self.num_sample * paddle.matmul(k_vect,
i_vect)
x1 = paddle.einsum('ak, kn-> an', real_maps,
paddle.cos(transform_matrix))
x2 = paddle.einsum('ak, kn-> an', imag_maps,
paddle.sin(transform_matrix))
y1 = paddle.einsum('ak, kn-> an', real_maps,
paddle.sin(transform_matrix))
y2 = paddle.einsum('ak, kn-> an', imag_maps,
paddle.cos(transform_matrix))
x_maps = x1 - x2
y_maps = y1 + y2
return x_maps, y_maps
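# Illustrative sketch of fourier2poly (not part of the original module),
# with hypothetical values: 2k+1 = 11 Fourier coefficients per contour
# are expanded into num_sample polygon points via the inverse transform
# built from paddle.einsum above.
if __name__ == "__main__":
    import paddle
    fce = FCELoss(fourier_degree=5, num_sample=50)
    real_maps = paddle.randn([3, 11])
    imag_maps = paddle.randn([3, 11])
    x_maps, y_maps = fce.fourier2poly(real_maps, imag_maps)
    print(x_maps.shape, y_maps.shape)  # [3, 50] [3, 50]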
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py
"""
import paddle
from paddle import nn
from paddle.nn import functional as F
import numpy as np
from ppocr.utils.iou import iou
class PSELoss(nn.Layer):
def __init__(self,
alpha,
ohem_ratio=3,
kernel_sample_mask='pred',
reduction='sum',
eps=1e-6,
**kwargs):
"""Implement PSE Loss.
"""
super(PSELoss, self).__init__()
assert reduction in ['sum', 'mean', 'none']
self.alpha = alpha
self.ohem_ratio = ohem_ratio
self.kernel_sample_mask = kernel_sample_mask
self.reduction = reduction
self.eps = eps
def forward(self, outputs, labels):
predicts = outputs['maps']
predicts = F.interpolate(predicts, scale_factor=4)
texts = predicts[:, 0, :, :]
kernels = predicts[:, 1:, :, :]
gt_texts, gt_kernels, training_masks = labels[1:]
# text loss
selected_masks = self.ohem_batch(texts, gt_texts, training_masks)
loss_text = self.dice_loss(texts, gt_texts, selected_masks)
iou_text = iou((texts > 0).astype('int64'),
gt_texts,
training_masks,
reduce=False)
losses = dict(loss_text=loss_text, iou_text=iou_text)
# kernel loss
loss_kernels = []
if self.kernel_sample_mask == 'gt':
selected_masks = gt_texts * training_masks
elif self.kernel_sample_mask == 'pred':
selected_masks = (
F.sigmoid(texts) > 0.5).astype('float32') * training_masks
for i in range(kernels.shape[1]):
kernel_i = kernels[:, i, :, :]
gt_kernel_i = gt_kernels[:, i, :, :]
loss_kernel_i = self.dice_loss(kernel_i, gt_kernel_i,
selected_masks)
loss_kernels.append(loss_kernel_i)
loss_kernels = paddle.mean(paddle.stack(loss_kernels, axis=1), axis=1)
iou_kernel = iou((kernels[:, -1, :, :] > 0).astype('int64'),
gt_kernels[:, -1, :, :],
training_masks * gt_texts,
reduce=False)
losses.update(dict(loss_kernels=loss_kernels, iou_kernel=iou_kernel))
loss = self.alpha * loss_text + (1 - self.alpha) * loss_kernels
losses['loss'] = loss
if self.reduction == 'sum':
losses = {x: paddle.sum(v) for x, v in losses.items()}
elif self.reduction == 'mean':
losses = {x: paddle.mean(v) for x, v in losses.items()}
return losses
def dice_loss(self, input, target, mask):
input = F.sigmoid(input)
input = input.reshape([input.shape[0], -1])
target = target.reshape([target.shape[0], -1])
mask = mask.reshape([mask.shape[0], -1])
input = input * mask
target = target * mask
a = paddle.sum(input * target, 1)
b = paddle.sum(input * input, 1) + self.eps
c = paddle.sum(target * target, 1) + self.eps
d = (2 * a) / (b + c)
return 1 - d
def ohem_single(self, score, gt_text, training_mask, ohem_ratio=3):
pos_num = int(paddle.sum((gt_text > 0.5).astype('float32'))) - int(
paddle.sum(
paddle.logical_and((gt_text > 0.5), (training_mask <= 0.5))
.astype('float32')))
if pos_num == 0:
selected_mask = training_mask
selected_mask = selected_mask.reshape(
[1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
'float32')
return selected_mask
neg_num = int(paddle.sum((gt_text <= 0.5).astype('float32')))
neg_num = int(min(pos_num * ohem_ratio, neg_num))
if neg_num == 0:
selected_mask = training_mask
selected_mask = selected_mask.reshape(
[1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
'float32')
return selected_mask
neg_score = paddle.masked_select(score, gt_text <= 0.5)
neg_score_sorted = paddle.sort(-neg_score)
threshold = -neg_score_sorted[neg_num - 1]
selected_mask = paddle.logical_and(
paddle.logical_or((score >= threshold), (gt_text > 0.5)),
(training_mask > 0.5))
selected_mask = selected_mask.reshape(
[1, selected_mask.shape[0], selected_mask.shape[1]]).astype(
'float32')
return selected_mask
def ohem_batch(self, scores, gt_texts, training_masks, ohem_ratio=3):
selected_masks = []
for i in range(scores.shape[0]):
selected_masks.append(
self.ohem_single(scores[i, :, :], gt_texts[i, :, :],
training_masks[i, :, :], ohem_ratio))
selected_masks = paddle.concat(selected_masks, 0).astype('float32')
return selected_masks
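# Illustrative sketch of ohem_single (not part of the original module),
# with hypothetical inputs: hard negatives are kept up to ohem_ratio
# times the positive count, and the returned mask gains a leading batch
# axis of size 1.
if __name__ == "__main__":
    import paddle
    pse = PSELoss(alpha=0.7)
    score = paddle.rand([32, 32])
    gt_text = (paddle.rand([32, 32]) > 0.5).astype('float32')
    training_mask = paddle.ones([32, 32])
    print(pse.ohem_single(score, gt_text, training_mask).shape)  # [1, 32, 32]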
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
from .det_basic_loss import DiceLoss
import numpy as np
class SASTLoss(nn.Layer):
"""
"""
def __init__(self, eps=1e-6, **kwargs):
super(SASTLoss, self).__init__()
self.dice_loss = DiceLoss(eps=eps)
def forward(self, predicts, labels):
"""
tcl_pos: N x 128 x 3
tcl_mask: N x 128 x 1
tcl_label: N x X list or LoDTensor
"""
f_score = predicts['f_score']
f_border = predicts['f_border']
f_tvo = predicts['f_tvo']
f_tco = predicts['f_tco']
l_score, l_border, l_mask, l_tvo, l_tco = labels[1:]
#score_loss
intersection = paddle.sum(f_score * l_score * l_mask)
union = paddle.sum(f_score * l_mask) + paddle.sum(l_score * l_mask)
score_loss = 1.0 - 2 * intersection / (union + 1e-5)
#border loss
l_border_split, l_border_norm = paddle.split(
l_border, num_or_sections=[4, 1], axis=1)
f_border_split = f_border
border_ex_shape = l_border_norm.shape * np.array([1, 4, 1, 1])
l_border_norm_split = paddle.expand(
x=l_border_norm, shape=border_ex_shape)
l_border_score = paddle.expand(x=l_score, shape=border_ex_shape)
l_border_mask = paddle.expand(x=l_mask, shape=border_ex_shape)
border_diff = l_border_split - f_border_split
abs_border_diff = paddle.abs(border_diff)
border_sign = abs_border_diff < 1.0
border_sign = paddle.cast(border_sign, dtype='float32')
border_sign.stop_gradient = True
border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \
(abs_border_diff - 0.5) * (1.0 - border_sign)
border_out_loss = l_border_norm_split * border_in_loss
border_loss = paddle.sum(border_out_loss * l_border_score * l_border_mask) / \
(paddle.sum(l_border_score * l_border_mask) + 1e-5)
#tvo_loss
l_tvo_split, l_tvo_norm = paddle.split(
l_tvo, num_or_sections=[8, 1], axis=1)
f_tvo_split = f_tvo
tvo_ex_shape = l_tvo_norm.shape * np.array([1, 8, 1, 1])
l_tvo_norm_split = paddle.expand(x=l_tvo_norm, shape=tvo_ex_shape)
l_tvo_score = paddle.expand(x=l_score, shape=tvo_ex_shape)
l_tvo_mask = paddle.expand(x=l_mask, shape=tvo_ex_shape)
#
tvo_geo_diff = l_tvo_split - f_tvo_split
abs_tvo_geo_diff = paddle.abs(tvo_geo_diff)
tvo_sign = abs_tvo_geo_diff < 1.0
tvo_sign = paddle.cast(tvo_sign, dtype='float32')
tvo_sign.stop_gradient = True
tvo_in_loss = 0.5 * abs_tvo_geo_diff * abs_tvo_geo_diff * tvo_sign + \
(abs_tvo_geo_diff - 0.5) * (1.0 - tvo_sign)
tvo_out_loss = l_tvo_norm_split * tvo_in_loss
tvo_loss = paddle.sum(tvo_out_loss * l_tvo_score * l_tvo_mask) / \
(paddle.sum(l_tvo_score * l_tvo_mask) + 1e-5)
#tco_loss
l_tco_split, l_tco_norm = paddle.split(
l_tco, num_or_sections=[2, 1], axis=1)
f_tco_split = f_tco
tco_ex_shape = l_tco_norm.shape * np.array([1, 2, 1, 1])
l_tco_norm_split = paddle.expand(x=l_tco_norm, shape=tco_ex_shape)
l_tco_score = paddle.expand(x=l_score, shape=tco_ex_shape)
l_tco_mask = paddle.expand(x=l_mask, shape=tco_ex_shape)
tco_geo_diff = l_tco_split - f_tco_split
abs_tco_geo_diff = paddle.abs(tco_geo_diff)
tco_sign = abs_tco_geo_diff < 1.0
tco_sign = paddle.cast(tco_sign, dtype='float32')
tco_sign.stop_gradient = True
tco_in_loss = 0.5 * abs_tco_geo_diff * abs_tco_geo_diff * tco_sign + \
(abs_tco_geo_diff - 0.5) * (1.0 - tco_sign)
tco_out_loss = l_tco_norm_split * tco_in_loss
tco_loss = paddle.sum(tco_out_loss * l_tco_score * l_tco_mask) / \
(paddle.sum(l_tco_score * l_tco_mask) + 1e-5)
# total loss
tvo_lw, tco_lw = 1.5, 1.5
score_lw, border_lw = 1.0, 1.0
total_loss = score_loss * score_lw + border_loss * border_lw + \
tvo_loss * tvo_lw + tco_loss * tco_lw
losses = {'loss':total_loss, "score_loss":score_loss,\
"border_loss":border_loss, 'tvo_loss':tvo_loss, 'tco_loss':tco_loss}
return losses
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle
import paddle.nn as nn
import numpy as np
import cv2
from .rec_ctc_loss import CTCLoss
from .rec_sar_loss import SARLoss
from .basic_loss import DMLLoss
from .basic_loss import DistanceLoss
from .det_db_loss import DBLoss
from .det_basic_loss import BalanceLoss, MaskL1Loss, DiceLoss
def _sum_loss(loss_dict):
if "loss" in loss_dict.keys():
return loss_dict
else:
loss_dict["loss"] = 0.
for k, value in loss_dict.items():
if k == "loss":
continue
else:
loss_dict["loss"] += value
return loss_dict
class DistillationDMLLoss(DMLLoss):
"""
"""
def __init__(self,
model_name_pairs=[],
act=None,
use_log=False,
key=None,
multi_head=False,
dis_head='ctc',
maps_name=None,
name="dml"):
super().__init__(act=act, use_log=use_log)
assert isinstance(model_name_pairs, list)
self.key = key
self.multi_head = multi_head
self.dis_head = dis_head
self.model_name_pairs = self._check_model_name_pairs(model_name_pairs)
self.name = name
self.maps_name = self._check_maps_name(maps_name)
def _check_model_name_pairs(self, model_name_pairs):
if not isinstance(model_name_pairs, list):
return []
elif isinstance(model_name_pairs[0], list) and isinstance(
model_name_pairs[0][0], str):
return model_name_pairs
else:
return [model_name_pairs]
def _check_maps_name(self, maps_name):
if maps_name is None:
return None
elif type(maps_name) == str:
return [maps_name]
elif type(maps_name) == list:
return [maps_name]
else:
return None
def _slice_out(self, outs):
new_outs = {}
for k in self.maps_name:
if k == "thrink_maps":
new_outs[k] = outs[:, 0, :, :]
elif k == "threshold_maps":
new_outs[k] = outs[:, 1, :, :]
elif k == "binary_maps":
new_outs[k] = outs[:, 2, :, :]
else:
continue
return new_outs
def forward(self, predicts, batch):
loss_dict = dict()
for idx, pair in enumerate(self.model_name_pairs):
out1 = predicts[pair[0]]
out2 = predicts[pair[1]]
if self.key is not None:
out1 = out1[self.key]
out2 = out2[self.key]
if self.maps_name is None:
if self.multi_head:
loss = super().forward(out1[self.dis_head],
out2[self.dis_head])
else:
loss = super().forward(out1, out2)
if isinstance(loss, dict):
for key in loss:
loss_dict["{}_{}_{}_{}".format(key, pair[0], pair[1],
idx)] = loss[key]
else:
loss_dict["{}_{}".format(self.name, idx)] = loss
else:
outs1 = self._slice_out(out1)
outs2 = self._slice_out(out2)
for _c, k in enumerate(outs1.keys()):
loss = super().forward(outs1[k], outs2[k])
if isinstance(loss, dict):
for key in loss:
loss_dict["{}_{}_{}_{}_{}".format(key, pair[
0], pair[1], self.maps_name, idx)] = loss[key]
else:
loss_dict["{}_{}_{}".format(self.name, self.maps_name[
_c], idx)] = loss
loss_dict = _sum_loss(loss_dict)
return loss_dict
class DistillationCTCLoss(CTCLoss):
def __init__(self,
model_name_list=[],
key=None,
multi_head=False,
name="loss_ctc"):
super().__init__()
self.model_name_list = model_name_list
self.key = key
self.name = name
self.multi_head = multi_head
def forward(self, predicts, batch):
loss_dict = dict()
for idx, model_name in enumerate(self.model_name_list):
out = predicts[model_name]
if self.key is not None:
out = out[self.key]
if self.multi_head:
assert 'ctc' in out, 'multi head has multi out'
loss = super().forward(out['ctc'], batch[:2] + batch[3:])
else:
loss = super().forward(out, batch)
if isinstance(loss, dict):
for key in loss:
loss_dict["{}_{}_{}".format(self.name, model_name,
idx)] = loss[key]
else:
loss_dict["{}_{}".format(self.name, model_name)] = loss
return loss_dict
class DistillationSARLoss(SARLoss):
def __init__(self,
model_name_list=[],
key=None,
multi_head=False,
name="loss_sar",
**kwargs):
ignore_index = kwargs.get('ignore_index', 92)
super().__init__(ignore_index=ignore_index)
self.model_name_list = model_name_list
self.key = key
self.name = name
self.multi_head = multi_head
def forward(self, predicts, batch):
loss_dict = dict()
for idx, model_name in enumerate(self.model_name_list):
out = predicts[model_name]
if self.key is not None:
out = out[self.key]
if self.multi_head:
assert 'sar' in out, 'multi head has multi out'
loss = super().forward(out['sar'], batch[:1] + batch[2:])
else:
loss = super().forward(out, batch)
if isinstance(loss, dict):
for key in loss:
loss_dict["{}_{}_{}".format(self.name, model_name,
idx)] = loss[key]
else:
loss_dict["{}_{}".format(self.name, model_name)] = loss
return loss_dict
class DistillationDBLoss(DBLoss):
def __init__(self,
model_name_list=[],
balance_loss=True,
main_loss_type='DiceLoss',
alpha=5,
beta=10,
ohem_ratio=3,
eps=1e-6,
name="db",
**kwargs):
super().__init__()
self.model_name_list = model_name_list
self.name = name
self.key = None
def forward(self, predicts, batch):
loss_dict = {}
for idx, model_name in enumerate(self.model_name_list):
out = predicts[model_name]
if self.key is not None:
out = out[self.key]
loss = super().forward(out, batch)
if isinstance(loss, dict):
for key in loss.keys():
if key == "loss":
continue
name = "{}_{}_{}".format(self.name, model_name, key)
loss_dict[name] = loss[key]
else:
loss_dict["{}_{}".format(self.name, model_name)] = loss
loss_dict = _sum_loss(loss_dict)
return loss_dict
class DistillationDilaDBLoss(DBLoss):
def __init__(self,
model_name_pairs=[],
key=None,
balance_loss=True,
main_loss_type='DiceLoss',
alpha=5,
beta=10,
ohem_ratio=3,
eps=1e-6,
name="dila_dbloss"):
super().__init__()
self.model_name_pairs = model_name_pairs
self.name = name
self.key = key
def forward(self, predicts, batch):
loss_dict = dict()
for idx, pair in enumerate(self.model_name_pairs):
stu_outs = predicts[pair[0]]
tch_outs = predicts[pair[1]]
            if self.key is not None:
                stu_preds = stu_outs[self.key]
                tch_preds = tch_outs[self.key]
            else:
                stu_preds = stu_outs
                tch_preds = tch_outs
stu_shrink_maps = stu_preds[:, 0, :, :]
stu_binary_maps = stu_preds[:, 2, :, :]
# dilation to teacher prediction
dilation_w = np.array([[1, 1], [1, 1]])
th_shrink_maps = tch_preds[:, 0, :, :]
th_shrink_maps = th_shrink_maps.numpy() > 0.3 # thresh = 0.3
dilate_maps = np.zeros_like(th_shrink_maps).astype(np.float32)
for i in range(th_shrink_maps.shape[0]):
dilate_maps[i] = cv2.dilate(
th_shrink_maps[i, :, :].astype(np.uint8), dilation_w)
th_shrink_maps = paddle.to_tensor(dilate_maps)
label_threshold_map, label_threshold_mask, label_shrink_map, label_shrink_mask = batch[
1:]
# calculate the shrink map loss
bce_loss = self.alpha * self.bce_loss(
stu_shrink_maps, th_shrink_maps, label_shrink_mask)
loss_binary_maps = self.dice_loss(stu_binary_maps, th_shrink_maps,
label_shrink_mask)
# k = f"{self.name}_{pair[0]}_{pair[1]}"
k = "{}_{}_{}".format(self.name, pair[0], pair[1])
loss_dict[k] = bce_loss + loss_binary_maps
loss_dict = _sum_loss(loss_dict)
return loss_dict
class DistillationDistanceLoss(DistanceLoss):
"""
"""
def __init__(self,
mode="l2",
model_name_pairs=[],
key=None,
name="loss_distance",
**kargs):
super().__init__(mode=mode, **kargs)
assert isinstance(model_name_pairs, list)
self.key = key
self.model_name_pairs = model_name_pairs
self.name = name + "_l2"
def forward(self, predicts, batch):
loss_dict = dict()
for idx, pair in enumerate(self.model_name_pairs):
out1 = predicts[pair[0]]
out2 = predicts[pair[1]]
if self.key is not None:
out1 = out1[self.key]
out2 = out2[self.key]
loss = super().forward(out1, out2)
if isinstance(loss, dict):
for key in loss:
loss_dict["{}_{}_{}".format(self.name, key, idx)] = loss[
key]
else:
loss_dict["{}_{}_{}_{}".format(self.name, pair[0], pair[1],
idx)] = loss
return loss_dict
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import nn
import paddle
from .det_basic_loss import DiceLoss
from ppocr.utils.e2e_utils.extract_batchsize import pre_process
class PGLoss(nn.Layer):
def __init__(self,
tcl_bs,
max_text_length,
max_text_nums,
pad_num,
eps=1e-6,
**kwargs):
super(PGLoss, self).__init__()
self.tcl_bs = tcl_bs
self.max_text_nums = max_text_nums
self.max_text_length = max_text_length
self.pad_num = pad_num
self.dice_loss = DiceLoss(eps=eps)
def border_loss(self, f_border, l_border, l_score, l_mask):
l_border_split, l_border_norm = paddle.tensor.split(
l_border, num_or_sections=[4, 1], axis=1)
f_border_split = f_border
b, c, h, w = l_border_norm.shape
l_border_norm_split = paddle.expand(
x=l_border_norm, shape=[b, 4 * c, h, w])
b, c, h, w = l_score.shape
l_border_score = paddle.expand(x=l_score, shape=[b, 4 * c, h, w])
b, c, h, w = l_mask.shape
l_border_mask = paddle.expand(x=l_mask, shape=[b, 4 * c, h, w])
border_diff = l_border_split - f_border_split
abs_border_diff = paddle.abs(border_diff)
border_sign = abs_border_diff < 1.0
border_sign = paddle.cast(border_sign, dtype='float32')
border_sign.stop_gradient = True
border_in_loss = 0.5 * abs_border_diff * abs_border_diff * border_sign + \
(abs_border_diff - 0.5) * (1.0 - border_sign)
border_out_loss = l_border_norm_split * border_in_loss
border_loss = paddle.sum(border_out_loss * l_border_score * l_border_mask) / \
(paddle.sum(l_border_score * l_border_mask) + 1e-5)
return border_loss
def direction_loss(self, f_direction, l_direction, l_score, l_mask):
l_direction_split, l_direction_norm = paddle.tensor.split(
l_direction, num_or_sections=[2, 1], axis=1)
f_direction_split = f_direction
b, c, h, w = l_direction_norm.shape
l_direction_norm_split = paddle.expand(
x=l_direction_norm, shape=[b, 2 * c, h, w])
b, c, h, w = l_score.shape
l_direction_score = paddle.expand(x=l_score, shape=[b, 2 * c, h, w])
b, c, h, w = l_mask.shape
l_direction_mask = paddle.expand(x=l_mask, shape=[b, 2 * c, h, w])
direction_diff = l_direction_split - f_direction_split
abs_direction_diff = paddle.abs(direction_diff)
direction_sign = abs_direction_diff < 1.0
direction_sign = paddle.cast(direction_sign, dtype='float32')
direction_sign.stop_gradient = True
direction_in_loss = 0.5 * abs_direction_diff * abs_direction_diff * direction_sign + \
(abs_direction_diff - 0.5) * (1.0 - direction_sign)
direction_out_loss = l_direction_norm_split * direction_in_loss
direction_loss = paddle.sum(direction_out_loss * l_direction_score * l_direction_mask) / \
(paddle.sum(l_direction_score * l_direction_mask) + 1e-5)
return direction_loss
def ctcloss(self, f_char, tcl_pos, tcl_mask, tcl_label, label_t):
f_char = paddle.transpose(f_char, [0, 2, 3, 1])
tcl_pos = paddle.reshape(tcl_pos, [-1, 3])
tcl_pos = paddle.cast(tcl_pos, dtype=int)
f_tcl_char = paddle.gather_nd(f_char, tcl_pos)
f_tcl_char = paddle.reshape(f_tcl_char,
[-1, 64, 37]) # len(Lexicon_Table)+1
f_tcl_char_fg, f_tcl_char_bg = paddle.split(f_tcl_char, [36, 1], axis=2)
f_tcl_char_bg = f_tcl_char_bg * tcl_mask + (1.0 - tcl_mask) * 20.0
b, c, l = tcl_mask.shape
tcl_mask_fg = paddle.expand(x=tcl_mask, shape=[b, c, 36 * l])
tcl_mask_fg.stop_gradient = True
f_tcl_char_fg = f_tcl_char_fg * tcl_mask_fg + (1.0 - tcl_mask_fg) * (
-20.0)
f_tcl_char_mask = paddle.concat([f_tcl_char_fg, f_tcl_char_bg], axis=2)
f_tcl_char_ld = paddle.transpose(f_tcl_char_mask, (1, 0, 2))
N, B, _ = f_tcl_char_ld.shape
input_lengths = paddle.to_tensor([N] * B, dtype='int64')
cost = paddle.nn.functional.ctc_loss(
log_probs=f_tcl_char_ld,
labels=tcl_label,
input_lengths=input_lengths,
label_lengths=label_t,
blank=self.pad_num,
reduction='none')
cost = cost.mean()
return cost
def forward(self, predicts, labels):
images, tcl_maps, tcl_label_maps, border_maps \
, direction_maps, training_masks, label_list, pos_list, pos_mask = labels
# for all the batch_size
pos_list, pos_mask, label_list, label_t = pre_process(
label_list, pos_list, pos_mask, self.max_text_length,
self.max_text_nums, self.pad_num, self.tcl_bs)
f_score, f_border, f_direction, f_char = predicts['f_score'], predicts['f_border'], predicts['f_direction'], \
predicts['f_char']
score_loss = self.dice_loss(f_score, tcl_maps, training_masks)
border_loss = self.border_loss(f_border, border_maps, tcl_maps,
training_masks)
direction_loss = self.direction_loss(f_direction, direction_maps,
tcl_maps, training_masks)
ctc_loss = self.ctcloss(f_char, pos_list, pos_mask, label_list, label_t)
loss_all = score_loss + border_loss + direction_loss + 5 * ctc_loss
losses = {
'loss': loss_all,
"score_loss": score_loss,
"border_loss": border_loss,
"direction_loss": direction_loss,
"ctc_loss": ctc_loss
}
return losses
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This code is adapted from: https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/kie/losses/sdmgr_loss.py
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import nn
import paddle
class SDMGRLoss(nn.Layer):
def __init__(self, node_weight=1.0, edge_weight=1.0, ignore=0):
super().__init__()
self.loss_node = nn.CrossEntropyLoss(ignore_index=ignore)
self.loss_edge = nn.CrossEntropyLoss(ignore_index=-1)
self.node_weight = node_weight
self.edge_weight = edge_weight
self.ignore = ignore
def pre_process(self, gts, tag):
gts, tag = gts.numpy(), tag.numpy().tolist()
temp_gts = []
batch = len(tag)
for i in range(batch):
num, recoder_len = tag[i][0], tag[i][1]
temp_gts.append(
paddle.to_tensor(
gts[i, :num, :num + 1], dtype='int64'))
return temp_gts
def accuracy(self, pred, target, topk=1, thresh=None):
"""Calculate accuracy according to the prediction and target.
Args:
            pred (Tensor): The model prediction, shape (N, num_class)
            target (Tensor): The target of each prediction, shape (N, )
topk (int | tuple[int], optional): If the predictions in ``topk``
matches the target, the predictions will be regarded as
correct ones. Defaults to 1.
thresh (float, optional): If not None, predictions with scores under
                this threshold are considered incorrect. Defaults to None.
Returns:
float | tuple[float]: If the input ``topk`` is a single integer,
the function will return a single float as accuracy. If
``topk`` is a tuple containing multiple integers, the
function will return a tuple containing accuracies of
each ``topk`` number.
"""
assert isinstance(topk, (int, tuple))
if isinstance(topk, int):
topk = (topk, )
return_single = True
else:
return_single = False
maxk = max(topk)
if pred.shape[0] == 0:
            # paddle.Tensor has no new_tensor(); construct the zeros directly
            accu = [paddle.to_tensor(0.) for i in range(len(topk))]
return accu[0] if return_single else accu
pred_value, pred_label = paddle.topk(pred, maxk, axis=1)
pred_label = pred_label.transpose(
[1, 0]) # transpose to shape (maxk, N)
correct = paddle.equal(pred_label,
(target.reshape([1, -1]).expand_as(pred_label)))
res = []
for k in topk:
correct_k = paddle.sum(correct[:k].reshape([-1]).astype('float32'),
axis=0,
keepdim=True)
res.append(
paddle.multiply(correct_k,
paddle.to_tensor(100.0 / pred.shape[0])))
return res[0] if return_single else res
def forward(self, pred, batch):
node_preds, edge_preds = pred
gts, tag = batch[4], batch[5]
gts = self.pre_process(gts, tag)
node_gts, edge_gts = [], []
for gt in gts:
node_gts.append(gt[:, 0])
edge_gts.append(gt[:, 1:].reshape([-1]))
node_gts = paddle.concat(node_gts)
edge_gts = paddle.concat(edge_gts)
node_valids = paddle.nonzero(node_gts != self.ignore).reshape([-1])
edge_valids = paddle.nonzero(edge_gts != -1).reshape([-1])
loss_node = self.loss_node(node_preds, node_gts)
loss_edge = self.loss_edge(edge_preds, edge_gts)
loss = self.node_weight * loss_node + self.edge_weight * loss_edge
return dict(
loss=loss,
loss_node=loss_node,
loss_edge=loss_edge,
acc_node=self.accuracy(
paddle.gather(node_preds, node_valids),
paddle.gather(node_gts, node_valids)),
acc_edge=self.accuracy(
paddle.gather(edge_preds, edge_valids),
paddle.gather(edge_gts, edge_valids)))
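# Illustrative sketch of accuracy() (not part of the original module),
# with hypothetical values: two of the three top-1 predictions match the
# target, so the result is about 66.7.
if __name__ == "__main__":
    import paddle
    sdmgr = SDMGRLoss()
    pred = paddle.to_tensor([[0.1, 0.9], [0.8, 0.2], [0.3, 0.7]])
    target = paddle.to_tensor([1, 0, 0])
    print(sdmgr.accuracy(pred, target, topk=1))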
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
class CosineEmbeddingLoss(nn.Layer):
def __init__(self, margin=0.):
super(CosineEmbeddingLoss, self).__init__()
self.margin = margin
self.epsilon = 1e-12
def forward(self, x1, x2, target):
        # paddle.sum takes `axis`, not torch-style `dim`
        similarity = paddle.sum(
            x1 * x2, axis=-1) / (paddle.norm(
                x1, axis=-1) * paddle.norm(
                    x2, axis=-1) + self.epsilon)
one_list = paddle.full_like(target, fill_value=1)
out = paddle.mean(
paddle.where(
paddle.equal(target, one_list), 1. - similarity,
paddle.maximum(
paddle.zeros_like(similarity), similarity - self.margin)))
return out
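# Illustrative usage sketch (not part of the original module), with
# hypothetical values: target 1 pulls a pair together via 1 - cos_sim;
# any other target pushes it apart via max(0, cos_sim - margin).
if __name__ == "__main__":
    import paddle
    x1, x2 = paddle.randn([4, 16]), paddle.randn([4, 16])
    target = paddle.to_tensor([1.0, 1.0, 0.0, 0.0])
    print(CosineEmbeddingLoss(margin=0.1)(x1, x2, target))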
class AsterLoss(nn.Layer):
def __init__(self,
weight=None,
size_average=True,
ignore_index=-100,
sequence_normalize=False,
sample_normalize=True,
**kwargs):
super(AsterLoss, self).__init__()
self.weight = weight
self.size_average = size_average
self.ignore_index = ignore_index
self.sequence_normalize = sequence_normalize
self.sample_normalize = sample_normalize
self.loss_sem = CosineEmbeddingLoss()
self.is_cosin_loss = True
self.loss_func_rec = nn.CrossEntropyLoss(weight=None, reduction='none')
def forward(self, predicts, batch):
targets = batch[1].astype("int64")
label_lengths = batch[2].astype('int64')
sem_target = batch[3].astype('float32')
embedding_vectors = predicts['embedding_vectors']
rec_pred = predicts['rec_pred']
if not self.is_cosin_loss:
sem_loss = paddle.sum(self.loss_sem(embedding_vectors, sem_target))
else:
label_target = paddle.ones([embedding_vectors.shape[0]])
sem_loss = paddle.sum(
self.loss_sem(embedding_vectors, sem_target, label_target))
# rec loss
batch_size, def_max_length = targets.shape[0], targets.shape[1]
mask = paddle.zeros([batch_size, def_max_length])
for i in range(batch_size):
mask[i, :label_lengths[i]] = 1
mask = paddle.cast(mask, "float32")
max_length = max(label_lengths)
assert max_length == rec_pred.shape[1]
targets = targets[:, :max_length]
mask = mask[:, :max_length]
rec_pred = paddle.reshape(rec_pred, [-1, rec_pred.shape[2]])
input = nn.functional.log_softmax(rec_pred, axis=1)
targets = paddle.reshape(targets, [-1, 1])
mask = paddle.reshape(mask, [-1, 1])
output = -paddle.index_sample(input, index=targets) * mask
output = paddle.sum(output)
if self.sequence_normalize:
output = output / paddle.sum(mask)
if self.sample_normalize:
output = output / batch_size
loss = output + sem_loss * 0.1
return {'loss': loss}