det_pse_loss.py 5.57 KB
Newer Older
WenmuZhou's avatar
WenmuZhou committed
1
2
3
4
5
6
7
8
9
10
11
12
13
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
WenmuZhou's avatar
WenmuZhou committed
14
15
16
17
"""
This code is adapted from:
https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py
"""
WenmuZhou's avatar
WenmuZhou committed
18

WenmuZhou's avatar
WenmuZhou committed
19
20
21
22
23
24
25
26
import paddle
from paddle import nn
from paddle.nn import functional as F
import numpy as np
from ppocr.utils.iou import iou


class PSELoss(nn.Layer):
    """Training loss combining a text-map dice loss and a kernel-map dice loss.

    The final loss is ``alpha * loss_text + (1 - alpha) * loss_kernels``.
    The text term is computed on pixels chosen by online hard example
    mining (see ``ohem_single``); the kernel term is computed on pixels
    chosen according to ``kernel_sample_mask``.
    """

    def __init__(self,
                 alpha,
                 ohem_ratio=3,
                 kernel_sample_mask='pred',
                 reduction='sum',
                 eps=1e-6,
                 **kwargs):
        """Implement PSE Loss.

        Args:
            alpha: weight of the text loss; the kernel loss is weighted by
                ``1 - alpha``.
            ohem_ratio: maximum number of negative pixels kept per positive
                pixel when building the text-loss mask.
            kernel_sample_mask: ``'gt'`` masks the kernel loss with
                ``gt_texts * training_masks``; ``'pred'`` masks it with the
                thresholded text prediction times ``training_masks``. Any
                other value leaves the text-loss OHEM mask in place.
            reduction: ``'sum'``, ``'mean'`` or ``'none'``; applied to every
                entry of the dict returned by ``forward``.
            eps: constant added to the dice denominators to avoid a
                division by zero.
            **kwargs: accepted and ignored.
        """
        super(PSELoss, self).__init__()
        assert reduction in ['sum', 'mean', 'none']
        self.alpha = alpha
        self.ohem_ratio = ohem_ratio
        self.kernel_sample_mask = kernel_sample_mask
        self.reduction = reduction
        self.eps = eps

    def forward(self, outputs, labels):
        """Compute the PSE losses and IoU values for one batch.

        Args:
            outputs: mapping whose ``'maps'`` entry is a 4-D tensor; channel
                0 is read as the text map and channels 1.. as kernel maps.
                It is upsampled by a factor of 4 before use.
            labels: sequence whose elements after the first unpack to
                exactly ``(gt_texts, gt_kernels, training_masks)``.

        Returns:
            dict with keys ``loss_text``, ``iou_text``, ``loss_kernels``,
            ``iou_kernel`` and ``loss``, each reduced per ``self.reduction``.
        """
        predicts = outputs['maps']
        predicts = F.interpolate(predicts, scale_factor=4)

        texts = predicts[:, 0, :, :]
        kernels = predicts[:, 1:, :, :]
        gt_texts, gt_kernels, training_masks = labels[1:]

        # text loss
        # Use the configured ratio; previously the constructor argument was
        # stored but never forwarded, so the default of 3 was always used.
        selected_masks = self.ohem_batch(texts, gt_texts, training_masks,
                                         self.ohem_ratio)

        loss_text = self.dice_loss(texts, gt_texts, selected_masks)
        # NOTE(review): `iou` comes from ppocr.utils.iou; with reduce=False
        # it presumably returns one value per sample - confirm there.
        iou_text = iou((texts > 0).astype('int64'),
                       gt_texts,
                       training_masks,
                       reduce=False)
        losses = dict(loss_text=loss_text, iou_text=iou_text)

        # kernel loss
        # For any value other than 'gt' / 'pred' the OHEM mask computed for
        # the text loss above is reused unchanged.
        if self.kernel_sample_mask == 'gt':
            selected_masks = gt_texts * training_masks
        elif self.kernel_sample_mask == 'pred':
            selected_masks = (
                F.sigmoid(texts) > 0.5).astype('float32') * training_masks

        loss_kernels = [
            self.dice_loss(kernels[:, i, :, :], gt_kernels[:, i, :, :],
                           selected_masks) for i in range(kernels.shape[1])
        ]
        # Average the per-kernel losses, keeping the batch dimension.
        loss_kernels = paddle.mean(paddle.stack(loss_kernels, axis=1), axis=1)
        # Kernel IoU is reported for the last kernel channel only.
        iou_kernel = iou((kernels[:, -1, :, :] > 0).astype('int64'),
                         gt_kernels[:, -1, :, :],
                         training_masks * gt_texts,
                         reduce=False)
        losses.update(dict(loss_kernels=loss_kernels, iou_kernel=iou_kernel))

        loss = self.alpha * loss_text + (1 - self.alpha) * loss_kernels
        losses['loss'] = loss
        if self.reduction == 'sum':
            losses = {x: paddle.sum(v) for x, v in losses.items()}
        elif self.reduction == 'mean':
            losses = {x: paddle.mean(v) for x, v in losses.items()}
        return losses

    def dice_loss(self, input, target, mask):
        """Return the per-sample dice loss ``1 - 2*a / (b + c)``.

        ``input`` is passed through a sigmoid; all three tensors are then
        flattened to ``[batch, -1]`` and ``input`` / ``target`` are multiplied
        by ``mask`` before the sums ``a = sum(input * target)``,
        ``b = sum(input^2) + eps`` and ``c = sum(target^2) + eps`` are taken
        over axis 1.
        """
        input = F.sigmoid(input)

        input = input.reshape([input.shape[0], -1])
        target = target.reshape([target.shape[0], -1])
        mask = mask.reshape([mask.shape[0], -1])

        input = input * mask
        target = target * mask

        a = paddle.sum(input * target, 1)
        b = paddle.sum(input * input, 1) + self.eps
        c = paddle.sum(target * target, 1) + self.eps
        d = (2 * a) / (b + c)
        return 1 - d

    @staticmethod
    def _to_batch_mask(mask):
        """Reshape a 2-D mask to ``[1, H, W]`` and cast it to float32."""
        return mask.reshape([1, mask.shape[0], mask.shape[1]]).astype(
            'float32')

    def ohem_single(self, score, gt_text, training_mask, ohem_ratio=3):
        """Build the hard-example selection mask for a single sample.

        Positives are pixels with ``gt_text > 0.5``. At most
        ``pos_num * ohem_ratio`` of the highest-scoring negatives
        (``gt_text <= 0.5``) are kept alongside them, and everything is
        restricted to ``training_mask > 0.5``. When there are no usable
        positives or no negatives to pick, ``training_mask`` itself is
        returned.

        Returns:
            float32 tensor of shape ``[1, H, W]``.
        """
        positive = gt_text > 0.5
        ignored_positive = paddle.logical_and(positive, training_mask <= 0.5)
        # Positives that are not masked out of training.
        pos_num = int(paddle.sum(positive.astype('float32'))) - int(
            paddle.sum(ignored_positive.astype('float32')))
        if pos_num == 0:
            return self._to_batch_mask(training_mask)

        neg_num = int(paddle.sum((gt_text <= 0.5).astype('float32')))
        neg_num = int(min(pos_num * ohem_ratio, neg_num))
        if neg_num == 0:
            # This branch used a torch-style `.view(1, H, W)` call; it now
            # goes through the same reshape as every other return path.
            return self._to_batch_mask(training_mask)

        neg_score = paddle.masked_select(score, gt_text <= 0.5)
        # Sorting the negated scores ascending orders the scores descending,
        # so index neg_num - 1 is the neg_num-th highest negative score.
        neg_score_sorted = paddle.sort(-neg_score)
        threshold = -neg_score_sorted[neg_num - 1]

        selected_mask = paddle.logical_and(
            paddle.logical_or((score >= threshold), positive),
            (training_mask > 0.5))
        return self._to_batch_mask(selected_mask)

    def ohem_batch(self, scores, gt_texts, training_masks, ohem_ratio=3):
        """Apply ``ohem_single`` to every sample and concatenate on axis 0.

        Returns:
            float32 tensor with one ``[H, W]`` mask per sample in ``scores``.
        """
        selected_masks = [
            self.ohem_single(scores[i, :, :], gt_texts[i, :, :],
                             training_masks[i, :, :], ohem_ratio)
            for i in range(scores.shape[0])
        ]
        return paddle.concat(selected_masks, 0).astype('float32')