Merge branch 'dygraph' of https://github.com/PaddlePaddle/PaddleOCR into test_v10

5ec23a47 · LDOUBLEV · 6d1085a4 · 790b5b0b · 5ec23a47 · 5ec23a47
Commit 5ec23a47 authored Nov 08, 2021 by LDOUBLEV
20 changed files
--- a/deploy/pdserving/README.md
+++ b/deploy/pdserving/README.md
@@ -114,7 +114,7 @@ The recognition model is the same.
    git clone https://github.com/PaddlePaddle/PaddleOCR
    # Enter the working directory  
-    cd PaddleOCR/deploy/pdserver/
+    cd PaddleOCR/deploy/pdserving/
    ```
    The pdserving directory contains the code to start the pipeline service and send prediction requests, including:

--- a/deploy/pdserving/README_CN.md
+++ b/deploy/pdserving/README_CN.md
@@ -112,7 +112,7 @@ python3 -m paddle_serving_client.convert --dirname ./ch_ppocr_mobile_v2.0_rec_in
    git clone https://github.com/PaddlePaddle/PaddleOCR
    # 进入到工作目录
-    cd PaddleOCR/deploy/pdserver/
+    cd PaddleOCR/deploy/pdserving/
    ```
    pdserving目录包含启动pipeline服务和发送预测请求的代码，包括：
    ```

--- a/ppocr/data/imaug/iaa_augment.py
+++ b/ppocr/data/imaug/iaa_augment.py
@@ -11,6 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+This code is adapted from:
+https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/iaa_augment.py
+"""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppocr/data/imaug/make_border_map.py
+++ b/ppocr/data/imaug/make_border_map.py
-# -*- coding:utf-8 -*- 
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is adapted from:
+https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/make_border_map.py
+"""
 from __future__ import absolute_import
 from __future__ import division

--- a/ppocr/data/imaug/make_pse_gt.py
+++ b/ppocr/data/imaug/make_pse_gt.py
-# -*- coding:utf-8 -*- 
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 from __future__ import absolute_import
 from __future__ import division
@@ -12,12 +24,8 @@ from shapely.geometry import Polygon
 __all__ = ['MakePseGt']
-class MakePseGt(object):
-    r'''
-    Making binary mask from detection data with ICDAR format.
-    Typically following the process of class `MakeICDARData`.
-    '''
+class MakePseGt(object):
    def __init__(self, kernel_num=7, size=640, min_shrink_ratio=0.4, **kwargs):
        self.kernel_num = kernel_num
        self.min_shrink_ratio = min_shrink_ratio
@@ -38,16 +46,20 @@ class MakePseGt(object):
            text_polys *= scale
        gt_kernels = []
-        for i in range(1,self.kernel_num+1):
+        for i in range(1, self.kernel_num + 1):
            # s1->sn, from big to small
-            rate = 1.0 - (1.0 - self.min_shrink_ratio) / (self.kernel_num - 1) * i
+            rate = 1.0 - (1.0 - self.min_shrink_ratio) / (self.kernel_num - 1
-            text_kernel, ignore_tags = self.generate_kernel(image.shape[0:2], rate, text_polys, ignore_tags)
+                                                          ) * i
+            text_kernel, ignore_tags = self.generate_kernel(
+                image.shape[0:2], rate, text_polys, ignore_tags)
            gt_kernels.append(text_kernel)
        training_mask = np.ones(image.shape[0:2], dtype='uint8')
        for i in range(text_polys.shape[0]):
            if ignore_tags[i]:
-                cv2.fillPoly(training_mask, text_polys[i].astype(np.int32)[np.newaxis, :, :], 0)
+                cv2.fillPoly(training_mask,
+                             text_polys[i].astype(np.int32)[np.newaxis, :, :],
+                             0)
        gt_kernels = np.array(gt_kernels)
        gt_kernels[gt_kernels > 0] = 1
@@ -59,16 +71,25 @@ class MakePseGt(object):
        data['mask'] = training_mask.astype('float32')
        return data
-    def generate_kernel(self, img_size, shrink_ratio, text_polys, ignore_tags=None):
+    def generate_kernel(self,
+                        img_size,
+                        shrink_ratio,
+                        text_polys,
+                        ignore_tags=None):
+        """
+        Refer to part of the code:
+        https://github.com/open-mmlab/mmocr/blob/main/mmocr/datasets/pipelines/textdet_targets/base_textdet_targets.py
+        """
        h, w = img_size
        text_kernel = np.zeros((h, w), dtype=np.float32)
        for i, poly in enumerate(text_polys):
            polygon = Polygon(poly)
-            distance = polygon.area * (1 - shrink_ratio * shrink_ratio) / (polygon.length + 1e-6)
+            distance = polygon.area * (1 - shrink_ratio * shrink_ratio) / (
+                polygon.length + 1e-6)
            subject = [tuple(l) for l in poly]
            pco = pyclipper.PyclipperOffset()
-            pco.AddPath(subject, pyclipper.JT_ROUND,
+            pco.AddPath(subject, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON)
-                        pyclipper.ET_CLOSEDPOLYGON)
            shrinked = np.array(pco.Execute(-distance))
            if len(shrinked) == 0 or shrinked.size == 0:

--- a/ppocr/data/imaug/make_shrink_map.py
+++ b/ppocr/data/imaug/make_shrink_map.py
-# -*- coding:utf-8 -*- 
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is adapted from:
+https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/make_shrink_map.py
+"""
 from __future__ import absolute_import
 from __future__ import division

--- a/ppocr/data/imaug/random_crop_data.py
+++ b/ppocr/data/imaug/random_crop_data.py
-# -*- coding:utf-8 -*- 
+# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is adapted from:
+https://github.com/WenmuZhou/DBNet.pytorch/blob/master/data_loader/modules/random_crop_data.py
+"""
 from __future__ import absolute_import
 from __future__ import division

--- a/ppocr/data/imaug/text_image_aug/augment.py
+++ b/ppocr/data/imaug/text_image_aug/augment.py
@@ -11,6 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+This code is adapted from:
+https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/augment.py
+"""
 import numpy as np
 from .warp_mls import WarpMLS

--- a/ppocr/data/imaug/text_image_aug/warp_mls.py
+++ b/ppocr/data/imaug/text_image_aug/warp_mls.py
@@ -11,6 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+This code is adapted from:
+https://github.com/RubanSeven/Text-Image-Augmentation-python/blob/master/warp_mls.py
+"""
 import numpy as np

--- a/ppocr/losses/ace_loss.py
+++ b/ppocr/losses/ace_loss.py
@@ -11,6 +11,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# This code is adapted from: https://github.com/viig99/LS-ACELoss
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppocr/losses/center_loss.py
+++ b/ppocr/losses/center_loss.py
@@ -12,6 +12,8 @@
 #See the License for the specific language governing permissions and
 #limitations under the License.
+# This code is adapted from: https://github.com/KaiyangZhou/pytorch-center-loss
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -27,6 +29,7 @@ class CenterLoss(nn.Layer):
    """
    Reference: Wen et al. A Discriminative Feature Learning Approach for Deep Face Recognition. ECCV 2016.
    """
    def __init__(self,
                 num_classes=6625,
                 feat_dim=96,

--- a/ppocr/losses/det_basic_loss.py
+++ b/ppocr/losses/det_basic_loss.py
@@ -11,7 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+This code is adapted from:
+https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/basic_loss.py
+"""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppocr/losses/det_db_loss.py
+++ b/ppocr/losses/det_db_loss.py
@@ -11,6 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+This code is adapted from:
+https://github.com/WenmuZhou/DBNet.pytorch/blob/master/models/losses/DB_loss.py
+"""
 from __future__ import absolute_import
 from __future__ import division

--- a/ppocr/losses/det_pse_loss.py
+++ b/ppocr/losses/det_pse_loss.py
@@ -11,6 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+This code is adapted from:
+https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py
+"""
 import paddle
 from paddle import nn

--- a/ppocr/modeling/backbones/rec_mv1_enhance.py
+++ b/ppocr/modeling/backbones/rec_mv1_enhance.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# This code is adapted from: https://github.com/PaddlePaddle/PaddleClas/blob/develop/ppcls/arch/backbone/legendary_models/pp_lcnet.py
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppocr/modeling/backbones/rec_resnet_31.py
+++ b/ppocr/modeling/backbones/rec_resnet_31.py
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+This code is adapted from:
+https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/layers/conv_layer.py
+https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/backbones/resnet31_ocr.py
+"""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
@@ -18,12 +37,12 @@ def conv3x3(in_channel, out_channel, stride=1):
        kernel_size=3,
        stride=stride,
        padding=1,
-        bias_attr=False
+        bias_attr=False)
-    )
 class BasicBlock(nn.Layer):
    expansion = 1
    def __init__(self, in_channels, channels, stride=1, downsample=False):
        super().__init__()
        self.conv1 = conv3x3(in_channels, channels, stride)
@@ -34,9 +53,13 @@ class BasicBlock(nn.Layer):
        self.downsample = downsample
        if downsample:
            self.downsample = nn.Sequential(
-                nn.Conv2D(in_channels, channels * self.expansion, 1, stride, bias_attr=False),
+                nn.Conv2D(
-                nn.BatchNorm2D(channels * self.expansion),
+                    in_channels,
-            )
+                    channels * self.expansion,
+                    1,
+                    stride,
+                    bias_attr=False),
+                nn.BatchNorm2D(channels * self.expansion), )
        else:
            self.downsample = nn.Sequential()
        self.stride = stride
@@ -69,6 +92,7 @@ class ResNet31(nn.Layer):
        out_indices (None | Sequence[int]): Indices of output stages.
        last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage.
    '''
    def __init__(self,
                 in_channels=3,
                 layers=[1, 2, 5, 3],
@@ -83,41 +107,51 @@ class ResNet31(nn.Layer):
        self.last_stage_pool = last_stage_pool
        # conv 1 (Conv Conv)
-        self.conv1_1 = nn.Conv2D(in_channels, channels[0], kernel_size=3, stride=1, padding=1)
+        self.conv1_1 = nn.Conv2D(
+            in_channels, channels[0], kernel_size=3, stride=1, padding=1)
        self.bn1_1 = nn.BatchNorm2D(channels[0])
        self.relu1_1 = nn.ReLU()
-        self.conv1_2 = nn.Conv2D(channels[0], channels[1], kernel_size=3, stride=1, padding=1)
+        self.conv1_2 = nn.Conv2D(
+            channels[0], channels[1], kernel_size=3, stride=1, padding=1)
        self.bn1_2 = nn.BatchNorm2D(channels[1])
        self.relu1_2 = nn.ReLU()
        # conv 2 (Max-pooling, Residual block, Conv)
-        self.pool2 = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True)
+        self.pool2 = nn.MaxPool2D(
+            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block2 = self._make_layer(channels[1], channels[2], layers[0])
-        self.conv2 = nn.Conv2D(channels[2], channels[2], kernel_size=3, stride=1, padding=1)
+        self.conv2 = nn.Conv2D(
+            channels[2], channels[2], kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2D(channels[2])
        self.relu2 = nn.ReLU()
        # conv 3 (Max-pooling, Residual block, Conv)
-        self.pool3 = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True)
+        self.pool3 = nn.MaxPool2D(
+            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block3 = self._make_layer(channels[2], channels[3], layers[1])
-        self.conv3 = nn.Conv2D(channels[3], channels[3], kernel_size=3, stride=1, padding=1)
+        self.conv3 = nn.Conv2D(
+            channels[3], channels[3], kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2D(channels[3])
        self.relu3 = nn.ReLU()
        # conv 4 (Max-pooling, Residual block, Conv)
-        self.pool4 = nn.MaxPool2D(kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True)
+        self.pool4 = nn.MaxPool2D(
+            kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True)
        self.block4 = self._make_layer(channels[3], channels[4], layers[2])
-        self.conv4 = nn.Conv2D(channels[4], channels[4], kernel_size=3, stride=1, padding=1)
+        self.conv4 = nn.Conv2D(
+            channels[4], channels[4], kernel_size=3, stride=1, padding=1)
        self.bn4 = nn.BatchNorm2D(channels[4])
        self.relu4 = nn.ReLU()
        # conv 5 ((Max-pooling), Residual block, Conv)
        self.pool5 = None
        if self.last_stage_pool:
-            self.pool5 = nn.MaxPool2D(kernel_size=2, stride=2, padding=0, ceil_mode=True)
+            self.pool5 = nn.MaxPool2D(
+                kernel_size=2, stride=2, padding=0, ceil_mode=True)
        self.block5 = self._make_layer(channels[4], channels[5], layers[3])
-        self.conv5 = nn.Conv2D(channels[5], channels[5], kernel_size=3, stride=1, padding=1)
+        self.conv5 = nn.Conv2D(
+            channels[5], channels[5], kernel_size=3, stride=1, padding=1)
        self.bn5 = nn.BatchNorm2D(channels[5])
        self.relu5 = nn.ReLU()
@@ -135,14 +169,14 @@ class ResNet31(nn.Layer):
                        kernel_size=1,
                        stride=1,
                        bias_attr=False),
-                    nn.BatchNorm2D(output_channels),
+                    nn.BatchNorm2D(output_channels), )
-                )
-            layers.append(BasicBlock(input_channels, output_channels, downsample=downsample))
+            layers.append(
+                BasicBlock(
+                    input_channels, output_channels, downsample=downsample))
            input_channels = output_channels
        return nn.Sequential(*layers)
    def forward(self, x):
        x = self.conv1_1(x)
        x = self.bn1_1(x)
@@ -166,7 +200,7 @@ class ResNet31(nn.Layer):
            x = block_layer(x)
            x = conv_layer(x)
            x = bn_layer(x)
-            x= relu_layer(x)
+            x = relu_layer(x)
            outs.append(x)

--- a/ppocr/modeling/backbones/rec_resnet_aster.py
+++ b/ppocr/modeling/backbones/rec_resnet_aster.py
@@ -11,7 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+This code is adapted from:
+https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/resnet_aster.py
+"""
 import paddle
 import paddle.nn as nn

--- a/ppocr/modeling/heads/det_pse_head.py
+++ b/ppocr/modeling/heads/det_pse_head.py
-# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
+# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -11,22 +11,24 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+This code is adapted from:
+https://github.com/whai362/PSENet/blob/python3/models/head/psenet_head.py
+"""
 from paddle import nn
 class PSEHead(nn.Layer):
-    def __init__(self,
+    def __init__(self, in_channels, hidden_dim=256, out_channels=7, **kwargs):
-                 in_channels,
-                 hidden_dim=256,
-                 out_channels=7,
-                 **kwargs):
        super(PSEHead, self).__init__()
-        self.conv1 = nn.Conv2D(in_channels, hidden_dim, kernel_size=3, stride=1, padding=1)
+        self.conv1 = nn.Conv2D(
+            in_channels, hidden_dim, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2D(hidden_dim)
        self.relu1 = nn.ReLU()
-        self.conv2 = nn.Conv2D(hidden_dim, out_channels, kernel_size=1, stride=1, padding=0)
+        self.conv2 = nn.Conv2D(
+            hidden_dim, out_channels, kernel_size=1, stride=1, padding=0)
    def forward(self, x, **kwargs):
        out = self.conv1(x)

--- a/ppocr/modeling/heads/rec_aster_head.py
+++ b/ppocr/modeling/heads/rec_aster_head.py
@@ -11,6 +11,10 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+"""
+This code is adapted from:
+https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/attention_recognition_head.py
+"""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

--- a/ppocr/modeling/heads/rec_att_head.py
+++ b/ppocr/modeling/heads/rec_att_head.py
@@ -75,7 +75,7 @@ class AttentionHead(nn.Layer):
                            probs_step, axis=1)], axis=1)
                next_input = probs_step.argmax(axis=1)
                targets = next_input
+            probs = paddle.nn.functional.softmax(probs, axis=2)
        return probs