"example/24_batched_gemm/CMakeLists.txt" did not exist on "2327f1a640c267743f119e59d759bc62a7887eae"
Unverified Commit 96c91907 authored by dyning's avatar dyning Committed by GitHub
Browse files

Merge pull request #1105 from dyning/dygraph

update structure of dygraph
parents 7d09cd19 1ae37919
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import numpy as np
import os
import random
import paddle
from paddle.io import Dataset
import time
from .imaug import transform, create_operators
class SimpleDataSet(Dataset):
    """A dataset serving samples listed in one or more label files.

    Each label file contains one sample per line formatted as
    ``<image path><delimiter><label>``.  When several label files are
    given, ``ratio_list`` controls roughly how many samples are drawn
    from each file per batch round (see ``dataset_traversal``).
    """

    def __init__(self, config, mode, logger):
        """
        Args:
            config (dict): full configuration; the ``config['Global']``,
                ``config[mode]['dataset']`` and ``config[mode]['loader']``
                sections are used.
            mode (str): top-level config key (e.g. 'Train' or 'Eval').
            logger: logger object exposing an ``info`` method.
        """
        super(SimpleDataSet, self).__init__()
        global_config = config['Global']
        dataset_config = config[mode]['dataset']
        loader_config = config[mode]['loader']
        batch_size = loader_config['batch_size_per_card']
        self.delimiter = dataset_config.get('delimiter', '\t')
        label_file_list = dataset_config.pop('label_file_list')
        data_source_num = len(label_file_list)
        if data_source_num == 1:
            ratio_list = [1.0]
        else:
            ratio_list = dataset_config.pop('ratio_list')
            # BUGFIX: exact float equality (`sum(ratio_list) == 1`) wrongly
            # rejects valid ratios such as [0.3, 0.3, 0.4], whose float sum
            # is not exactly 1.0; compare with a small tolerance instead.
            assert abs(sum(ratio_list) -
                       1.0) < 1e-6, "The sum of the ratio_list should be 1."
            assert len(
                ratio_list
            ) == data_source_num, "The length of ratio_list should be the same as the file_list."
        self.data_dir = dataset_config['data_dir']
        self.do_shuffle = loader_config['shuffle']
        logger.info("Initialize indexs of datasets:%s" % label_file_list)
        self.data_lines_list, data_num_list = self.get_image_info_list(
            label_file_list)
        self.data_idx_order_list = self.dataset_traversal(
            data_num_list, ratio_list, batch_size)
        self.shuffle_data_random()
        self.ops = create_operators(dataset_config['transforms'],
                                    global_config)

    def get_image_info_list(self, file_list):
        """Read every label file as raw bytes lines.

        Args:
            file_list (str or list[str]): label file path(s); a single
                path string is promoted to a one-element list.

        Returns:
            tuple: ``(data_lines_list, data_num_list)`` where
                ``data_lines_list[i]`` holds the raw (bytes) lines of the
                i-th file and ``data_num_list[i]`` its line count.
        """
        if isinstance(file_list, str):
            file_list = [file_list]
        data_lines_list = []
        data_num_list = []
        for file in file_list:
            with open(file, "rb") as f:
                lines = f.readlines()
                data_lines_list.append(lines)
                data_num_list.append(len(lines))
        return data_lines_list, data_num_list

    def dataset_traversal(self, data_num_list, ratio_list, batch_size):
        """Build the global sample visiting order.

        In each round, roughly ``batch_size * ratio_list[i]`` consecutive
        samples (at least one) are taken from dataset ``i``; rounds repeat
        until every dataset is exhausted, so all samples appear exactly once.

        Returns:
            list[tuple]: ``(dataset_idx, sample_idx)`` pairs in visiting
                order.
        """
        select_num_list = []
        dataset_num = len(data_num_list)
        for dno in range(dataset_num):
            # Every dataset contributes at least one sample per round, even
            # when its ratio would round the count down to zero.
            select_num = round(batch_size * ratio_list[dno])
            select_num = max(select_num, 1)
            select_num_list.append(select_num)
        data_idx_order_list = []
        cur_index_sets = [0] * dataset_num
        while True:
            finish_read_num = 0
            for dataset_idx in range(dataset_num):
                cur_index = cur_index_sets[dataset_idx]
                if cur_index >= data_num_list[dataset_idx]:
                    finish_read_num += 1
                else:
                    select_num = select_num_list[dataset_idx]
                    for sno in range(select_num):
                        cur_index = cur_index_sets[dataset_idx]
                        if cur_index >= data_num_list[dataset_idx]:
                            break
                        data_idx_order_list.append((dataset_idx, cur_index))
                        cur_index_sets[dataset_idx] += 1
            if finish_read_num == dataset_num:
                break
        return data_idx_order_list

    def shuffle_data_random(self):
        """Shuffle each label file's lines in place when shuffling is on."""
        if self.do_shuffle:
            for dno in range(len(self.data_lines_list)):
                random.shuffle(self.data_lines_list[dno])
        return

    def __getitem__(self, idx):
        """Load, decode and transform the idx-th sample.

        When the transform pipeline rejects the sample (returns None), a
        random other sample is returned instead.
        """
        dataset_idx, file_idx = self.data_idx_order_list[idx]
        data_line = self.data_lines_list[dataset_idx][file_idx]
        data_line = data_line.decode('utf-8')
        substr = data_line.strip("\n").split(self.delimiter)
        file_name = substr[0]
        label = substr[1]
        img_path = os.path.join(self.data_dir, file_name)
        data = {'img_path': img_path, 'label': label}
        with open(data['img_path'], 'rb') as f:
            img = f.read()
            data['image'] = img
        outs = transform(data, self.ops)
        if outs is None:
            # NOTE(review): retries recursively with a random index; if many
            # samples are rejected this can recurse deeply.
            return self.__getitem__(np.random.randint(self.__len__()))
        return outs

    def __len__(self):
        return len(self.data_idx_order_list)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from .losses import build_loss
__all__ = ['build_model', 'build_loss']
def build_model(config):
    """Build and return the model described by ``config``.

    A deep copy of ``config`` is handed to the architecture so the
    caller's dict is never mutated during construction.
    """
    from .architectures import Model

    model_config = copy.deepcopy(config)
    return Model(model_config)
......@@ -12,5 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .model import Model
__all__ = ['Model']
\ No newline at end of file
import copy
__all__ = ['build_model']
def build_model(config):
    """Instantiate a BaseModel from a configuration dict.

    The configuration is deep-copied first so building the model cannot
    mutate the caller's dict.
    """
    from .base_model import BaseModel

    arch_config = copy.deepcopy(config)
    return BaseModel(arch_config)
\ No newline at end of file
......@@ -15,34 +15,25 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os, sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append('/home/zhoujun20/PaddleOCR')
from paddle import nn
from ppocr.modeling.transform import build_transform
from ppocr.modeling.backbones import build_backbone
from ppocr.modeling.necks import build_neck
from ppocr.modeling.heads import build_head
__all__ = ['Model']
__all__ = ['BaseModel']
class Model(nn.Layer):
class BaseModel(nn.Layer):
def __init__(self, config):
"""
Detection module for OCR.
the module for OCR.
args:
config (dict): the super parameters for module.
"""
super(Model, self).__init__()
algorithm = config['algorithm']
self.type = config['type']
self.model_name = '{}_{}'.format(self.type, algorithm)
super(BaseModel, self).__init__()
in_channels = config.get('in_channels', 3)
model_type = config['model_type']
# build transfrom,
# for rec, transfrom can be TPS,None
# for det and cls, transfrom shoule to be None,
......@@ -57,7 +48,7 @@ class Model(nn.Layer):
# build backbone, backbone is need for del, rec and cls
config["Backbone"]['in_channels'] = in_channels
self.backbone = build_backbone(config["Backbone"], self.type)
self.backbone = build_backbone(config["Backbone"], model_type)
in_channels = self.backbone.out_channels
# build neck
......@@ -71,6 +62,7 @@ class Model(nn.Layer):
config['Neck']['in_channels'] = in_channels
self.neck = build_neck(config['Neck'])
in_channels = self.neck.out_channels
# # build head, head is need for det, rec and cls
config["Head"]['in_channels'] = in_channels
self.head = build_head(config["Head"])
......
......@@ -19,7 +19,6 @@ def build_backbone(config, model_type):
if model_type == 'det':
from .det_mobilenet_v3 import MobileNetV3
from .det_resnet_vd import ResNet
support_dict = ['MobileNetV3', 'ResNet', 'ResNet_SAST']
elif model_type == 'rec':
from .rec_mobilenet_v3 import MobileNetV3
......
......@@ -130,7 +130,6 @@ class MobileNetV3(nn.Layer):
if_act=True,
act='hard_swish',
name='conv_last'))
self.stages.append(nn.Sequential(*block_list))
self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
for i, stage in enumerate(self.stages):
......@@ -159,7 +158,7 @@ class ConvBNLayer(nn.Layer):
super(ConvBNLayer, self).__init__()
self.if_act = if_act
self.act = act
self.conv = nn.Conv2d(
self.conv = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
......@@ -184,7 +183,7 @@ class ConvBNLayer(nn.Layer):
if self.act == "relu":
x = F.relu(x)
elif self.act == "hard_swish":
x = F.hard_swish(x)
x = F.activation.hard_swish(x)
else:
print("The activation function is selected incorrectly.")
exit()
......@@ -243,16 +242,15 @@ class ResidualUnit(nn.Layer):
x = self.mid_se(x)
x = self.linear_conv(x)
if self.if_shortcut:
x = paddle.elementwise_add(inputs, x)
x = paddle.add(inputs, x)
return x
class SEModule(nn.Layer):
def __init__(self, in_channels, reduction=4, name=""):
super(SEModule, self).__init__()
self.avg_pool = nn.Pool2D(
pool_type="avg", global_pooling=True, use_cudnn=False)
self.conv1 = nn.Conv2d(
self.avg_pool = nn.AdaptiveAvgPool2D(1)
self.conv1 = nn.Conv2D(
in_channels=in_channels,
out_channels=in_channels // reduction,
kernel_size=1,
......@@ -260,7 +258,7 @@ class SEModule(nn.Layer):
padding=0,
weight_attr=ParamAttr(name=name + "_1_weights"),
bias_attr=ParamAttr(name=name + "_1_offset"))
self.conv2 = nn.Conv2d(
self.conv2 = nn.Conv2D(
in_channels=in_channels // reduction,
out_channels=in_channels,
kernel_size=1,
......@@ -274,5 +272,5 @@ class SEModule(nn.Layer):
outputs = self.conv1(outputs)
outputs = F.relu(outputs)
outputs = self.conv2(outputs)
outputs = F.hard_sigmoid(outputs)
outputs = F.activation.hard_sigmoid(outputs)
return inputs * outputs
\ No newline at end of file
......@@ -127,7 +127,7 @@ class MobileNetV3(nn.Layer):
act='hard_swish',
name='conv_last')
self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
self.out_channels = make_divisible(scale * cls_ch_squeeze)
def forward(self, x):
......
......@@ -20,8 +20,8 @@ def build_head(config):
from .det_db_head import DBHead
# rec head
from .rec_ctc_head import CTC
support_dict = ['DBHead', 'CTC']
from .rec_ctc_head import CTCHead
support_dict = ['DBHead', 'CTCHead']
module_name = config.pop('name')
assert module_name in support_dict, Exception('head only support {}'.format(
......
......@@ -33,7 +33,7 @@ def get_bias_attr(k, name):
class Head(nn.Layer):
def __init__(self, in_channels, name_list):
super(Head, self).__init__()
self.conv1 = nn.Conv2d(
self.conv1 = nn.Conv2D(
in_channels=in_channels,
out_channels=in_channels // 4,
kernel_size=3,
......@@ -51,14 +51,14 @@ class Head(nn.Layer):
moving_mean_name=name_list[1] + '.w_1',
moving_variance_name=name_list[1] + '.w_2',
act='relu')
self.conv2 = nn.ConvTranspose2d(
self.conv2 = nn.Conv2DTranspose(
in_channels=in_channels // 4,
out_channels=in_channels // 4,
kernel_size=2,
stride=2,
weight_attr=ParamAttr(
name=name_list[2] + '.w_0',
initializer=paddle.nn.initializer.MSRA(uniform=False)),
initializer=paddle.nn.initializer.KaimingNormal()),
bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv2"))
self.conv_bn2 = nn.BatchNorm(
num_channels=in_channels // 4,
......@@ -71,14 +71,14 @@ class Head(nn.Layer):
moving_mean_name=name_list[3] + '.w_1',
moving_variance_name=name_list[3] + '.w_2',
act="relu")
self.conv3 = nn.ConvTranspose2d(
self.conv3 = nn.Conv2DTranspose(
in_channels=in_channels // 4,
out_channels=1,
kernel_size=2,
stride=2,
weight_attr=ParamAttr(
name=name_list[4] + '.w_0',
initializer=paddle.nn.initializer.MSRA(uniform=False)),
initializer=paddle.nn.initializer.KaimingNormal()),
bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv3"),
)
......
......@@ -33,10 +33,9 @@ def get_para_bias_attr(l2_decay, k, name):
regularizer=regularizer, initializer=initializer, name=name + "_b_attr")
return [weight_attr, bias_attr]
class CTC(nn.Layer):
def __init__(self, in_channels, out_channels, fc_decay=1e-5, **kwargs):
super(CTC, self).__init__()
class CTCHead(nn.Layer):
def __init__(self, in_channels, out_channels, fc_decay=0.0004, **kwargs):
super(CTCHead, self).__init__()
weight_attr, bias_attr = get_para_bias_attr(
l2_decay=fc_decay, k=in_channels, name='ctc_fc')
self.fc = nn.Linear(
......
......@@ -14,11 +14,10 @@
__all__ = ['build_neck']
def build_neck(config):
from .fpn import FPN
from .db_fpn import DBFPN
from .rnn import SequenceEncoder
support_dict = ['FPN', 'SequenceEncoder']
support_dict = ['DBFPN', 'SequenceEncoder']
module_name = config.pop('name')
assert module_name in support_dict, Exception('neck only support {}'.format(
......
......@@ -22,41 +22,41 @@ import paddle.nn.functional as F
from paddle import ParamAttr
class FPN(nn.Layer):
class DBFPN(nn.Layer):
def __init__(self, in_channels, out_channels, **kwargs):
super(FPN, self).__init__()
super(DBFPN, self).__init__()
self.out_channels = out_channels
weight_attr = paddle.nn.initializer.MSRA(uniform=False)
weight_attr = paddle.nn.initializer.KaimingNormal()
self.in2_conv = nn.Conv2d(
self.in2_conv = nn.Conv2D(
in_channels=in_channels[0],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(
name='conv2d_51.w_0', initializer=weight_attr),
bias_attr=False)
self.in3_conv = nn.Conv2d(
self.in3_conv = nn.Conv2D(
in_channels=in_channels[1],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(
name='conv2d_50.w_0', initializer=weight_attr),
bias_attr=False)
self.in4_conv = nn.Conv2d(
self.in4_conv = nn.Conv2D(
in_channels=in_channels[2],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(
name='conv2d_49.w_0', initializer=weight_attr),
bias_attr=False)
self.in5_conv = nn.Conv2d(
self.in5_conv = nn.Conv2D(
in_channels=in_channels[3],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(
name='conv2d_48.w_0', initializer=weight_attr),
bias_attr=False)
self.p5_conv = nn.Conv2d(
self.p5_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
......@@ -64,7 +64,7 @@ class FPN(nn.Layer):
weight_attr=ParamAttr(
name='conv2d_52.w_0', initializer=weight_attr),
bias_attr=False)
self.p4_conv = nn.Conv2d(
self.p4_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
......@@ -72,7 +72,7 @@ class FPN(nn.Layer):
weight_attr=ParamAttr(
name='conv2d_53.w_0', initializer=weight_attr),
bias_attr=False)
self.p3_conv = nn.Conv2d(
self.p3_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
......@@ -80,7 +80,7 @@ class FPN(nn.Layer):
weight_attr=ParamAttr(
name='conv2d_54.w_0', initializer=weight_attr),
bias_attr=False)
self.p2_conv = nn.Conv2d(
self.p2_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
......@@ -97,17 +97,17 @@ class FPN(nn.Layer):
in3 = self.in3_conv(c3)
in2 = self.in2_conv(c2)
out4 = in4 + F.resize_nearest(in5, scale=2) # 1/16
out3 = in3 + F.resize_nearest(out4, scale=2) # 1/8
out2 = in2 + F.resize_nearest(out3, scale=2) # 1/4
out4 = in4 + F.upsample(in5, scale_factor=2, mode="nearest") # 1/16
out3 = in3 + F.upsample(out4, scale_factor=2, mode="nearest") # 1/8
out2 = in2 + F.upsample(out3, scale_factor=2, mode="nearest") # 1/4
p5 = self.p5_conv(in5)
p4 = self.p4_conv(out4)
p3 = self.p3_conv(out3)
p2 = self.p2_conv(out2)
p5 = F.resize_nearest(p5, scale=8)
p4 = F.resize_nearest(p4, scale=4)
p3 = F.resize_nearest(p3, scale=2)
p5 = F.upsample(p5, scale_factor=8, mode="nearest")
p4 = F.upsample(p4, scale_factor=4, mode="nearest")
p3 = F.upsample(p3, scale_factor=2, mode="nearest")
fuse = paddle.concat([p5, p4, p3, p2], axis=1)
return fuse
......@@ -76,8 +76,7 @@ class SequenceEncoder(nn.Layer):
'fc': EncoderWithFC,
'rnn': EncoderWithRNN
}
assert encoder_type in support_encoder_dict, '{} must in {}'.format(
encoder_type, support_encoder_dict.keys())
assert encoder_type in support_encoder_dict, '{} must in {}'.format(encoder_type, support_encoder_dict.keys())
self.encoder = support_encoder_dict[encoder_type](
self.encoder_reshape.out_channels, hidden_size)
......
......@@ -51,6 +51,6 @@ def build_optimizer(config, epochs, step_each_epoch, parameters):
# step3 build optimizer
optim_name = config.pop('name')
optim = getattr(optimizer, optim_name)(learning_rate=lr,
regularization=reg,
weight_decay=reg,
**config)
return optim(parameters), lr
......@@ -17,7 +17,7 @@ from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from paddle.optimizer import lr_scheduler
from paddle.optimizer import lr as lr_scheduler
class Linear(object):
......
......@@ -40,8 +40,8 @@ class Momentum(object):
opt = optim.Momentum(
learning_rate=self.learning_rate,
momentum=self.momentum,
parameters=self.weight_decay,
weight_decay=parameters)
parameters=parameters,
weight_decay=self.weight_decay)
return opt
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment