"example/24_batched_gemm/CMakeLists.txt" did not exist on "2327f1a640c267743f119e59d759bc62a7887eae"
Unverified Commit 96c91907 authored by dyning's avatar dyning Committed by GitHub
Browse files

Merge pull request #1105 from dyning/dygraph

update structure of dygraph
parents 7d09cd19 1ae37919
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import numpy as np
import os
import random
import paddle
from paddle.io import Dataset
import time
from .imaug import transform, create_operators
class SimpleDataSet(Dataset):
    """A dataset serving samples listed in one or more label files.

    Each label file contains one sample per line formatted as
    ``<image path><delimiter><label>``.  When several label files are
    given, ``ratio_list`` controls roughly how many samples are drawn
    from each file per batch round (see ``dataset_traversal``).
    """

    def __init__(self, config, mode, logger):
        """
        Args:
            config (dict): full configuration; the ``config['Global']``,
                ``config[mode]['dataset']`` and ``config[mode]['loader']``
                sections are used.
            mode (str): top-level config key (e.g. 'Train' or 'Eval').
            logger: logger object exposing an ``info`` method.
        """
        super(SimpleDataSet, self).__init__()
        global_config = config['Global']
        dataset_config = config[mode]['dataset']
        loader_config = config[mode]['loader']
        batch_size = loader_config['batch_size_per_card']
        self.delimiter = dataset_config.get('delimiter', '\t')
        label_file_list = dataset_config.pop('label_file_list')
        data_source_num = len(label_file_list)
        if data_source_num == 1:
            ratio_list = [1.0]
        else:
            ratio_list = dataset_config.pop('ratio_list')
            # BUGFIX: exact float equality (`sum(ratio_list) == 1`) wrongly
            # rejects valid ratios such as [0.3, 0.3, 0.4], whose float sum
            # is not exactly 1.0; compare with a small tolerance instead.
            assert abs(sum(ratio_list) -
                       1.0) < 1e-6, "The sum of the ratio_list should be 1."
            assert len(
                ratio_list
            ) == data_source_num, "The length of ratio_list should be the same as the file_list."
        self.data_dir = dataset_config['data_dir']
        self.do_shuffle = loader_config['shuffle']
        logger.info("Initialize indexs of datasets:%s" % label_file_list)
        self.data_lines_list, data_num_list = self.get_image_info_list(
            label_file_list)
        self.data_idx_order_list = self.dataset_traversal(
            data_num_list, ratio_list, batch_size)
        self.shuffle_data_random()
        self.ops = create_operators(dataset_config['transforms'],
                                    global_config)

    def get_image_info_list(self, file_list):
        """Read every label file as raw bytes lines.

        Args:
            file_list (str or list[str]): label file path(s); a single
                path string is promoted to a one-element list.

        Returns:
            tuple: ``(data_lines_list, data_num_list)`` where
                ``data_lines_list[i]`` holds the raw (bytes) lines of the
                i-th file and ``data_num_list[i]`` its line count.
        """
        if isinstance(file_list, str):
            file_list = [file_list]
        data_lines_list = []
        data_num_list = []
        for file in file_list:
            with open(file, "rb") as f:
                lines = f.readlines()
                data_lines_list.append(lines)
                data_num_list.append(len(lines))
        return data_lines_list, data_num_list

    def dataset_traversal(self, data_num_list, ratio_list, batch_size):
        """Build the global sample visiting order.

        In each round, roughly ``batch_size * ratio_list[i]`` consecutive
        samples (at least one) are taken from dataset ``i``; rounds repeat
        until every dataset is exhausted, so all samples appear exactly once.

        Returns:
            list[tuple]: ``(dataset_idx, sample_idx)`` pairs in visiting
                order.
        """
        select_num_list = []
        dataset_num = len(data_num_list)
        for dno in range(dataset_num):
            # Every dataset contributes at least one sample per round, even
            # when its ratio would round the count down to zero.
            select_num = round(batch_size * ratio_list[dno])
            select_num = max(select_num, 1)
            select_num_list.append(select_num)
        data_idx_order_list = []
        cur_index_sets = [0] * dataset_num
        while True:
            finish_read_num = 0
            for dataset_idx in range(dataset_num):
                cur_index = cur_index_sets[dataset_idx]
                if cur_index >= data_num_list[dataset_idx]:
                    finish_read_num += 1
                else:
                    select_num = select_num_list[dataset_idx]
                    for sno in range(select_num):
                        cur_index = cur_index_sets[dataset_idx]
                        if cur_index >= data_num_list[dataset_idx]:
                            break
                        data_idx_order_list.append((dataset_idx, cur_index))
                        cur_index_sets[dataset_idx] += 1
            if finish_read_num == dataset_num:
                break
        return data_idx_order_list

    def shuffle_data_random(self):
        """Shuffle each label file's lines in place when shuffling is on."""
        if self.do_shuffle:
            for dno in range(len(self.data_lines_list)):
                random.shuffle(self.data_lines_list[dno])
        return

    def __getitem__(self, idx):
        """Load, decode and transform the idx-th sample.

        When the transform pipeline rejects the sample (returns None), a
        random other sample is returned instead.
        """
        dataset_idx, file_idx = self.data_idx_order_list[idx]
        data_line = self.data_lines_list[dataset_idx][file_idx]
        data_line = data_line.decode('utf-8')
        substr = data_line.strip("\n").split(self.delimiter)
        file_name = substr[0]
        label = substr[1]
        img_path = os.path.join(self.data_dir, file_name)
        data = {'img_path': img_path, 'label': label}
        with open(data['img_path'], 'rb') as f:
            img = f.read()
            data['image'] = img
        outs = transform(data, self.ops)
        if outs is None:
            # NOTE(review): retries recursively with a random index; if many
            # samples are rejected this can recurse deeply.
            return self.__getitem__(np.random.randint(self.__len__()))
        return outs

    def __len__(self):
        return len(self.data_idx_order_list)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from .losses import build_loss
__all__ = ['build_model', 'build_loss']
def build_model(config):
    """Build and return the model described by ``config``.

    A deep copy of ``config`` is handed to the architecture so the
    caller's dict is never mutated during construction.
    """
    from .architectures import Model

    model_config = copy.deepcopy(config)
    return Model(model_config)
......@@ -12,5 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .model import Model
__all__ = ['Model']
\ No newline at end of file
import copy
__all__ = ['build_model']
def build_model(config):
    """Instantiate a BaseModel from a configuration dict.

    The configuration is deep-copied first so building the model cannot
    mutate the caller's dict.
    """
    from .base_model import BaseModel

    arch_config = copy.deepcopy(config)
    return BaseModel(arch_config)
\ No newline at end of file
......@@ -15,34 +15,25 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os, sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.append('/home/zhoujun20/PaddleOCR')
from paddle import nn
from ppocr.modeling.transform import build_transform
from ppocr.modeling.backbones import build_backbone
from ppocr.modeling.necks import build_neck
from ppocr.modeling.heads import build_head
__all__ = ['Model']
__all__ = ['BaseModel']
class Model(nn.Layer):
class BaseModel(nn.Layer):
def __init__(self, config):
"""
Detection module for OCR.
the module for OCR.
args:
config (dict): the super parameters for module.
"""
super(Model, self).__init__()
algorithm = config['algorithm']
self.type = config['type']
self.model_name = '{}_{}'.format(self.type, algorithm)
super(BaseModel, self).__init__()
in_channels = config.get('in_channels', 3)
model_type = config['model_type']
# build transfrom,
# for rec, transfrom can be TPS,None
# for det and cls, transfrom shoule to be None,
......@@ -57,7 +48,7 @@ class Model(nn.Layer):
# build backbone, backbone is need for del, rec and cls
config["Backbone"]['in_channels'] = in_channels
self.backbone = build_backbone(config["Backbone"], self.type)
self.backbone = build_backbone(config["Backbone"], model_type)
in_channels = self.backbone.out_channels
# build neck
......@@ -71,6 +62,7 @@ class Model(nn.Layer):
config['Neck']['in_channels'] = in_channels
self.neck = build_neck(config['Neck'])
in_channels = self.neck.out_channels
# # build head, head is need for det, rec and cls
config["Head"]['in_channels'] = in_channels
self.head = build_head(config["Head"])
......
......@@ -19,7 +19,6 @@ def build_backbone(config, model_type):
if model_type == 'det':
from .det_mobilenet_v3 import MobileNetV3
from .det_resnet_vd import ResNet
support_dict = ['MobileNetV3', 'ResNet', 'ResNet_SAST']
elif model_type == 'rec':
from .rec_mobilenet_v3 import MobileNetV3
......
......@@ -130,7 +130,6 @@ class MobileNetV3(nn.Layer):
if_act=True,
act='hard_swish',
name='conv_last'))
self.stages.append(nn.Sequential(*block_list))
self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
for i, stage in enumerate(self.stages):
......@@ -159,7 +158,7 @@ class ConvBNLayer(nn.Layer):
super(ConvBNLayer, self).__init__()
self.if_act = if_act
self.act = act
self.conv = nn.Conv2d(
self.conv = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
......@@ -184,7 +183,7 @@ class ConvBNLayer(nn.Layer):
if self.act == "relu":
x = F.relu(x)
elif self.act == "hard_swish":
x = F.hard_swish(x)
x = F.activation.hard_swish(x)
else:
print("The activation function is selected incorrectly.")
exit()
......@@ -243,16 +242,15 @@ class ResidualUnit(nn.Layer):
x = self.mid_se(x)
x = self.linear_conv(x)
if self.if_shortcut:
x = paddle.elementwise_add(inputs, x)
x = paddle.add(inputs, x)
return x
class SEModule(nn.Layer):
def __init__(self, in_channels, reduction=4, name=""):
super(SEModule, self).__init__()
self.avg_pool = nn.Pool2D(
pool_type="avg", global_pooling=True, use_cudnn=False)
self.conv1 = nn.Conv2d(
self.avg_pool = nn.AdaptiveAvgPool2D(1)
self.conv1 = nn.Conv2D(
in_channels=in_channels,
out_channels=in_channels // reduction,
kernel_size=1,
......@@ -260,7 +258,7 @@ class SEModule(nn.Layer):
padding=0,
weight_attr=ParamAttr(name=name + "_1_weights"),
bias_attr=ParamAttr(name=name + "_1_offset"))
self.conv2 = nn.Conv2d(
self.conv2 = nn.Conv2D(
in_channels=in_channels // reduction,
out_channels=in_channels,
kernel_size=1,
......@@ -274,5 +272,5 @@ class SEModule(nn.Layer):
outputs = self.conv1(outputs)
outputs = F.relu(outputs)
outputs = self.conv2(outputs)
outputs = F.hard_sigmoid(outputs)
outputs = F.activation.hard_sigmoid(outputs)
return inputs * outputs
\ No newline at end of file
......@@ -127,7 +127,7 @@ class MobileNetV3(nn.Layer):
act='hard_swish',
name='conv_last')
self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
self.out_channels = make_divisible(scale * cls_ch_squeeze)
def forward(self, x):
......
......@@ -20,8 +20,8 @@ def build_head(config):
from .det_db_head import DBHead
# rec head
from .rec_ctc_head import CTC
support_dict = ['DBHead', 'CTC']
from .rec_ctc_head import CTCHead
support_dict = ['DBHead', 'CTCHead']
module_name = config.pop('name')
assert module_name in support_dict, Exception('head only support {}'.format(
......
......@@ -33,7 +33,7 @@ def get_bias_attr(k, name):
class Head(nn.Layer):
def __init__(self, in_channels, name_list):
super(Head, self).__init__()
self.conv1 = nn.Conv2d(
self.conv1 = nn.Conv2D(
in_channels=in_channels,
out_channels=in_channels // 4,
kernel_size=3,
......@@ -51,14 +51,14 @@ class Head(nn.Layer):
moving_mean_name=name_list[1] + '.w_1',
moving_variance_name=name_list[1] + '.w_2',
act='relu')
self.conv2 = nn.ConvTranspose2d(
self.conv2 = nn.Conv2DTranspose(
in_channels=in_channels // 4,
out_channels=in_channels // 4,
kernel_size=2,
stride=2,
weight_attr=ParamAttr(
name=name_list[2] + '.w_0',
initializer=paddle.nn.initializer.MSRA(uniform=False)),
initializer=paddle.nn.initializer.KaimingNormal()),
bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv2"))
self.conv_bn2 = nn.BatchNorm(
num_channels=in_channels // 4,
......@@ -71,14 +71,14 @@ class Head(nn.Layer):
moving_mean_name=name_list[3] + '.w_1',
moving_variance_name=name_list[3] + '.w_2',
act="relu")
self.conv3 = nn.ConvTranspose2d(
self.conv3 = nn.Conv2DTranspose(
in_channels=in_channels // 4,
out_channels=1,
kernel_size=2,
stride=2,
weight_attr=ParamAttr(
name=name_list[4] + '.w_0',
initializer=paddle.nn.initializer.MSRA(uniform=False)),
initializer=paddle.nn.initializer.KaimingNormal()),
bias_attr=get_bias_attr(in_channels // 4, name_list[-1] + "conv3"),
)
......
......@@ -33,10 +33,9 @@ def get_para_bias_attr(l2_decay, k, name):
regularizer=regularizer, initializer=initializer, name=name + "_b_attr")
return [weight_attr, bias_attr]
class CTC(nn.Layer):
def __init__(self, in_channels, out_channels, fc_decay=1e-5, **kwargs):
super(CTC, self).__init__()
class CTCHead(nn.Layer):
def __init__(self, in_channels, out_channels, fc_decay=0.0004, **kwargs):
super(CTCHead, self).__init__()
weight_attr, bias_attr = get_para_bias_attr(
l2_decay=fc_decay, k=in_channels, name='ctc_fc')
self.fc = nn.Linear(
......
......@@ -14,11 +14,10 @@
__all__ = ['build_neck']
def build_neck(config):
from .fpn import FPN
from .db_fpn import DBFPN
from .rnn import SequenceEncoder
support_dict = ['FPN', 'SequenceEncoder']
support_dict = ['DBFPN', 'SequenceEncoder']
module_name = config.pop('name')
assert module_name in support_dict, Exception('neck only support {}'.format(
......
......@@ -22,41 +22,41 @@ import paddle.nn.functional as F
from paddle import ParamAttr
class FPN(nn.Layer):
class DBFPN(nn.Layer):
def __init__(self, in_channels, out_channels, **kwargs):
super(FPN, self).__init__()
super(DBFPN, self).__init__()
self.out_channels = out_channels
weight_attr = paddle.nn.initializer.MSRA(uniform=False)
weight_attr = paddle.nn.initializer.KaimingNormal()
self.in2_conv = nn.Conv2d(
self.in2_conv = nn.Conv2D(
in_channels=in_channels[0],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(
name='conv2d_51.w_0', initializer=weight_attr),
bias_attr=False)
self.in3_conv = nn.Conv2d(
self.in3_conv = nn.Conv2D(
in_channels=in_channels[1],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(
name='conv2d_50.w_0', initializer=weight_attr),
bias_attr=False)
self.in4_conv = nn.Conv2d(
self.in4_conv = nn.Conv2D(
in_channels=in_channels[2],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(
name='conv2d_49.w_0', initializer=weight_attr),
bias_attr=False)
self.in5_conv = nn.Conv2d(
self.in5_conv = nn.Conv2D(
in_channels=in_channels[3],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(
name='conv2d_48.w_0', initializer=weight_attr),
bias_attr=False)
self.p5_conv = nn.Conv2d(
self.p5_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
......@@ -64,7 +64,7 @@ class FPN(nn.Layer):
weight_attr=ParamAttr(
name='conv2d_52.w_0', initializer=weight_attr),
bias_attr=False)
self.p4_conv = nn.Conv2d(
self.p4_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
......@@ -72,7 +72,7 @@ class FPN(nn.Layer):
weight_attr=ParamAttr(
name='conv2d_53.w_0', initializer=weight_attr),
bias_attr=False)
self.p3_conv = nn.Conv2d(
self.p3_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
......@@ -80,7 +80,7 @@ class FPN(nn.Layer):
weight_attr=ParamAttr(
name='conv2d_54.w_0', initializer=weight_attr),
bias_attr=False)
self.p2_conv = nn.Conv2d(
self.p2_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
......@@ -97,17 +97,17 @@ class FPN(nn.Layer):
in3 = self.in3_conv(c3)
in2 = self.in2_conv(c2)
out4 = in4 + F.resize_nearest(in5, scale=2) # 1/16
out3 = in3 + F.resize_nearest(out4, scale=2) # 1/8
out2 = in2 + F.resize_nearest(out3, scale=2) # 1/4
out4 = in4 + F.upsample(in5, scale_factor=2, mode="nearest") # 1/16
out3 = in3 + F.upsample(out4, scale_factor=2, mode="nearest") # 1/8
out2 = in2 + F.upsample(out3, scale_factor=2, mode="nearest") # 1/4
p5 = self.p5_conv(in5)
p4 = self.p4_conv(out4)
p3 = self.p3_conv(out3)
p2 = self.p2_conv(out2)
p5 = F.resize_nearest(p5, scale=8)
p4 = F.resize_nearest(p4, scale=4)
p3 = F.resize_nearest(p3, scale=2)
p5 = F.upsample(p5, scale_factor=8, mode="nearest")
p4 = F.upsample(p4, scale_factor=4, mode="nearest")
p3 = F.upsample(p3, scale_factor=2, mode="nearest")
fuse = paddle.concat([p5, p4, p3, p2], axis=1)
return fuse
......@@ -76,8 +76,7 @@ class SequenceEncoder(nn.Layer):
'fc': EncoderWithFC,
'rnn': EncoderWithRNN
}
assert encoder_type in support_encoder_dict, '{} must in {}'.format(
encoder_type, support_encoder_dict.keys())
assert encoder_type in support_encoder_dict, '{} must in {}'.format(encoder_type, support_encoder_dict.keys())
self.encoder = support_encoder_dict[encoder_type](
self.encoder_reshape.out_channels, hidden_size)
......
......@@ -51,6 +51,6 @@ def build_optimizer(config, epochs, step_each_epoch, parameters):
# step3 build optimizer
optim_name = config.pop('name')
optim = getattr(optimizer, optim_name)(learning_rate=lr,
regularization=reg,
weight_decay=reg,
**config)
return optim(parameters), lr
......@@ -17,7 +17,7 @@ from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from paddle.optimizer import lr_scheduler
from paddle.optimizer import lr as lr_scheduler
class Linear(object):
......
......@@ -40,8 +40,8 @@ class Momentum(object):
opt = optim.Momentum(
learning_rate=self.learning_rate,
momentum=self.momentum,
parameters=self.weight_decay,
weight_decay=parameters)
parameters=parameters,
weight_decay=self.weight_decay)
return opt
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment