Commit f1506916 authored by sugon_cxj's avatar sugon_cxj
Browse files

first commit

parent 55c28ed5
Pipeline #266 canceled with stages
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
import os
import sys
__dir__ = os.path.dirname(os.path.abspath(__file__))
sys.path.append(__dir__)
sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '../../..')))
from ppocr.modeling.backbones.det_mobilenet_v3 import SEModule
class DSConv(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
padding,
stride=1,
groups=None,
if_act=True,
act="relu",
**kwargs):
super(DSConv, self).__init__()
if groups == None:
groups = in_channels
self.if_act = if_act
self.act = act
self.conv1 = nn.Conv2D(
in_channels=in_channels,
out_channels=in_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
bias_attr=False)
self.bn1 = nn.BatchNorm(num_channels=in_channels, act=None)
self.conv2 = nn.Conv2D(
in_channels=in_channels,
out_channels=int(in_channels * 4),
kernel_size=1,
stride=1,
bias_attr=False)
self.bn2 = nn.BatchNorm(num_channels=int(in_channels * 4), act=None)
self.conv3 = nn.Conv2D(
in_channels=int(in_channels * 4),
out_channels=out_channels,
kernel_size=1,
stride=1,
bias_attr=False)
self._c = [in_channels, out_channels]
if in_channels != out_channels:
self.conv_end = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=1,
bias_attr=False)
def forward(self, inputs):
x = self.conv1(inputs)
x = self.bn1(x)
x = self.conv2(x)
x = self.bn2(x)
if self.if_act:
if self.act == "relu":
x = F.relu(x)
elif self.act == "hardswish":
x = F.hardswish(x)
else:
print("The activation function({}) is selected incorrectly.".
format(self.act))
exit()
x = self.conv3(x)
if self._c[0] != self._c[1]:
x = x + self.conv_end(inputs)
return x
class DBFPN(nn.Layer):
def __init__(self, in_channels, out_channels, **kwargs):
super(DBFPN, self).__init__()
self.out_channels = out_channels
weight_attr = paddle.nn.initializer.KaimingUniform()
self.in2_conv = nn.Conv2D(
in_channels=in_channels[0],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.in3_conv = nn.Conv2D(
in_channels=in_channels[1],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.in4_conv = nn.Conv2D(
in_channels=in_channels[2],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.in5_conv = nn.Conv2D(
in_channels=in_channels[3],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.p5_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.p4_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.p3_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.p2_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
def forward(self, x):
c2, c3, c4, c5 = x
in5 = self.in5_conv(c5)
in4 = self.in4_conv(c4)
in3 = self.in3_conv(c3)
in2 = self.in2_conv(c2)
out4 = in4 + F.upsample(
in5, scale_factor=2, mode="nearest", align_mode=1) # 1/16
out3 = in3 + F.upsample(
out4, scale_factor=2, mode="nearest", align_mode=1) # 1/8
out2 = in2 + F.upsample(
out3, scale_factor=2, mode="nearest", align_mode=1) # 1/4
p5 = self.p5_conv(in5)
p4 = self.p4_conv(out4)
p3 = self.p3_conv(out3)
p2 = self.p2_conv(out2)
p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)
fuse = paddle.concat([p5, p4, p3, p2], axis=1)
return fuse
class RSELayer(nn.Layer):
def __init__(self, in_channels, out_channels, kernel_size, shortcut=True):
super(RSELayer, self).__init__()
weight_attr = paddle.nn.initializer.KaimingUniform()
self.out_channels = out_channels
self.in_conv = nn.Conv2D(
in_channels=in_channels,
out_channels=self.out_channels,
kernel_size=kernel_size,
padding=int(kernel_size // 2),
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.se_block = SEModule(self.out_channels)
self.shortcut = shortcut
def forward(self, ins):
x = self.in_conv(ins)
if self.shortcut:
out = x + self.se_block(x)
else:
out = self.se_block(x)
return out
class RSEFPN(nn.Layer):
def __init__(self, in_channels, out_channels, shortcut=True, **kwargs):
super(RSEFPN, self).__init__()
self.out_channels = out_channels
self.ins_conv = nn.LayerList()
self.inp_conv = nn.LayerList()
for i in range(len(in_channels)):
self.ins_conv.append(
RSELayer(
in_channels[i],
out_channels,
kernel_size=1,
shortcut=shortcut))
self.inp_conv.append(
RSELayer(
out_channels,
out_channels // 4,
kernel_size=3,
shortcut=shortcut))
def forward(self, x):
c2, c3, c4, c5 = x
in5 = self.ins_conv[3](c5)
in4 = self.ins_conv[2](c4)
in3 = self.ins_conv[1](c3)
in2 = self.ins_conv[0](c2)
out4 = in4 + F.upsample(
in5, scale_factor=2, mode="nearest", align_mode=1) # 1/16
out3 = in3 + F.upsample(
out4, scale_factor=2, mode="nearest", align_mode=1) # 1/8
out2 = in2 + F.upsample(
out3, scale_factor=2, mode="nearest", align_mode=1) # 1/4
p5 = self.inp_conv[3](in5)
p4 = self.inp_conv[2](out4)
p3 = self.inp_conv[1](out3)
p2 = self.inp_conv[0](out2)
p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)
fuse = paddle.concat([p5, p4, p3, p2], axis=1)
return fuse
class LKPAN(nn.Layer):
def __init__(self, in_channels, out_channels, mode='large', **kwargs):
super(LKPAN, self).__init__()
self.out_channels = out_channels
weight_attr = paddle.nn.initializer.KaimingUniform()
self.ins_conv = nn.LayerList()
self.inp_conv = nn.LayerList()
# pan head
self.pan_head_conv = nn.LayerList()
self.pan_lat_conv = nn.LayerList()
if mode.lower() == 'lite':
p_layer = DSConv
elif mode.lower() == 'large':
p_layer = nn.Conv2D
else:
raise ValueError(
"mode can only be one of ['lite', 'large'], but received {}".
format(mode))
for i in range(len(in_channels)):
self.ins_conv.append(
nn.Conv2D(
in_channels=in_channels[i],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False))
self.inp_conv.append(
p_layer(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=9,
padding=4,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False))
if i > 0:
self.pan_head_conv.append(
nn.Conv2D(
in_channels=self.out_channels // 4,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
stride=2,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False))
self.pan_lat_conv.append(
p_layer(
in_channels=self.out_channels // 4,
out_channels=self.out_channels // 4,
kernel_size=9,
padding=4,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False))
def forward(self, x):
c2, c3, c4, c5 = x
in5 = self.ins_conv[3](c5)
in4 = self.ins_conv[2](c4)
in3 = self.ins_conv[1](c3)
in2 = self.ins_conv[0](c2)
out4 = in4 + F.upsample(
in5, scale_factor=2, mode="nearest", align_mode=1) # 1/16
out3 = in3 + F.upsample(
out4, scale_factor=2, mode="nearest", align_mode=1) # 1/8
out2 = in2 + F.upsample(
out3, scale_factor=2, mode="nearest", align_mode=1) # 1/4
f5 = self.inp_conv[3](in5)
f4 = self.inp_conv[2](out4)
f3 = self.inp_conv[1](out3)
f2 = self.inp_conv[0](out2)
pan3 = f3 + self.pan_head_conv[0](f2)
pan4 = f4 + self.pan_head_conv[1](pan3)
pan5 = f5 + self.pan_head_conv[2](pan4)
p2 = self.pan_lat_conv[0](f2)
p3 = self.pan_lat_conv[1](pan3)
p4 = self.pan_lat_conv[2](pan4)
p5 = self.pan_lat_conv[3](pan5)
p5 = F.upsample(p5, scale_factor=8, mode="nearest", align_mode=1)
p4 = F.upsample(p4, scale_factor=4, mode="nearest", align_mode=1)
p3 = F.upsample(p3, scale_factor=2, mode="nearest", align_mode=1)
fuse = paddle.concat([p5, p4, p3, p2], axis=1)
return fuse
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
class ConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
groups=1,
if_act=True,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
self.if_act = if_act
self.act = act
self.conv = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
weight_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
self.bn = nn.BatchNorm(
num_channels=out_channels,
act=act,
param_attr=ParamAttr(name="bn_" + name + "_scale"),
bias_attr=ParamAttr(name="bn_" + name + "_offset"),
moving_mean_name="bn_" + name + "_mean",
moving_variance_name="bn_" + name + "_variance")
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return x
class DeConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
groups=1,
if_act=True,
act=None,
name=None):
super(DeConvBNLayer, self).__init__()
self.if_act = if_act
self.act = act
self.deconv = nn.Conv2DTranspose(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
weight_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
self.bn = nn.BatchNorm(
num_channels=out_channels,
act=act,
param_attr=ParamAttr(name="bn_" + name + "_scale"),
bias_attr=ParamAttr(name="bn_" + name + "_offset"),
moving_mean_name="bn_" + name + "_mean",
moving_variance_name="bn_" + name + "_variance")
def forward(self, x):
x = self.deconv(x)
x = self.bn(x)
return x
class EASTFPN(nn.Layer):
def __init__(self, in_channels, model_name, **kwargs):
super(EASTFPN, self).__init__()
self.model_name = model_name
if self.model_name == "large":
self.out_channels = 128
else:
self.out_channels = 64
self.in_channels = in_channels[::-1]
self.h1_conv = ConvBNLayer(
in_channels=self.out_channels+self.in_channels[1],
out_channels=self.out_channels,
kernel_size=3,
stride=1,
padding=1,
if_act=True,
act='relu',
name="unet_h_1")
self.h2_conv = ConvBNLayer(
in_channels=self.out_channels+self.in_channels[2],
out_channels=self.out_channels,
kernel_size=3,
stride=1,
padding=1,
if_act=True,
act='relu',
name="unet_h_2")
self.h3_conv = ConvBNLayer(
in_channels=self.out_channels+self.in_channels[3],
out_channels=self.out_channels,
kernel_size=3,
stride=1,
padding=1,
if_act=True,
act='relu',
name="unet_h_3")
self.g0_deconv = DeConvBNLayer(
in_channels=self.in_channels[0],
out_channels=self.out_channels,
kernel_size=4,
stride=2,
padding=1,
if_act=True,
act='relu',
name="unet_g_0")
self.g1_deconv = DeConvBNLayer(
in_channels=self.out_channels,
out_channels=self.out_channels,
kernel_size=4,
stride=2,
padding=1,
if_act=True,
act='relu',
name="unet_g_1")
self.g2_deconv = DeConvBNLayer(
in_channels=self.out_channels,
out_channels=self.out_channels,
kernel_size=4,
stride=2,
padding=1,
if_act=True,
act='relu',
name="unet_g_2")
self.g3_conv = ConvBNLayer(
in_channels=self.out_channels,
out_channels=self.out_channels,
kernel_size=3,
stride=1,
padding=1,
if_act=True,
act='relu',
name="unet_g_3")
def forward(self, x):
f = x[::-1]
h = f[0]
g = self.g0_deconv(h)
h = paddle.concat([g, f[1]], axis=1)
h = self.h1_conv(h)
g = self.g1_deconv(h)
h = paddle.concat([g, f[2]], axis=1)
h = self.h2_conv(h)
g = self.g2_deconv(h)
h = paddle.concat([g, f[3]], axis=1)
h = self.h3_conv(h)
g = self.g3_conv(h)
return g
\ No newline at end of file
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/PaddlePaddle/PaddleDetection/blob/release/2.3/ppdet/modeling/necks/fpn.py
"""
import paddle.nn as nn
import paddle.nn.functional as F
from paddle import ParamAttr
from paddle.nn.initializer import XavierUniform
from paddle.nn.initializer import Normal
from paddle.regularizer import L2Decay
__all__ = ['FCEFPN']
class ConvNormLayer(nn.Layer):
def __init__(self,
ch_in,
ch_out,
filter_size,
stride,
groups=1,
norm_type='bn',
norm_decay=0.,
norm_groups=32,
lr_scale=1.,
freeze_norm=False,
initializer=Normal(
mean=0., std=0.01)):
super(ConvNormLayer, self).__init__()
assert norm_type in ['bn', 'sync_bn', 'gn']
bias_attr = False
self.conv = nn.Conv2D(
in_channels=ch_in,
out_channels=ch_out,
kernel_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(
initializer=initializer, learning_rate=1.),
bias_attr=bias_attr)
norm_lr = 0. if freeze_norm else 1.
param_attr = ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
bias_attr = ParamAttr(
learning_rate=norm_lr,
regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
if norm_type == 'bn':
self.norm = nn.BatchNorm2D(
ch_out, weight_attr=param_attr, bias_attr=bias_attr)
elif norm_type == 'sync_bn':
self.norm = nn.SyncBatchNorm(
ch_out, weight_attr=param_attr, bias_attr=bias_attr)
elif norm_type == 'gn':
self.norm = nn.GroupNorm(
num_groups=norm_groups,
num_channels=ch_out,
weight_attr=param_attr,
bias_attr=bias_attr)
def forward(self, inputs):
out = self.conv(inputs)
out = self.norm(out)
return out
class FCEFPN(nn.Layer):
"""
Feature Pyramid Network, see https://arxiv.org/abs/1612.03144
Args:
in_channels (list[int]): input channels of each level which can be
derived from the output shape of backbone by from_config
out_channels (list[int]): output channel of each level
spatial_scales (list[float]): the spatial scales between input feature
maps and original input image which can be derived from the output
shape of backbone by from_config
has_extra_convs (bool): whether to add extra conv to the last level.
default False
extra_stage (int): the number of extra stages added to the last level.
default 1
use_c5 (bool): Whether to use c5 as the input of extra stage,
otherwise p5 is used. default True
norm_type (string|None): The normalization type in FPN module. If
norm_type is None, norm will not be used after conv and if
norm_type is string, bn, gn, sync_bn are available. default None
norm_decay (float): weight decay for normalization layer weights.
default 0.
freeze_norm (bool): whether to freeze normalization layer.
default False
relu_before_extra_convs (bool): whether to add relu before extra convs.
default False
"""
def __init__(self,
in_channels,
out_channels,
spatial_scales=[0.25, 0.125, 0.0625, 0.03125],
has_extra_convs=False,
extra_stage=1,
use_c5=True,
norm_type=None,
norm_decay=0.,
freeze_norm=False,
relu_before_extra_convs=True):
super(FCEFPN, self).__init__()
self.out_channels = out_channels
for s in range(extra_stage):
spatial_scales = spatial_scales + [spatial_scales[-1] / 2.]
self.spatial_scales = spatial_scales
self.has_extra_convs = has_extra_convs
self.extra_stage = extra_stage
self.use_c5 = use_c5
self.relu_before_extra_convs = relu_before_extra_convs
self.norm_type = norm_type
self.norm_decay = norm_decay
self.freeze_norm = freeze_norm
self.lateral_convs = []
self.fpn_convs = []
fan = out_channels * 3 * 3
# stage index 0,1,2,3 stands for res2,res3,res4,res5 on ResNet Backbone
# 0 <= st_stage < ed_stage <= 3
st_stage = 4 - len(in_channels)
ed_stage = st_stage + len(in_channels) - 1
for i in range(st_stage, ed_stage + 1):
if i == 3:
lateral_name = 'fpn_inner_res5_sum'
else:
lateral_name = 'fpn_inner_res{}_sum_lateral'.format(i + 2)
in_c = in_channels[i - st_stage]
if self.norm_type is not None:
lateral = self.add_sublayer(
lateral_name,
ConvNormLayer(
ch_in=in_c,
ch_out=out_channels,
filter_size=1,
stride=1,
norm_type=self.norm_type,
norm_decay=self.norm_decay,
freeze_norm=self.freeze_norm,
initializer=XavierUniform(fan_out=in_c)))
else:
lateral = self.add_sublayer(
lateral_name,
nn.Conv2D(
in_channels=in_c,
out_channels=out_channels,
kernel_size=1,
weight_attr=ParamAttr(
initializer=XavierUniform(fan_out=in_c))))
self.lateral_convs.append(lateral)
for i in range(st_stage, ed_stage + 1):
fpn_name = 'fpn_res{}_sum'.format(i + 2)
if self.norm_type is not None:
fpn_conv = self.add_sublayer(
fpn_name,
ConvNormLayer(
ch_in=out_channels,
ch_out=out_channels,
filter_size=3,
stride=1,
norm_type=self.norm_type,
norm_decay=self.norm_decay,
freeze_norm=self.freeze_norm,
initializer=XavierUniform(fan_out=fan)))
else:
fpn_conv = self.add_sublayer(
fpn_name,
nn.Conv2D(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(
initializer=XavierUniform(fan_out=fan))))
self.fpn_convs.append(fpn_conv)
# add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
if self.has_extra_convs:
for i in range(self.extra_stage):
lvl = ed_stage + 1 + i
if i == 0 and self.use_c5:
in_c = in_channels[-1]
else:
in_c = out_channels
extra_fpn_name = 'fpn_{}'.format(lvl + 2)
if self.norm_type is not None:
extra_fpn_conv = self.add_sublayer(
extra_fpn_name,
ConvNormLayer(
ch_in=in_c,
ch_out=out_channels,
filter_size=3,
stride=2,
norm_type=self.norm_type,
norm_decay=self.norm_decay,
freeze_norm=self.freeze_norm,
initializer=XavierUniform(fan_out=fan)))
else:
extra_fpn_conv = self.add_sublayer(
extra_fpn_name,
nn.Conv2D(
in_channels=in_c,
out_channels=out_channels,
kernel_size=3,
stride=2,
padding=1,
weight_attr=ParamAttr(
initializer=XavierUniform(fan_out=fan))))
self.fpn_convs.append(extra_fpn_conv)
@classmethod
def from_config(cls, cfg, input_shape):
return {
'in_channels': [i.channels for i in input_shape],
'spatial_scales': [1.0 / i.stride for i in input_shape],
}
def forward(self, body_feats):
laterals = []
num_levels = len(body_feats)
for i in range(num_levels):
laterals.append(self.lateral_convs[i](body_feats[i]))
for i in range(1, num_levels):
lvl = num_levels - i
upsample = F.interpolate(
laterals[lvl],
scale_factor=2.,
mode='nearest', )
laterals[lvl - 1] += upsample
fpn_output = []
for lvl in range(num_levels):
fpn_output.append(self.fpn_convs[lvl](laterals[lvl]))
if self.extra_stage > 0:
# use max pool to get more levels on top of outputs (Faster R-CNN, Mask R-CNN)
if not self.has_extra_convs:
assert self.extra_stage == 1, 'extra_stage should be 1 if FPN has not extra convs'
fpn_output.append(F.max_pool2d(fpn_output[-1], 1, stride=2))
# add extra conv levels for RetinaNet(use_c5)/FCOS(use_p5)
else:
if self.use_c5:
extra_source = body_feats[-1]
else:
extra_source = fpn_output[-1]
fpn_output.append(self.fpn_convs[num_levels](extra_source))
for i in range(1, self.extra_stage):
if self.relu_before_extra_convs:
fpn_output.append(self.fpn_convs[num_levels + i](F.relu(
fpn_output[-1])))
else:
fpn_output.append(self.fpn_convs[num_levels + i](
fpn_output[-1]))
return fpn_output
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/whai362/PSENet/blob/python3/models/neck/fpn.py
"""
import paddle.nn as nn
import paddle
import math
import paddle.nn.functional as F
class Conv_BN_ReLU(nn.Layer):
def __init__(self,
in_planes,
out_planes,
kernel_size=1,
stride=1,
padding=0):
super(Conv_BN_ReLU, self).__init__()
self.conv = nn.Conv2D(
in_planes,
out_planes,
kernel_size=kernel_size,
stride=stride,
padding=padding,
bias_attr=False)
self.bn = nn.BatchNorm2D(out_planes, momentum=0.1)
self.relu = nn.ReLU()
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
m.weight = paddle.create_parameter(
shape=m.weight.shape,
dtype='float32',
default_initializer=paddle.nn.initializer.Normal(
0, math.sqrt(2. / n)))
elif isinstance(m, nn.BatchNorm2D):
m.weight = paddle.create_parameter(
shape=m.weight.shape,
dtype='float32',
default_initializer=paddle.nn.initializer.Constant(1.0))
m.bias = paddle.create_parameter(
shape=m.bias.shape,
dtype='float32',
default_initializer=paddle.nn.initializer.Constant(0.0))
def forward(self, x):
return self.relu(self.bn(self.conv(x)))
class FPN(nn.Layer):
def __init__(self, in_channels, out_channels):
super(FPN, self).__init__()
# Top layer
self.toplayer_ = Conv_BN_ReLU(
in_channels[3], out_channels, kernel_size=1, stride=1, padding=0)
# Lateral layers
self.latlayer1_ = Conv_BN_ReLU(
in_channels[2], out_channels, kernel_size=1, stride=1, padding=0)
self.latlayer2_ = Conv_BN_ReLU(
in_channels[1], out_channels, kernel_size=1, stride=1, padding=0)
self.latlayer3_ = Conv_BN_ReLU(
in_channels[0], out_channels, kernel_size=1, stride=1, padding=0)
# Smooth layers
self.smooth1_ = Conv_BN_ReLU(
out_channels, out_channels, kernel_size=3, stride=1, padding=1)
self.smooth2_ = Conv_BN_ReLU(
out_channels, out_channels, kernel_size=3, stride=1, padding=1)
self.smooth3_ = Conv_BN_ReLU(
out_channels, out_channels, kernel_size=3, stride=1, padding=1)
self.out_channels = out_channels * 4
for m in self.sublayers():
if isinstance(m, nn.Conv2D):
n = m._kernel_size[0] * m._kernel_size[1] * m._out_channels
m.weight = paddle.create_parameter(
shape=m.weight.shape,
dtype='float32',
default_initializer=paddle.nn.initializer.Normal(
0, math.sqrt(2. / n)))
elif isinstance(m, nn.BatchNorm2D):
m.weight = paddle.create_parameter(
shape=m.weight.shape,
dtype='float32',
default_initializer=paddle.nn.initializer.Constant(1.0))
m.bias = paddle.create_parameter(
shape=m.bias.shape,
dtype='float32',
default_initializer=paddle.nn.initializer.Constant(0.0))
def _upsample(self, x, scale=1):
return F.upsample(x, scale_factor=scale, mode='bilinear')
def _upsample_add(self, x, y, scale=1):
return F.upsample(x, scale_factor=scale, mode='bilinear') + y
def forward(self, x):
f2, f3, f4, f5 = x
p5 = self.toplayer_(f5)
f4 = self.latlayer1_(f4)
p4 = self._upsample_add(p5, f4, 2)
p4 = self.smooth1_(p4)
f3 = self.latlayer2_(f3)
p3 = self._upsample_add(p4, f3, 2)
p3 = self.smooth2_(p3)
f2 = self.latlayer3_(f2)
p2 = self._upsample_add(p3, f2, 2)
p2 = self.smooth3_(p2)
p3 = self._upsample(p3, 2)
p4 = self._upsample(p4, 4)
p5 = self._upsample(p5, 8)
fuse = paddle.concat([p2, p3, p4, p5], axis=1)
return fuse
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
class ConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
is_vd_mode=False,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
self.is_vd_mode = is_vd_mode
self._pool2d_avg = nn.AvgPool2D(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self._conv = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
self._batch_norm = nn.BatchNorm(
out_channels,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance',
use_global_stats=False)
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class DeConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size=4,
stride=2,
padding=1,
groups=1,
if_act=True,
act=None,
name=None):
super(DeConvBNLayer, self).__init__()
self.if_act = if_act
self.act = act
self.deconv = nn.Conv2DTranspose(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
weight_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
self.bn = nn.BatchNorm(
num_channels=out_channels,
act=act,
param_attr=ParamAttr(name="bn_" + name + "_scale"),
bias_attr=ParamAttr(name="bn_" + name + "_offset"),
moving_mean_name="bn_" + name + "_mean",
moving_variance_name="bn_" + name + "_variance",
use_global_stats=False)
def forward(self, x):
x = self.deconv(x)
x = self.bn(x)
return x
class PGFPN(nn.Layer):
def __init__(self, in_channels, **kwargs):
super(PGFPN, self).__init__()
num_inputs = [2048, 2048, 1024, 512, 256]
num_outputs = [256, 256, 192, 192, 128]
self.out_channels = 128
self.conv_bn_layer_1 = ConvBNLayer(
in_channels=3,
out_channels=32,
kernel_size=3,
stride=1,
act=None,
name='FPN_d1')
self.conv_bn_layer_2 = ConvBNLayer(
in_channels=64,
out_channels=64,
kernel_size=3,
stride=1,
act=None,
name='FPN_d2')
self.conv_bn_layer_3 = ConvBNLayer(
in_channels=256,
out_channels=128,
kernel_size=3,
stride=1,
act=None,
name='FPN_d3')
self.conv_bn_layer_4 = ConvBNLayer(
in_channels=32,
out_channels=64,
kernel_size=3,
stride=2,
act=None,
name='FPN_d4')
self.conv_bn_layer_5 = ConvBNLayer(
in_channels=64,
out_channels=64,
kernel_size=3,
stride=1,
act='relu',
name='FPN_d5')
self.conv_bn_layer_6 = ConvBNLayer(
in_channels=64,
out_channels=128,
kernel_size=3,
stride=2,
act=None,
name='FPN_d6')
self.conv_bn_layer_7 = ConvBNLayer(
in_channels=128,
out_channels=128,
kernel_size=3,
stride=1,
act='relu',
name='FPN_d7')
self.conv_bn_layer_8 = ConvBNLayer(
in_channels=128,
out_channels=128,
kernel_size=1,
stride=1,
act=None,
name='FPN_d8')
self.conv_h0 = ConvBNLayer(
in_channels=num_inputs[0],
out_channels=num_outputs[0],
kernel_size=1,
stride=1,
act=None,
name="conv_h{}".format(0))
self.conv_h1 = ConvBNLayer(
in_channels=num_inputs[1],
out_channels=num_outputs[1],
kernel_size=1,
stride=1,
act=None,
name="conv_h{}".format(1))
self.conv_h2 = ConvBNLayer(
in_channels=num_inputs[2],
out_channels=num_outputs[2],
kernel_size=1,
stride=1,
act=None,
name="conv_h{}".format(2))
self.conv_h3 = ConvBNLayer(
in_channels=num_inputs[3],
out_channels=num_outputs[3],
kernel_size=1,
stride=1,
act=None,
name="conv_h{}".format(3))
self.conv_h4 = ConvBNLayer(
in_channels=num_inputs[4],
out_channels=num_outputs[4],
kernel_size=1,
stride=1,
act=None,
name="conv_h{}".format(4))
self.dconv0 = DeConvBNLayer(
in_channels=num_outputs[0],
out_channels=num_outputs[0 + 1],
name="dconv_{}".format(0))
self.dconv1 = DeConvBNLayer(
in_channels=num_outputs[1],
out_channels=num_outputs[1 + 1],
act=None,
name="dconv_{}".format(1))
self.dconv2 = DeConvBNLayer(
in_channels=num_outputs[2],
out_channels=num_outputs[2 + 1],
act=None,
name="dconv_{}".format(2))
self.dconv3 = DeConvBNLayer(
in_channels=num_outputs[3],
out_channels=num_outputs[3 + 1],
act=None,
name="dconv_{}".format(3))
self.conv_g1 = ConvBNLayer(
in_channels=num_outputs[1],
out_channels=num_outputs[1],
kernel_size=3,
stride=1,
act='relu',
name="conv_g{}".format(1))
self.conv_g2 = ConvBNLayer(
in_channels=num_outputs[2],
out_channels=num_outputs[2],
kernel_size=3,
stride=1,
act='relu',
name="conv_g{}".format(2))
self.conv_g3 = ConvBNLayer(
in_channels=num_outputs[3],
out_channels=num_outputs[3],
kernel_size=3,
stride=1,
act='relu',
name="conv_g{}".format(3))
self.conv_g4 = ConvBNLayer(
in_channels=num_outputs[4],
out_channels=num_outputs[4],
kernel_size=3,
stride=1,
act='relu',
name="conv_g{}".format(4))
self.convf = ConvBNLayer(
in_channels=num_outputs[4],
out_channels=num_outputs[4],
kernel_size=1,
stride=1,
act=None,
name="conv_f{}".format(4))
def forward(self, x):
c0, c1, c2, c3, c4, c5, c6 = x
# FPN_Down_Fusion
f = [c0, c1, c2]
g = [None, None, None]
h = [None, None, None]
h[0] = self.conv_bn_layer_1(f[0])
h[1] = self.conv_bn_layer_2(f[1])
h[2] = self.conv_bn_layer_3(f[2])
g[0] = self.conv_bn_layer_4(h[0])
g[1] = paddle.add(g[0], h[1])
g[1] = F.relu(g[1])
g[1] = self.conv_bn_layer_5(g[1])
g[1] = self.conv_bn_layer_6(g[1])
g[2] = paddle.add(g[1], h[2])
g[2] = F.relu(g[2])
g[2] = self.conv_bn_layer_7(g[2])
f_down = self.conv_bn_layer_8(g[2])
# FPN UP Fusion
f1 = [c6, c5, c4, c3, c2]
g = [None, None, None, None, None]
h = [None, None, None, None, None]
h[0] = self.conv_h0(f1[0])
h[1] = self.conv_h1(f1[1])
h[2] = self.conv_h2(f1[2])
h[3] = self.conv_h3(f1[3])
h[4] = self.conv_h4(f1[4])
g[0] = self.dconv0(h[0])
g[1] = paddle.add(g[0], h[1])
g[1] = F.relu(g[1])
g[1] = self.conv_g1(g[1])
g[1] = self.dconv1(g[1])
g[2] = paddle.add(g[1], h[2])
g[2] = F.relu(g[2])
g[2] = self.conv_g2(g[2])
g[2] = self.dconv2(g[2])
g[3] = paddle.add(g[2], h[3])
g[3] = F.relu(g[3])
g[3] = self.conv_g3(g[3])
g[3] = self.dconv3(g[3])
g[4] = paddle.add(x=g[3], y=h[4])
g[4] = F.relu(g[4])
g[4] = self.conv_g4(g[4])
f_up = self.convf(g[4])
f_common = paddle.add(f_down, f_up)
f_common = F.relu(f_common)
return f_common
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Code is refer from:
https://github.com/RuijieJ/pren/blob/main/Nets/Aggregation.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
class PoolAggregate(nn.Layer):
def __init__(self, n_r, d_in, d_middle=None, d_out=None):
super(PoolAggregate, self).__init__()
if not d_middle:
d_middle = d_in
if not d_out:
d_out = d_in
self.d_in = d_in
self.d_middle = d_middle
self.d_out = d_out
self.act = nn.Swish()
self.n_r = n_r
self.aggs = self._build_aggs()
def _build_aggs(self):
aggs = []
for i in range(self.n_r):
aggs.append(
self.add_sublayer(
'{}'.format(i),
nn.Sequential(
('conv1', nn.Conv2D(
self.d_in, self.d_middle, 3, 2, 1, bias_attr=False)
), ('bn1', nn.BatchNorm(self.d_middle)),
('act', self.act), ('conv2', nn.Conv2D(
self.d_middle, self.d_out, 3, 2, 1, bias_attr=False
)), ('bn2', nn.BatchNorm(self.d_out)))))
return aggs
def forward(self, x):
b = x.shape[0]
outs = []
for agg in self.aggs:
y = agg(x)
p = F.adaptive_avg_pool2d(y, 1)
outs.append(p.reshape((b, 1, self.d_out)))
out = paddle.concat(outs, 1)
return out
class WeightAggregate(nn.Layer):
def __init__(self, n_r, d_in, d_middle=None, d_out=None):
super(WeightAggregate, self).__init__()
if not d_middle:
d_middle = d_in
if not d_out:
d_out = d_in
self.n_r = n_r
self.d_out = d_out
self.act = nn.Swish()
self.conv_n = nn.Sequential(
('conv1', nn.Conv2D(
d_in, d_in, 3, 1, 1,
bias_attr=False)), ('bn1', nn.BatchNorm(d_in)),
('act1', self.act), ('conv2', nn.Conv2D(
d_in, n_r, 1, bias_attr=False)), ('bn2', nn.BatchNorm(n_r)),
('act2', nn.Sigmoid()))
self.conv_d = nn.Sequential(
('conv1', nn.Conv2D(
d_in, d_middle, 3, 1, 1,
bias_attr=False)), ('bn1', nn.BatchNorm(d_middle)),
('act1', self.act), ('conv2', nn.Conv2D(
d_middle, d_out, 1,
bias_attr=False)), ('bn2', nn.BatchNorm(d_out)))
def forward(self, x):
b, _, h, w = x.shape
hmaps = self.conv_n(x)
fmaps = self.conv_d(x)
r = paddle.bmm(
hmaps.reshape((b, self.n_r, h * w)),
fmaps.reshape((b, self.d_out, h * w)).transpose((0, 2, 1)))
return r
class GCN(nn.Layer):
def __init__(self, d_in, n_in, d_out=None, n_out=None, dropout=0.1):
super(GCN, self).__init__()
if not d_out:
d_out = d_in
if not n_out:
n_out = d_in
self.conv_n = nn.Conv1D(n_in, n_out, 1)
self.linear = nn.Linear(d_in, d_out)
self.dropout = nn.Dropout(dropout)
self.act = nn.Swish()
def forward(self, x):
x = self.conv_n(x)
x = self.dropout(self.linear(x))
return self.act(x)
class PRENFPN(nn.Layer):
def __init__(self, in_channels, n_r, d_model, max_len, dropout):
super(PRENFPN, self).__init__()
assert len(in_channels) == 3, "in_channels' length must be 3."
c1, c2, c3 = in_channels # the depths are from big to small
# build fpn
assert d_model % 3 == 0, "{} can't be divided by 3.".format(d_model)
self.agg_p1 = PoolAggregate(n_r, c1, d_out=d_model // 3)
self.agg_p2 = PoolAggregate(n_r, c2, d_out=d_model // 3)
self.agg_p3 = PoolAggregate(n_r, c3, d_out=d_model // 3)
self.agg_w1 = WeightAggregate(n_r, c1, 4 * c1, d_model // 3)
self.agg_w2 = WeightAggregate(n_r, c2, 4 * c2, d_model // 3)
self.agg_w3 = WeightAggregate(n_r, c3, 4 * c3, d_model // 3)
self.gcn_pool = GCN(d_model, n_r, d_model, max_len, dropout)
self.gcn_weight = GCN(d_model, n_r, d_model, max_len, dropout)
self.out_channels = d_model
def forward(self, inputs):
f3, f5, f7 = inputs
rp1 = self.agg_p1(f3)
rp2 = self.agg_p2(f5)
rp3 = self.agg_p3(f7)
rp = paddle.concat([rp1, rp2, rp3], 2) # [b,nr,d]
rw1 = self.agg_w1(f3)
rw2 = self.agg_w2(f5)
rw3 = self.agg_w3(f7)
rw = paddle.concat([rw1, rw2, rw3], 2) # [b,nr,d]
y1 = self.gcn_pool(rp)
y2 = self.gcn_weight(rw)
y = 0.5 * (y1 + y2)
return y # [b,max_len,d]
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
from ppocr.modeling.heads.rec_ctc_head import get_para_bias_attr
from ppocr.modeling.backbones.rec_svtrnet import Block, ConvBNLayer, trunc_normal_, zeros_, ones_
class Im2Seq(nn.Layer):
def __init__(self, in_channels, **kwargs):
super().__init__()
self.out_channels = in_channels
def forward(self, x):
B, C, H, W = x.shape
assert H == 1
x = x.squeeze(axis=2)
x = x.transpose([0, 2, 1]) # (NTC)(batch, width, channels)
return x
class EncoderWithRNN(nn.Layer):
def __init__(self, in_channels, hidden_size):
super(EncoderWithRNN, self).__init__()
self.out_channels = hidden_size * 2
self.lstm = nn.LSTM(
in_channels, hidden_size, direction='bidirectional', num_layers=2)
def forward(self, x):
x, _ = self.lstm(x)
return x
class EncoderWithFC(nn.Layer):
def __init__(self, in_channels, hidden_size):
super(EncoderWithFC, self).__init__()
self.out_channels = hidden_size
weight_attr, bias_attr = get_para_bias_attr(
l2_decay=0.00001, k=in_channels)
self.fc = nn.Linear(
in_channels,
hidden_size,
weight_attr=weight_attr,
bias_attr=bias_attr,
name='reduce_encoder_fea')
def forward(self, x):
x = self.fc(x)
return x
class EncoderWithSVTR(nn.Layer):
def __init__(
self,
in_channels,
dims=64, # XS
depth=2,
hidden_dims=120,
use_guide=False,
num_heads=8,
qkv_bias=True,
mlp_ratio=2.0,
drop_rate=0.1,
attn_drop_rate=0.1,
drop_path=0.,
qk_scale=None):
super(EncoderWithSVTR, self).__init__()
self.depth = depth
self.use_guide = use_guide
self.conv1 = ConvBNLayer(
in_channels, in_channels // 8, padding=1, act=nn.Swish)
self.conv2 = ConvBNLayer(
in_channels // 8, hidden_dims, kernel_size=1, act=nn.Swish)
self.svtr_block = nn.LayerList([
Block(
dim=hidden_dims,
num_heads=num_heads,
mixer='Global',
HW=None,
mlp_ratio=mlp_ratio,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop_rate,
act_layer=nn.Swish,
attn_drop=attn_drop_rate,
drop_path=drop_path,
norm_layer='nn.LayerNorm',
epsilon=1e-05,
prenorm=False) for i in range(depth)
])
self.norm = nn.LayerNorm(hidden_dims, epsilon=1e-6)
self.conv3 = ConvBNLayer(
hidden_dims, in_channels, kernel_size=1, act=nn.Swish)
# last conv-nxn, the input is concat of input tensor and conv3 output tensor
self.conv4 = ConvBNLayer(
2 * in_channels, in_channels // 8, padding=1, act=nn.Swish)
self.conv1x1 = ConvBNLayer(
in_channels // 8, dims, kernel_size=1, act=nn.Swish)
self.out_channels = dims
self.apply(self._init_weights)
def _init_weights(self, m):
if isinstance(m, nn.Linear):
trunc_normal_(m.weight)
if isinstance(m, nn.Linear) and m.bias is not None:
zeros_(m.bias)
elif isinstance(m, nn.LayerNorm):
zeros_(m.bias)
ones_(m.weight)
def forward(self, x):
# for use guide
if self.use_guide:
z = x.clone()
z.stop_gradient = True
else:
z = x
# for short cut
h = z
# reduce dim
z = self.conv1(z)
z = self.conv2(z)
# SVTR global block
B, C, H, W = z.shape
z = z.flatten(2).transpose([0, 2, 1])
for blk in self.svtr_block:
z = blk(z)
z = self.norm(z)
# last stage
z = z.reshape([0, H, W, C]).transpose([0, 3, 1, 2])
z = self.conv3(z)
z = paddle.concat((h, z), axis=1)
z = self.conv1x1(self.conv4(z))
return z
class SequenceEncoder(nn.Layer):
def __init__(self, in_channels, encoder_type, hidden_size=48, **kwargs):
super(SequenceEncoder, self).__init__()
self.encoder_reshape = Im2Seq(in_channels)
self.out_channels = self.encoder_reshape.out_channels
self.encoder_type = encoder_type
if encoder_type == 'reshape':
self.only_reshape = True
else:
support_encoder_dict = {
'reshape': Im2Seq,
'fc': EncoderWithFC,
'rnn': EncoderWithRNN,
'svtr': EncoderWithSVTR
}
assert encoder_type in support_encoder_dict, '{} must in {}'.format(
encoder_type, support_encoder_dict.keys())
if encoder_type == "svtr":
self.encoder = support_encoder_dict[encoder_type](
self.encoder_reshape.out_channels, **kwargs)
else:
self.encoder = support_encoder_dict[encoder_type](
self.encoder_reshape.out_channels, hidden_size)
self.out_channels = self.encoder.out_channels
self.only_reshape = False
def forward(self, x):
if self.encoder_type != 'svtr':
x = self.encoder_reshape(x)
if not self.only_reshape:
x = self.encoder(x)
return x
else:
x = self.encoder(x)
x = self.encoder_reshape(x)
return x
# copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
class ConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
groups=1,
if_act=True,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
self.if_act = if_act
self.act = act
self.conv = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
self.bn = nn.BatchNorm(
num_channels=out_channels,
act=act,
param_attr=ParamAttr(name="bn_" + name + "_scale"),
bias_attr=ParamAttr(name="bn_" + name + "_offset"),
moving_mean_name="bn_" + name + "_mean",
moving_variance_name="bn_" + name + "_variance")
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return x
class DeConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
groups=1,
if_act=True,
act=None,
name=None):
super(DeConvBNLayer, self).__init__()
self.if_act = if_act
self.act = act
self.deconv = nn.Conv2DTranspose(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=name + '_weights'),
bias_attr=False)
self.bn = nn.BatchNorm(
num_channels=out_channels,
act=act,
param_attr=ParamAttr(name="bn_" + name + "_scale"),
bias_attr=ParamAttr(name="bn_" + name + "_offset"),
moving_mean_name="bn_" + name + "_mean",
moving_variance_name="bn_" + name + "_variance")
def forward(self, x):
x = self.deconv(x)
x = self.bn(x)
return x
class FPN_Up_Fusion(nn.Layer):
def __init__(self, in_channels):
super(FPN_Up_Fusion, self).__init__()
in_channels = in_channels[::-1]
out_channels = [256, 256, 192, 192, 128]
self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 1, 1, act=None, name='fpn_up_h0')
self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 1, 1, act=None, name='fpn_up_h1')
self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 1, 1, act=None, name='fpn_up_h2')
self.h3_conv = ConvBNLayer(in_channels[3], out_channels[3], 1, 1, act=None, name='fpn_up_h3')
self.h4_conv = ConvBNLayer(in_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_h4')
self.g0_conv = DeConvBNLayer(out_channels[0], out_channels[1], 4, 2, act=None, name='fpn_up_g0')
self.g1_conv = nn.Sequential(
ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_up_g1_1'),
DeConvBNLayer(out_channels[1], out_channels[2], 4, 2, act=None, name='fpn_up_g1_2')
)
self.g2_conv = nn.Sequential(
ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_up_g2_1'),
DeConvBNLayer(out_channels[2], out_channels[3], 4, 2, act=None, name='fpn_up_g2_2')
)
self.g3_conv = nn.Sequential(
ConvBNLayer(out_channels[3], out_channels[3], 3, 1, act='relu', name='fpn_up_g3_1'),
DeConvBNLayer(out_channels[3], out_channels[4], 4, 2, act=None, name='fpn_up_g3_2')
)
self.g4_conv = nn.Sequential(
ConvBNLayer(out_channels[4], out_channels[4], 3, 1, act='relu', name='fpn_up_fusion_1'),
ConvBNLayer(out_channels[4], out_channels[4], 1, 1, act=None, name='fpn_up_fusion_2')
)
def _add_relu(self, x1, x2):
x = paddle.add(x=x1, y=x2)
x = F.relu(x)
return x
def forward(self, x):
f = x[2:][::-1]
h0 = self.h0_conv(f[0])
h1 = self.h1_conv(f[1])
h2 = self.h2_conv(f[2])
h3 = self.h3_conv(f[3])
h4 = self.h4_conv(f[4])
g0 = self.g0_conv(h0)
g1 = self._add_relu(g0, h1)
g1 = self.g1_conv(g1)
g2 = self.g2_conv(self._add_relu(g1, h2))
g3 = self.g3_conv(self._add_relu(g2, h3))
g4 = self.g4_conv(self._add_relu(g3, h4))
return g4
class FPN_Down_Fusion(nn.Layer):
def __init__(self, in_channels):
super(FPN_Down_Fusion, self).__init__()
out_channels = [32, 64, 128]
self.h0_conv = ConvBNLayer(in_channels[0], out_channels[0], 3, 1, act=None, name='fpn_down_h0')
self.h1_conv = ConvBNLayer(in_channels[1], out_channels[1], 3, 1, act=None, name='fpn_down_h1')
self.h2_conv = ConvBNLayer(in_channels[2], out_channels[2], 3, 1, act=None, name='fpn_down_h2')
self.g0_conv = ConvBNLayer(out_channels[0], out_channels[1], 3, 2, act=None, name='fpn_down_g0')
self.g1_conv = nn.Sequential(
ConvBNLayer(out_channels[1], out_channels[1], 3, 1, act='relu', name='fpn_down_g1_1'),
ConvBNLayer(out_channels[1], out_channels[2], 3, 2, act=None, name='fpn_down_g1_2')
)
self.g2_conv = nn.Sequential(
ConvBNLayer(out_channels[2], out_channels[2], 3, 1, act='relu', name='fpn_down_fusion_1'),
ConvBNLayer(out_channels[2], out_channels[2], 1, 1, act=None, name='fpn_down_fusion_2')
)
def forward(self, x):
f = x[:3]
h0 = self.h0_conv(f[0])
h1 = self.h1_conv(f[1])
h2 = self.h2_conv(f[2])
g0 = self.g0_conv(h0)
g1 = paddle.add(x=g0, y=h1)
g1 = F.relu(g1)
g1 = self.g1_conv(g1)
g2 = paddle.add(x=g1, y=h2)
g2 = F.relu(g2)
g2 = self.g2_conv(g2)
return g2
class Cross_Attention(nn.Layer):
def __init__(self, in_channels):
super(Cross_Attention, self).__init__()
self.theta_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_theta')
self.phi_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_phi')
self.g_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act='relu', name='f_g')
self.fh_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_weight')
self.fh_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fh_sc')
self.fv_weight_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_weight')
self.fv_sc_conv = ConvBNLayer(in_channels, in_channels, 1, 1, act=None, name='fv_sc')
self.f_attn_conv = ConvBNLayer(in_channels * 2, in_channels, 1, 1, act='relu', name='f_attn')
def _cal_fweight(self, f, shape):
f_theta, f_phi, f_g = f
#flatten
f_theta = paddle.transpose(f_theta, [0, 2, 3, 1])
f_theta = paddle.reshape(f_theta, [shape[0] * shape[1], shape[2], 128])
f_phi = paddle.transpose(f_phi, [0, 2, 3, 1])
f_phi = paddle.reshape(f_phi, [shape[0] * shape[1], shape[2], 128])
f_g = paddle.transpose(f_g, [0, 2, 3, 1])
f_g = paddle.reshape(f_g, [shape[0] * shape[1], shape[2], 128])
#correlation
f_attn = paddle.matmul(f_theta, paddle.transpose(f_phi, [0, 2, 1]))
#scale
f_attn = f_attn / (128**0.5)
f_attn = F.softmax(f_attn)
#weighted sum
f_weight = paddle.matmul(f_attn, f_g)
f_weight = paddle.reshape(
f_weight, [shape[0], shape[1], shape[2], 128])
return f_weight
def forward(self, f_common):
f_shape = paddle.shape(f_common)
# print('f_shape: ', f_shape)
f_theta = self.theta_conv(f_common)
f_phi = self.phi_conv(f_common)
f_g = self.g_conv(f_common)
######## horizon ########
fh_weight = self._cal_fweight([f_theta, f_phi, f_g],
[f_shape[0], f_shape[2], f_shape[3]])
fh_weight = paddle.transpose(fh_weight, [0, 3, 1, 2])
fh_weight = self.fh_weight_conv(fh_weight)
#short cut
fh_sc = self.fh_sc_conv(f_common)
f_h = F.relu(fh_weight + fh_sc)
######## vertical ########
fv_theta = paddle.transpose(f_theta, [0, 1, 3, 2])
fv_phi = paddle.transpose(f_phi, [0, 1, 3, 2])
fv_g = paddle.transpose(f_g, [0, 1, 3, 2])
fv_weight = self._cal_fweight([fv_theta, fv_phi, fv_g],
[f_shape[0], f_shape[3], f_shape[2]])
fv_weight = paddle.transpose(fv_weight, [0, 3, 2, 1])
fv_weight = self.fv_weight_conv(fv_weight)
#short cut
fv_sc = self.fv_sc_conv(f_common)
f_v = F.relu(fv_weight + fv_sc)
######## merge ########
f_attn = paddle.concat([f_h, f_v], axis=1)
f_attn = self.f_attn_conv(f_attn)
return f_attn
class SASTFPN(nn.Layer):
def __init__(self, in_channels, with_cab=False, **kwargs):
super(SASTFPN, self).__init__()
self.in_channels = in_channels
self.with_cab = with_cab
self.FPN_Down_Fusion = FPN_Down_Fusion(self.in_channels)
self.FPN_Up_Fusion = FPN_Up_Fusion(self.in_channels)
self.out_channels = 128
self.cross_attention = Cross_Attention(self.out_channels)
def forward(self, x):
#down fpn
f_down = self.FPN_Down_Fusion(x)
#up fpn
f_up = self.FPN_Up_Fusion(x)
#fusion
f_common = paddle.add(x=f_down, y=f_up)
f_common = F.relu(f_common)
if self.with_cab:
# print('enhence f_common with CAB.')
f_common = self.cross_attention(f_common)
return f_common
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
class TableFPN(nn.Layer):
def __init__(self, in_channels, out_channels, **kwargs):
super(TableFPN, self).__init__()
self.out_channels = 512
weight_attr = paddle.nn.initializer.KaimingUniform()
self.in2_conv = nn.Conv2D(
in_channels=in_channels[0],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.in3_conv = nn.Conv2D(
in_channels=in_channels[1],
out_channels=self.out_channels,
kernel_size=1,
stride = 1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.in4_conv = nn.Conv2D(
in_channels=in_channels[2],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.in5_conv = nn.Conv2D(
in_channels=in_channels[3],
out_channels=self.out_channels,
kernel_size=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.p5_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.p4_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.p3_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.p2_conv = nn.Conv2D(
in_channels=self.out_channels,
out_channels=self.out_channels // 4,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr),
bias_attr=False)
self.fuse_conv = nn.Conv2D(
in_channels=self.out_channels * 4,
out_channels=512,
kernel_size=3,
padding=1,
weight_attr=ParamAttr(initializer=weight_attr), bias_attr=False)
def forward(self, x):
c2, c3, c4, c5 = x
in5 = self.in5_conv(c5)
in4 = self.in4_conv(c4)
in3 = self.in3_conv(c3)
in2 = self.in2_conv(c2)
out4 = in4 + F.upsample(
in5, size=in4.shape[2:4], mode="nearest", align_mode=1) # 1/16
out3 = in3 + F.upsample(
out4, size=in3.shape[2:4], mode="nearest", align_mode=1) # 1/8
out2 = in2 + F.upsample(
out3, size=in2.shape[2:4], mode="nearest", align_mode=1) # 1/4
p4 = F.upsample(out4, size=in5.shape[2:4], mode="nearest", align_mode=1)
p3 = F.upsample(out3, size=in5.shape[2:4], mode="nearest", align_mode=1)
p2 = F.upsample(out2, size=in5.shape[2:4], mode="nearest", align_mode=1)
fuse = paddle.concat([in5, p4, p3, p2], axis=1)
fuse_conv = self.fuse_conv(fuse) * 0.005
return [c5 + fuse_conv]
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
__all__ = ['build_transform']
def build_transform(config):
from .tps import TPS
from .stn import STN_ON
support_dict = ['TPS', 'STN_ON']
module_name = config.pop('name')
assert module_name in support_dict, Exception(
'transform only support {}'.format(support_dict))
module_class = eval(module_name)(**config)
return module_class
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/stn_head.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
from paddle import nn, ParamAttr
from paddle.nn import functional as F
import numpy as np
from .tps_spatial_transformer import TPSSpatialTransformer
def conv3x3_block(in_channels, out_channels, stride=1):
n = 3 * 3 * out_channels
w = math.sqrt(2. / n)
conv_layer = nn.Conv2D(
in_channels,
out_channels,
kernel_size=3,
stride=stride,
padding=1,
weight_attr=nn.initializer.Normal(
mean=0.0, std=w),
bias_attr=nn.initializer.Constant(0))
block = nn.Sequential(conv_layer, nn.BatchNorm2D(out_channels), nn.ReLU())
return block
class STN(nn.Layer):
def __init__(self, in_channels, num_ctrlpoints, activation='none'):
super(STN, self).__init__()
self.in_channels = in_channels
self.num_ctrlpoints = num_ctrlpoints
self.activation = activation
self.stn_convnet = nn.Sequential(
conv3x3_block(in_channels, 32), #32x64
nn.MaxPool2D(
kernel_size=2, stride=2),
conv3x3_block(32, 64), #16x32
nn.MaxPool2D(
kernel_size=2, stride=2),
conv3x3_block(64, 128), # 8*16
nn.MaxPool2D(
kernel_size=2, stride=2),
conv3x3_block(128, 256), # 4*8
nn.MaxPool2D(
kernel_size=2, stride=2),
conv3x3_block(256, 256), # 2*4,
nn.MaxPool2D(
kernel_size=2, stride=2),
conv3x3_block(256, 256)) # 1*2
self.stn_fc1 = nn.Sequential(
nn.Linear(
2 * 256,
512,
weight_attr=nn.initializer.Normal(0, 0.001),
bias_attr=nn.initializer.Constant(0)),
nn.BatchNorm1D(512),
nn.ReLU())
fc2_bias = self.init_stn()
self.stn_fc2 = nn.Linear(
512,
num_ctrlpoints * 2,
weight_attr=nn.initializer.Constant(0.0),
bias_attr=nn.initializer.Assign(fc2_bias))
def init_stn(self):
margin = 0.01
sampling_num_per_side = int(self.num_ctrlpoints / 2)
ctrl_pts_x = np.linspace(margin, 1. - margin, sampling_num_per_side)
ctrl_pts_y_top = np.ones(sampling_num_per_side) * margin
ctrl_pts_y_bottom = np.ones(sampling_num_per_side) * (1 - margin)
ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
ctrl_points = np.concatenate(
[ctrl_pts_top, ctrl_pts_bottom], axis=0).astype(np.float32)
if self.activation == 'none':
pass
elif self.activation == 'sigmoid':
ctrl_points = -np.log(1. / ctrl_points - 1.)
ctrl_points = paddle.to_tensor(ctrl_points)
fc2_bias = paddle.reshape(
ctrl_points, shape=[ctrl_points.shape[0] * ctrl_points.shape[1]])
return fc2_bias
def forward(self, x):
x = self.stn_convnet(x)
batch_size, _, h, w = x.shape
x = paddle.reshape(x, shape=(batch_size, -1))
img_feat = self.stn_fc1(x)
x = self.stn_fc2(0.1 * img_feat)
if self.activation == 'sigmoid':
x = F.sigmoid(x)
x = paddle.reshape(x, shape=[-1, self.num_ctrlpoints, 2])
return img_feat, x
class STN_ON(nn.Layer):
def __init__(self, in_channels, tps_inputsize, tps_outputsize,
num_control_points, tps_margins, stn_activation):
super(STN_ON, self).__init__()
self.tps = TPSSpatialTransformer(
output_image_size=tuple(tps_outputsize),
num_control_points=num_control_points,
margins=tuple(tps_margins))
self.stn_head = STN(in_channels=in_channels,
num_ctrlpoints=num_control_points,
activation=stn_activation)
self.tps_inputsize = tps_inputsize
self.out_channels = in_channels
def forward(self, image):
stn_input = paddle.nn.functional.interpolate(
image, self.tps_inputsize, mode="bilinear", align_corners=True)
stn_img_feat, ctrl_points = self.stn_head(stn_input)
x, _ = self.tps(image, ctrl_points)
return x
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/clovaai/deep-text-recognition-benchmark/blob/master/modules/transformation.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
from paddle import nn, ParamAttr
from paddle.nn import functional as F
import numpy as np
class ConvBNLayer(nn.Layer):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
self.conv = nn.Conv2D(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
weight_attr=ParamAttr(name=name + "_weights"),
bias_attr=False)
bn_name = "bn_" + name
self.bn = nn.BatchNorm(
out_channels,
act=act,
param_attr=ParamAttr(name=bn_name + '_scale'),
bias_attr=ParamAttr(bn_name + '_offset'),
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
return x
class LocalizationNetwork(nn.Layer):
def __init__(self, in_channels, num_fiducial, loc_lr, model_name):
super(LocalizationNetwork, self).__init__()
self.F = num_fiducial
F = num_fiducial
if model_name == "large":
num_filters_list = [64, 128, 256, 512]
fc_dim = 256
else:
num_filters_list = [16, 32, 64, 128]
fc_dim = 64
self.block_list = []
for fno in range(0, len(num_filters_list)):
num_filters = num_filters_list[fno]
name = "loc_conv%d" % fno
conv = self.add_sublayer(
name,
ConvBNLayer(
in_channels=in_channels,
out_channels=num_filters,
kernel_size=3,
act='relu',
name=name))
self.block_list.append(conv)
if fno == len(num_filters_list) - 1:
pool = nn.AdaptiveAvgPool2D(1)
else:
pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
in_channels = num_filters
self.block_list.append(pool)
name = "loc_fc1"
stdv = 1.0 / math.sqrt(num_filters_list[-1] * 1.0)
self.fc1 = nn.Linear(
in_channels,
fc_dim,
weight_attr=ParamAttr(
learning_rate=loc_lr,
name=name + "_w",
initializer=nn.initializer.Uniform(-stdv, stdv)),
bias_attr=ParamAttr(name=name + '.b_0'),
name=name)
# Init fc2 in LocalizationNetwork
initial_bias = self.get_initial_fiducials()
initial_bias = initial_bias.reshape(-1)
name = "loc_fc2"
param_attr = ParamAttr(
learning_rate=loc_lr,
initializer=nn.initializer.Assign(np.zeros([fc_dim, F * 2])),
name=name + "_w")
bias_attr = ParamAttr(
learning_rate=loc_lr,
initializer=nn.initializer.Assign(initial_bias),
name=name + "_b")
self.fc2 = nn.Linear(
fc_dim,
F * 2,
weight_attr=param_attr,
bias_attr=bias_attr,
name=name)
self.out_channels = F * 2
def forward(self, x):
"""
Estimating parameters of geometric transformation
Args:
image: input
Return:
batch_C_prime: the matrix of the geometric transformation
"""
B = x.shape[0]
i = 0
for block in self.block_list:
x = block(x)
x = x.squeeze(axis=2).squeeze(axis=2)
x = self.fc1(x)
x = F.relu(x)
x = self.fc2(x)
x = x.reshape(shape=[-1, self.F, 2])
return x
def get_initial_fiducials(self):
""" see RARE paper Fig. 6 (a) """
F = self.F
ctrl_pts_x = np.linspace(-1.0, 1.0, int(F / 2))
ctrl_pts_y_top = np.linspace(0.0, -1.0, num=int(F / 2))
ctrl_pts_y_bottom = np.linspace(1.0, 0.0, num=int(F / 2))
ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
initial_bias = np.concatenate([ctrl_pts_top, ctrl_pts_bottom], axis=0)
return initial_bias
class GridGenerator(nn.Layer):
def __init__(self, in_channels, num_fiducial):
super(GridGenerator, self).__init__()
self.eps = 1e-6
self.F = num_fiducial
name = "ex_fc"
initializer = nn.initializer.Constant(value=0.0)
param_attr = ParamAttr(
learning_rate=0.0, initializer=initializer, name=name + "_w")
bias_attr = ParamAttr(
learning_rate=0.0, initializer=initializer, name=name + "_b")
self.fc = nn.Linear(
in_channels,
6,
weight_attr=param_attr,
bias_attr=bias_attr,
name=name)
def forward(self, batch_C_prime, I_r_size):
"""
Generate the grid for the grid_sampler.
Args:
batch_C_prime: the matrix of the geometric transformation
I_r_size: the shape of the input image
Return:
batch_P_prime: the grid for the grid_sampler
"""
C = self.build_C_paddle()
P = self.build_P_paddle(I_r_size)
inv_delta_C_tensor = self.build_inv_delta_C_paddle(C).astype('float32')
P_hat_tensor = self.build_P_hat_paddle(
C, paddle.to_tensor(P)).astype('float32')
inv_delta_C_tensor.stop_gradient = True
P_hat_tensor.stop_gradient = True
batch_C_ex_part_tensor = self.get_expand_tensor(batch_C_prime)
batch_C_ex_part_tensor.stop_gradient = True
batch_C_prime_with_zeros = paddle.concat(
[batch_C_prime, batch_C_ex_part_tensor], axis=1)
batch_T = paddle.matmul(inv_delta_C_tensor, batch_C_prime_with_zeros)
batch_P_prime = paddle.matmul(P_hat_tensor, batch_T)
return batch_P_prime
def build_C_paddle(self):
""" Return coordinates of fiducial points in I_r; C """
F = self.F
ctrl_pts_x = paddle.linspace(-1.0, 1.0, int(F / 2), dtype='float64')
ctrl_pts_y_top = -1 * paddle.ones([int(F / 2)], dtype='float64')
ctrl_pts_y_bottom = paddle.ones([int(F / 2)], dtype='float64')
ctrl_pts_top = paddle.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
ctrl_pts_bottom = paddle.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
C = paddle.concat([ctrl_pts_top, ctrl_pts_bottom], axis=0)
return C # F x 2
def build_P_paddle(self, I_r_size):
I_r_height, I_r_width = I_r_size
I_r_grid_x = (paddle.arange(
-I_r_width, I_r_width, 2, dtype='float64') + 1.0
) / paddle.to_tensor(np.array([I_r_width]))
I_r_grid_y = (paddle.arange(
-I_r_height, I_r_height, 2, dtype='float64') + 1.0
) / paddle.to_tensor(np.array([I_r_height]))
# P: self.I_r_width x self.I_r_height x 2
P = paddle.stack(paddle.meshgrid(I_r_grid_x, I_r_grid_y), axis=2)
P = paddle.transpose(P, perm=[1, 0, 2])
# n (= self.I_r_width x self.I_r_height) x 2
return P.reshape([-1, 2])
def build_inv_delta_C_paddle(self, C):
""" Return inv_delta_C which is needed to calculate T """
F = self.F
hat_eye = paddle.eye(F, dtype='float64') # F x F
hat_C = paddle.norm(
C.reshape([1, F, 2]) - C.reshape([F, 1, 2]), axis=2) + hat_eye
hat_C = (hat_C**2) * paddle.log(hat_C)
delta_C = paddle.concat( # F+3 x F+3
[
paddle.concat(
[paddle.ones(
(F, 1), dtype='float64'), C, hat_C], axis=1), # F x F+3
paddle.concat(
[
paddle.zeros(
(2, 3), dtype='float64'), paddle.transpose(
C, perm=[1, 0])
],
axis=1), # 2 x F+3
paddle.concat(
[
paddle.zeros(
(1, 3), dtype='float64'), paddle.ones(
(1, F), dtype='float64')
],
axis=1) # 1 x F+3
],
axis=0)
inv_delta_C = paddle.inverse(delta_C)
return inv_delta_C # F+3 x F+3
def build_P_hat_paddle(self, C, P):
F = self.F
eps = self.eps
n = P.shape[0] # n (= self.I_r_width x self.I_r_height)
# P_tile: n x 2 -> n x 1 x 2 -> n x F x 2
P_tile = paddle.tile(paddle.unsqueeze(P, axis=1), (1, F, 1))
C_tile = paddle.unsqueeze(C, axis=0) # 1 x F x 2
P_diff = P_tile - C_tile # n x F x 2
# rbf_norm: n x F
rbf_norm = paddle.norm(P_diff, p=2, axis=2, keepdim=False)
# rbf: n x F
rbf = paddle.multiply(
paddle.square(rbf_norm), paddle.log(rbf_norm + eps))
P_hat = paddle.concat(
[paddle.ones(
(n, 1), dtype='float64'), P, rbf], axis=1)
return P_hat # n x F+3
def get_expand_tensor(self, batch_C_prime):
B, H, C = batch_C_prime.shape
batch_C_prime = batch_C_prime.reshape([B, H * C])
batch_C_ex_part_tensor = self.fc(batch_C_prime)
batch_C_ex_part_tensor = batch_C_ex_part_tensor.reshape([-1, 3, 2])
return batch_C_ex_part_tensor
class TPS(nn.Layer):
def __init__(self, in_channels, num_fiducial, loc_lr, model_name):
super(TPS, self).__init__()
self.loc_net = LocalizationNetwork(in_channels, num_fiducial, loc_lr,
model_name)
self.grid_generator = GridGenerator(self.loc_net.out_channels,
num_fiducial)
self.out_channels = in_channels
def forward(self, image):
image.stop_gradient = False
batch_C_prime = self.loc_net(image)
batch_P_prime = self.grid_generator(batch_C_prime, image.shape[2:])
batch_P_prime = batch_P_prime.reshape(
[-1, image.shape[2], image.shape[3], 2])
batch_I_r = F.grid_sample(x=image, grid=batch_P_prime)
return batch_I_r
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is refer from:
https://github.com/ayumiymk/aster.pytorch/blob/master/lib/models/tps_spatial_transformer.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import paddle
from paddle import nn, ParamAttr
from paddle.nn import functional as F
import numpy as np
import itertools
def grid_sample(input, grid, canvas=None):
input.stop_gradient = False
output = F.grid_sample(input, grid)
if canvas is None:
return output
else:
input_mask = paddle.ones(shape=input.shape)
output_mask = F.grid_sample(input_mask, grid)
padded_output = output * output_mask + canvas * (1 - output_mask)
return padded_output
# phi(x1, x2) = r^2 * log(r), where r = ||x1 - x2||_2
def compute_partial_repr(input_points, control_points):
N = input_points.shape[0]
M = control_points.shape[0]
pairwise_diff = paddle.reshape(
input_points, shape=[N, 1, 2]) - paddle.reshape(
control_points, shape=[1, M, 2])
# original implementation, very slow
# pairwise_dist = torch.sum(pairwise_diff ** 2, dim = 2) # square of distance
pairwise_diff_square = pairwise_diff * pairwise_diff
pairwise_dist = pairwise_diff_square[:, :, 0] + pairwise_diff_square[:, :,
1]
repr_matrix = 0.5 * pairwise_dist * paddle.log(pairwise_dist)
# fix numerical error for 0 * log(0), substitute all nan with 0
mask = np.array(repr_matrix != repr_matrix)
repr_matrix[mask] = 0
return repr_matrix
# output_ctrl_pts are specified, according to our task.
def build_output_control_points(num_control_points, margins):
margin_x, margin_y = margins
num_ctrl_pts_per_side = num_control_points // 2
ctrl_pts_x = np.linspace(margin_x, 1.0 - margin_x, num_ctrl_pts_per_side)
ctrl_pts_y_top = np.ones(num_ctrl_pts_per_side) * margin_y
ctrl_pts_y_bottom = np.ones(num_ctrl_pts_per_side) * (1.0 - margin_y)
ctrl_pts_top = np.stack([ctrl_pts_x, ctrl_pts_y_top], axis=1)
ctrl_pts_bottom = np.stack([ctrl_pts_x, ctrl_pts_y_bottom], axis=1)
output_ctrl_pts_arr = np.concatenate(
[ctrl_pts_top, ctrl_pts_bottom], axis=0)
output_ctrl_pts = paddle.to_tensor(output_ctrl_pts_arr)
return output_ctrl_pts
class TPSSpatialTransformer(nn.Layer):
def __init__(self,
output_image_size=None,
num_control_points=None,
margins=None):
super(TPSSpatialTransformer, self).__init__()
self.output_image_size = output_image_size
self.num_control_points = num_control_points
self.margins = margins
self.target_height, self.target_width = output_image_size
target_control_points = build_output_control_points(num_control_points,
margins)
N = num_control_points
# create padded kernel matrix
forward_kernel = paddle.zeros(shape=[N + 3, N + 3])
target_control_partial_repr = compute_partial_repr(
target_control_points, target_control_points)
target_control_partial_repr = paddle.cast(target_control_partial_repr,
forward_kernel.dtype)
forward_kernel[:N, :N] = target_control_partial_repr
forward_kernel[:N, -3] = 1
forward_kernel[-3, :N] = 1
target_control_points = paddle.cast(target_control_points,
forward_kernel.dtype)
forward_kernel[:N, -2:] = target_control_points
forward_kernel[-2:, :N] = paddle.transpose(
target_control_points, perm=[1, 0])
# compute inverse matrix
inverse_kernel = paddle.inverse(forward_kernel)
# create target cordinate matrix
HW = self.target_height * self.target_width
target_coordinate = list(
itertools.product(
range(self.target_height), range(self.target_width)))
target_coordinate = paddle.to_tensor(target_coordinate) # HW x 2
Y, X = paddle.split(
target_coordinate, target_coordinate.shape[1], axis=1)
Y = Y / (self.target_height - 1)
X = X / (self.target_width - 1)
target_coordinate = paddle.concat(
[X, Y], axis=1) # convert from (y, x) to (x, y)
target_coordinate_partial_repr = compute_partial_repr(
target_coordinate, target_control_points)
target_coordinate_repr = paddle.concat(
[
target_coordinate_partial_repr, paddle.ones(shape=[HW, 1]),
target_coordinate
],
axis=1)
# register precomputed matrices
self.inverse_kernel = inverse_kernel
self.padding_matrix = paddle.zeros(shape=[3, 2])
self.target_coordinate_repr = target_coordinate_repr
self.target_control_points = target_control_points
def forward(self, input, source_control_points):
assert source_control_points.ndimension() == 3
assert source_control_points.shape[1] == self.num_control_points
assert source_control_points.shape[2] == 2
batch_size = paddle.shape(source_control_points)[0]
padding_matrix = paddle.expand(
self.padding_matrix, shape=[batch_size, 3, 2])
Y = paddle.concat([source_control_points, padding_matrix], 1)
mapping_matrix = paddle.matmul(self.inverse_kernel, Y)
source_coordinate = paddle.matmul(self.target_coordinate_repr,
mapping_matrix)
grid = paddle.reshape(
source_coordinate,
shape=[-1, self.target_height, self.target_width, 2])
grid = paddle.clip(grid, 0,
1) # the source_control_points may be out of [0, 1].
# the input to grid_sample is normalized [-1, 1], but what we get is [0, 1]
grid = 2.0 * grid - 1.0
output_maps = grid_sample(input, grid, canvas=None)
return output_maps, source_coordinate
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import copy
import paddle
__all__ = ['build_optimizer']
def build_lr_scheduler(lr_config, epochs, step_each_epoch):
from . import learning_rate
lr_config.update({'epochs': epochs, 'step_each_epoch': step_each_epoch})
lr_name = lr_config.pop('name', 'Const')
lr = getattr(learning_rate, lr_name)(**lr_config)()
return lr
def build_optimizer(config, epochs, step_each_epoch, model):
from . import regularizer, optimizer
config = copy.deepcopy(config)
# step1 build lr
lr = build_lr_scheduler(config.pop('lr'), epochs, step_each_epoch)
# step2 build regularization
if 'regularizer' in config and config['regularizer'] is not None:
reg_config = config.pop('regularizer')
reg_name = reg_config.pop('name')
if not hasattr(regularizer, reg_name):
reg_name += 'Decay'
reg = getattr(regularizer, reg_name)(**reg_config)()
elif 'weight_decay' in config:
reg = config.pop('weight_decay')
else:
reg = None
# step3 build optimizer
optim_name = config.pop('name')
if 'clip_norm' in config:
clip_norm = config.pop('clip_norm')
grad_clip = paddle.nn.ClipGradByNorm(clip_norm=clip_norm)
else:
grad_clip = None
optim = getattr(optimizer, optim_name)(learning_rate=lr,
weight_decay=reg,
grad_clip=grad_clip,
**config)
return optim(model), lr
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment