Commit f1506916 authored by sugon_cxj's avatar sugon_cxj
Browse files

first commit

parent 55c28ed5
Pipeline #266 canceled with stages
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import paddle.nn.functional as F
from paddle import ParamAttr
__all__ = ['MobileNetV3']
def make_divisible(v, divisor=8, min_value=None):
    """Round ``v`` to a nearby multiple of ``divisor``.

    The value is rounded half-up to a multiple of ``divisor`` and clamped
    from below by ``min_value`` (which defaults to ``divisor``).  If that
    result falls below 90% of ``v``, one more ``divisor`` is added.

    Args:
        v: value to round (typically a scaled channel count).
        divisor: the result is built from multiples of this number.
        min_value: lower bound for the result; ``None`` means ``divisor``.

    Returns:
        The adjusted value.
    """
    lower_bound = divisor if min_value is None else min_value
    rounded = int(v + divisor / 2) // divisor * divisor
    # Same tie-breaking as max(lower_bound, rounded).
    result = rounded if rounded > lower_bound else lower_bound
    if result < 0.9 * v:
        # Rounding down removed more than 10% of v; step up one multiple.
        result += divisor
    return result
class MobileNetV3(nn.Layer):
    """MobileNetV3 backbone that returns one feature map per stage."""

    def __init__(self,
                 in_channels=3,
                 model_name='large',
                 scale=0.5,
                 disable_se=False,
                 **kwargs):
        """
        Build the stem convolution and the staged residual units.

        Args:
            in_channels(int): channel count of the input tensor.
            model_name(str): "large" or "small"; picks the block table.
            scale(float): width multiplier; must be one of
                0.35, 0.5, 0.75, 1.0, 1.25.
            disable_se(bool): if True, no residual unit gets an SE module.
            **kwargs: accepted and ignored.
        Raises:
            NotImplementedError: if model_name is not "large" or "small".
        """
        super(MobileNetV3, self).__init__()
        self.disable_se = disable_se

        if model_name == "large":
            cfg = [
                # k, exp, c, se, nl, s,
                (3, 16, 16, False, 'relu', 1),
                (3, 64, 24, False, 'relu', 2),
                (3, 72, 24, False, 'relu', 1),
                (5, 72, 40, True, 'relu', 2),
                (5, 120, 40, True, 'relu', 1),
                (5, 120, 40, True, 'relu', 1),
                (3, 240, 80, False, 'hardswish', 2),
                (3, 200, 80, False, 'hardswish', 1),
                (3, 184, 80, False, 'hardswish', 1),
                (3, 184, 80, False, 'hardswish', 1),
                (3, 480, 112, True, 'hardswish', 1),
                (3, 672, 112, True, 'hardswish', 1),
                (5, 672, 160, True, 'hardswish', 2),
                (5, 960, 160, True, 'hardswish', 1),
                (5, 960, 160, True, 'hardswish', 1),
            ]
            cls_ch_squeeze = 960
        elif model_name == "small":
            cfg = [
                # k, exp, c, se, nl, s,
                (3, 16, 16, True, 'relu', 2),
                (3, 72, 24, False, 'relu', 2),
                (3, 88, 24, False, 'relu', 1),
                (5, 96, 40, True, 'hardswish', 2),
                (5, 240, 40, True, 'hardswish', 1),
                (5, 240, 40, True, 'hardswish', 1),
                (5, 120, 48, True, 'hardswish', 1),
                (5, 144, 48, True, 'hardswish', 1),
                (5, 288, 96, True, 'hardswish', 2),
                (5, 576, 96, True, 'hardswish', 1),
                (5, 576, 96, True, 'hardswish', 1),
            ]
            cls_ch_squeeze = 576
        else:
            raise NotImplementedError("mode[" + model_name +
                                      "_model] is not implemented!")

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
        assert scale in supported_scale, \
            "supported scale are {} but input scale is {}".format(supported_scale, scale)

        # conv1 (stem)
        stem_channels = make_divisible(16 * scale)
        self.conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=stem_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            groups=1,
            if_act=True,
            act='hardswish')

        self.stages = []
        self.out_channels = []

        # A stride-2 row closes the current stage only when its index is past
        # this threshold.
        first_split = 2 if model_name == 'large' else 0
        pending = []
        prev_channels = stem_channels
        for idx, (k, exp, c, se, nl, s) in enumerate(cfg):
            if s == 2 and idx > first_split:
                self.out_channels.append(prev_channels)
                self.stages.append(nn.Sequential(*pending))
                pending = []
            unit_out = make_divisible(scale * c)
            pending.append(
                ResidualUnit(
                    in_channels=prev_channels,
                    mid_channels=make_divisible(scale * exp),
                    out_channels=unit_out,
                    kernel_size=k,
                    stride=s,
                    use_se=se and not self.disable_se,
                    act=nl))
            prev_channels = unit_out

        # The last stage ends with a 1x1 conv widening to the squeeze width.
        last_channels = make_divisible(scale * cls_ch_squeeze)
        pending.append(
            ConvBNLayer(
                in_channels=prev_channels,
                out_channels=last_channels,
                kernel_size=1,
                stride=1,
                padding=0,
                groups=1,
                if_act=True,
                act='hardswish'))
        self.stages.append(nn.Sequential(*pending))
        self.out_channels.append(make_divisible(scale * cls_ch_squeeze))

        # self.stages is a plain list, so each stage is registered explicitly.
        for stage_idx, stage in enumerate(self.stages):
            self.add_sublayer(sublayer=stage, name="stage{}".format(stage_idx))

    def forward(self, x):
        """Run the stem and every stage; return the list of stage outputs."""
        feat = self.conv(x)
        features = []
        for stage in self.stages:
            feat = stage(feat)
            features.append(feat)
        return features
class ConvBNLayer(nn.Layer):
    """Bias-free Conv2D followed by BatchNorm and an optional activation."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 groups=1,
                 if_act=True,
                 act=None):
        """
        Args:
            in_channels(int): input channels of the convolution.
            out_channels(int): output channels of the convolution and BN.
            kernel_size(int): convolution kernel size.
            stride(int): convolution stride.
            padding(int): convolution padding.
            groups(int): convolution groups.
            if_act(bool): whether forward() applies an activation.
            act(str|None): "relu" or "hardswish"; only read when if_act is
                True.
        """
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.act = act
        self.conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            groups=groups,
            bias_attr=False)
        # The activation is applied in forward(), not fused into BatchNorm.
        self.bn = nn.BatchNorm(num_channels=out_channels, act=None)

    def forward(self, x):
        """
        Apply conv -> bn -> activation.

        Raises:
            ValueError: if if_act is True and act is neither "relu" nor
                "hardswish".
        """
        x = self.bn(self.conv(x))
        if not self.if_act:
            return x
        if self.act == "relu":
            return F.relu(x)
        if self.act == "hardswish":
            return F.hardswish(x)
        # Raise instead of print + exit(): exit() would end the whole process
        # with status 0 and cannot be handled by the caller.
        raise ValueError(
            "The activation function({}) is selected incorrectly.".format(
                self.act))
class ResidualUnit(nn.Layer):
    """Inverted residual unit: 1x1 expand, depthwise conv, optional SE,
    1x1 linear projection, and an identity shortcut when shapes allow."""

    def __init__(self,
                 in_channels,
                 mid_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 use_se,
                 act=None):
        """
        Args:
            in_channels(int): channels of the unit input.
            mid_channels(int): channels after the expand convolution.
            out_channels(int): channels of the unit output.
            kernel_size(int): kernel size of the depthwise convolution.
            stride(int): stride of the depthwise convolution.
            use_se(bool): whether to insert an SEModule after the depthwise
                convolution.
            act(str|None): activation name passed to the expand and depthwise
                ConvBNLayers.
        """
        super(ResidualUnit, self).__init__()
        # The identity shortcut needs unchanged resolution and channel count.
        self.if_shortcut = (in_channels == out_channels) if stride == 1 else False
        self.if_se = use_se

        pointwise = dict(kernel_size=1, stride=1, padding=0)
        self.expand_conv = ConvBNLayer(
            in_channels=in_channels,
            out_channels=mid_channels,
            if_act=True,
            act=act,
            **pointwise)
        # groups == channels makes this a depthwise convolution.
        self.bottleneck_conv = ConvBNLayer(
            in_channels=mid_channels,
            out_channels=mid_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=int((kernel_size - 1) // 2),
            groups=mid_channels,
            if_act=True,
            act=act)
        if self.if_se:
            self.mid_se = SEModule(mid_channels)
        self.linear_conv = ConvBNLayer(
            in_channels=mid_channels,
            out_channels=out_channels,
            if_act=False,
            act=None,
            **pointwise)

    def forward(self, inputs):
        """Run the unit; add ``inputs`` back when the shortcut is enabled."""
        out = self.bottleneck_conv(self.expand_conv(inputs))
        if self.if_se:
            out = self.mid_se(out)
        out = self.linear_conv(out)
        if not self.if_shortcut:
            return out
        return paddle.add(inputs, out)
class SEModule(nn.Layer):
    """Squeeze-and-excitation gate: global average pool, two 1x1 convs, and a
    hard-sigmoid whose output multiplies the input."""

    def __init__(self, in_channels, reduction=4):
        """
        Args:
            in_channels(int): channels of the input (and of the gate).
            reduction(int): the hidden 1x1 conv has
                in_channels // reduction channels.
        """
        super(SEModule, self).__init__()
        squeezed = in_channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool2D(1)
        self.conv1 = nn.Conv2D(
            in_channels=in_channels,
            out_channels=squeezed,
            kernel_size=1,
            stride=1,
            padding=0)
        self.conv2 = nn.Conv2D(
            in_channels=squeezed,
            out_channels=in_channels,
            kernel_size=1,
            stride=1,
            padding=0)

    def forward(self, inputs):
        """Return ``inputs`` multiplied by the computed gate."""
        gate = self.conv1(self.avg_pool(inputs))
        gate = self.conv2(F.relu(gate))
        gate = F.hardsigmoid(gate, slope=0.2, offset=0.5)
        return inputs * gate
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.vision.ops import DeformConv2D
from paddle.regularizer import L2Decay
from paddle.nn.initializer import Normal, Constant, XavierUniform
__all__ = ["ResNet"]
class DeformableConvV2(nn.Layer):
    """Deformable convolution whose offsets and mask are predicted from the
    input by a zero-initialised ordinary convolution."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 weight_attr=None,
                 bias_attr=None,
                 lr_scale=1,
                 regularizer=None,
                 skip_quant=False,
                 dcn_bias_regularizer=L2Decay(0.),
                 dcn_bias_lr_scale=2.):
        """
        Args:
            in_channels(int): input channels.
            out_channels(int): output channels of the deformable conv.
            kernel_size(int): square kernel size.
            stride(int): stride of both the offset conv and the deformable
                conv.
            padding(int): accepted but not used; padding is derived from
                kernel_size (and dilation for the deformable conv).
            dilation(int): dilation of the deformable conv.
            groups(int): passed as ``deformable_groups`` and used to size the
                offset/mask channels.
            weight_attr: weight attribute of the deformable conv.
            bias_attr: only its truthiness is used; truthy enables a
                zero-initialised bias on the deformable conv.
            lr_scale: learning-rate factor for the offset conv bias.
            regularizer: regularizer for the offset conv bias.
            skip_quant(bool): if True, sets ``skip_quant`` on the offset conv.
            dcn_bias_regularizer: regularizer for the deformable conv bias.
            dcn_bias_lr_scale: learning-rate factor for the deformable conv
                bias.
        """
        super(DeformableConvV2, self).__init__()
        taps = kernel_size**2
        self.offset_channel = 2 * taps * groups
        self.mask_channel = taps * groups
        half_pad = (kernel_size - 1) // 2

        # in FCOS-DCN head, specifically need learning_rate and regularizer;
        # in ResNet backbone, do not need bias
        dcn_bias_attr = ParamAttr(
            initializer=Constant(value=0),
            regularizer=dcn_bias_regularizer,
            learning_rate=dcn_bias_lr_scale) if bias_attr else False

        self.conv_dcn = DeformConv2D(
            in_channels,
            out_channels,
            kernel_size,
            stride=stride,
            padding=half_pad * dilation,
            dilation=dilation,
            deformable_groups=groups,
            weight_attr=weight_attr,
            bias_attr=dcn_bias_attr)

        offset_bias_kwargs = dict(initializer=Constant(0.))
        if lr_scale != 1 or regularizer is not None:
            offset_bias_kwargs.update(
                learning_rate=lr_scale, regularizer=regularizer)
        offset_bias_attr = ParamAttr(**offset_bias_kwargs)

        # One conv predicts both offsets and mask; forward() splits them.
        self.conv_offset = nn.Conv2D(
            in_channels,
            self.offset_channel + self.mask_channel,
            kernel_size,
            stride=stride,
            padding=half_pad,
            weight_attr=ParamAttr(initializer=Constant(0.0)),
            bias_attr=offset_bias_attr)
        if skip_quant:
            self.conv_offset.skip_quant = True

    def forward(self, x):
        """Predict offsets and a sigmoid mask, then apply the deformable conv."""
        predicted = self.conv_offset(x)
        offset, mask = paddle.split(
            predicted,
            num_or_sections=[self.offset_channel, self.mask_channel],
            axis=1)
        return self.conv_dcn(x, offset, mask=F.sigmoid(mask))
class ConvBNLayer(nn.Layer):
    """Optional 2x2 average pool, then a (plain or deformable) convolution,
    then BatchNorm with an optional fused activation."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 groups=1,
                 is_vd_mode=False,
                 act=None,
                 is_dcn=False):
        """
        Args:
            in_channels(int): input channels.
            out_channels(int): output channels.
            kernel_size(int): kernel size; padding is (kernel_size - 1) // 2.
            stride(int): convolution stride.
            groups(int): groups of the plain convolution (the deformable
                branch always uses 2).
            is_vd_mode(bool): if True, forward() average-pools the input
                first.
            act(str|None): activation name handed to BatchNorm.
            is_dcn(bool): use DeformableConvV2 instead of nn.Conv2D.
        """
        super(ConvBNLayer, self).__init__()
        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)

        shared = dict(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            bias_attr=False)
        if is_dcn:
            # The deformable branch hard-codes groups=2 rather than `groups`.
            self._conv = DeformableConvV2(groups=2, **shared)
        else:
            self._conv = nn.Conv2D(groups=groups, **shared)
        self._batch_norm = nn.BatchNorm(out_channels, act=act)

    def forward(self, inputs):
        """Apply (pool ->) conv -> batch norm."""
        if self.is_vd_mode:
            inputs = self._pool2d_avg(inputs)
        return self._batch_norm(self._conv(inputs))
class BottleneckBlock(nn.Layer):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 (4x wider) plus shortcut."""

    def __init__(
            self,
            in_channels,
            out_channels,
            stride,
            shortcut=True,
            if_first=False,
            is_dcn=False, ):
        """
        Args:
            in_channels(int): channels of the block input.
            out_channels(int): bottleneck width; the block outputs
                out_channels * 4 channels.
            stride(int): stride of the 3x3 convolution.
            shortcut(bool): True uses the input itself as the shortcut;
                False builds a 1x1 projection branch.
            if_first(bool): when False, the projection branch is built with
                is_vd_mode=True.
            is_dcn(bool): build the 3x3 convolution as a deformable conv.
        """
        super(BottleneckBlock, self).__init__()
        expanded = out_channels * 4
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu')
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            is_dcn=is_dcn)
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=expanded,
            kernel_size=1,
            act=None)
        if not shortcut:
            # The projection conv itself always has stride 1.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=expanded,
                kernel_size=1,
                stride=1,
                is_vd_mode=not if_first)
        self.shortcut = shortcut

    def forward(self, inputs):
        """Return relu(shortcut(inputs) + residual(inputs))."""
        residual = self.conv2(self.conv1(self.conv0(inputs)))
        identity = inputs if self.shortcut else self.short(inputs)
        return F.relu(paddle.add(x=identity, y=residual))
class BasicBlock(nn.Layer):
    """Basic residual block: two 3x3 ConvBNLayers plus a shortcut."""

    def __init__(
            self,
            in_channels,
            out_channels,
            stride,
            shortcut=True,
            if_first=False, ):
        """
        Args:
            in_channels(int): channels of the block input.
            out_channels(int): channels of the block output.
            stride(int): stride of the first 3x3 convolution.
            shortcut(bool): True uses the input itself as the shortcut;
                False builds a 1x1 projection branch.
            if_first(bool): when False, the projection branch is built with
                is_vd_mode=True.
        """
        super(BasicBlock, self).__init__()
        self.stride = stride
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu')
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None)
        if not shortcut:
            # The projection conv itself always has stride 1.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                is_vd_mode=not if_first)
        self.shortcut = shortcut

    def forward(self, inputs):
        """Return relu(shortcut(inputs) + residual(inputs))."""
        residual = self.conv1(self.conv0(inputs))
        identity = inputs if self.shortcut else self.short(inputs)
        return F.relu(paddle.add(x=identity, y=residual))
class ResNet(nn.Layer):
    """ResNet backbone with a three-conv stem and optional deformable convs;
    returns the outputs of the stages selected by ``out_indices``."""

    def __init__(self,
                 in_channels=3,
                 layers=50,
                 dcn_stage=None,
                 out_indices=None,
                 **kwargs):
        """
        Args:
            in_channels(int): channels of the input image.
            layers(int): network depth; one of 18, 34, 50, 101, 152, 200.
            dcn_stage(list|None): per-stage flags enabling deformable convs in
                bottleneck blocks; None means all False. Not read when
                layers < 50.
            out_indices(list|None): indices of stages whose output is
                returned; None means [0, 1, 2, 3].
            **kwargs: accepted and ignored.
        """
        super(ResNet, self).__init__()
        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers in (34, 50):
            depth = [3, 4, 6, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]

        use_bottleneck = layers >= 50
        # Bottleneck blocks output 4x their width, basic blocks 1x.
        expansion = 4 if use_bottleneck else 1
        if use_bottleneck:
            num_channels = [64, 256, 512, 1024]
        else:
            num_channels = [64, 64, 128, 256]
        num_filters = [64, 128, 256, 512]

        if dcn_stage is None:
            dcn_stage = [False, False, False, False]
        self.dcn_stage = dcn_stage
        if out_indices is None:
            out_indices = [0, 1, 2, 3]
        self.out_indices = out_indices

        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=32,
            kernel_size=3,
            stride=2,
            act='relu')
        self.conv1_2 = ConvBNLayer(
            in_channels=32,
            out_channels=32,
            kernel_size=3,
            stride=1,
            act='relu')
        self.conv1_3 = ConvBNLayer(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act='relu')
        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

        self.stages = []
        self.out_channels = []
        for stage_idx, num_blocks in enumerate(depth):
            width = num_filters[stage_idx]
            units = []
            for i in range(num_blocks):
                leads_stage = i == 0
                block_kwargs = dict(
                    in_channels=num_channels[stage_idx]
                    if leads_stage else width * expansion,
                    out_channels=width,
                    # Every stage but the first downsamples in its first block.
                    stride=2 if leads_stage and stage_idx != 0 else 1,
                    # Only the first block of a stage needs a projection.
                    shortcut=not leads_stage,
                    if_first=stage_idx == i == 0)
                if use_bottleneck:
                    unit = BottleneckBlock(
                        is_dcn=self.dcn_stage[stage_idx], **block_kwargs)
                else:
                    unit = BasicBlock(**block_kwargs)
                units.append(
                    self.add_sublayer('bb_%d_%d' % (stage_idx, i), unit))
            if stage_idx in self.out_indices:
                self.out_channels.append(width * expansion)
            self.stages.append(nn.Sequential(*units))

    def forward(self, inputs):
        """Run stem and stages; collect the outputs listed in out_indices."""
        y = self.conv1_3(self.conv1_2(self.conv1_1(inputs)))
        y = self.pool2d_max(y)
        selected = []
        for idx, stage in enumerate(self.stages):
            y = stage(y)
            if idx in self.out_indices:
                selected.append(y)
        return selected
# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
__all__ = ["ResNet_SAST"]
class ConvBNLayer(nn.Layer):
    """Optional 2x2 average pool, bias-free Conv2D and BatchNorm, with
    explicitly named parameters derived from ``name``."""

    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=1,
            groups=1,
            is_vd_mode=False,
            act=None,
            name=None, ):
        """
        Args:
            in_channels(int): input channels.
            out_channels(int): output channels.
            kernel_size(int): kernel size; padding is (kernel_size - 1) // 2.
            stride(int): convolution stride.
            groups(int): convolution groups.
            is_vd_mode(bool): if True, forward() average-pools the input
                first.
            act(str|None): activation name handed to BatchNorm.
            name(str): prefix for parameter names. Although the default is
                None, a string is required: it is concatenated with
                "_weights" and sliced.
        """
        super(ConvBNLayer, self).__init__()
        self.is_vd_mode = is_vd_mode
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        conv_weight = ParamAttr(name=name + "_weights")
        self._conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=conv_weight,
            bias_attr=False)
        # BN names: "bn_conv1" for the layer named "conv1", otherwise "bn"
        # followed by the name without its first three characters.
        bn_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
        self._batch_norm = nn.BatchNorm(
            out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(name=bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def forward(self, inputs):
        """Apply (pool ->) conv -> batch norm."""
        if self.is_vd_mode:
            inputs = self._pool2d_avg(inputs)
        return self._batch_norm(self._conv(inputs))
class BottleneckBlock(nn.Layer):
    """Bottleneck residual block (1x1 -> 3x3 -> 1x1, 4x wider) with named
    parameters."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        """
        Args:
            in_channels(int): channels of the block input.
            out_channels(int): bottleneck width; the block outputs
                out_channels * 4 channels.
            stride(int): stride of the 3x3 convolution.
            shortcut(bool): True uses the input itself as the shortcut;
                False builds a 1x1 projection branch.
            if_first(bool): when False, the projection branch is built with
                is_vd_mode=True.
            name(str): prefix for the sub-layer names; must be a string
                because branch suffixes are concatenated to it.
        """
        super(BottleneckBlock, self).__init__()
        expanded = out_channels * 4
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=expanded,
            kernel_size=1,
            act=None,
            name=name + "_branch2c")
        if not shortcut:
            # The projection conv itself always has stride 1.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=expanded,
                kernel_size=1,
                stride=1,
                is_vd_mode=not if_first,
                name=name + "_branch1")
        self.shortcut = shortcut

    def forward(self, inputs):
        """Return relu(shortcut(inputs) + residual(inputs))."""
        residual = self.conv2(self.conv1(self.conv0(inputs)))
        identity = inputs if self.shortcut else self.short(inputs)
        return F.relu(paddle.add(x=identity, y=residual))
class BasicBlock(nn.Layer):
    """Basic residual block (two 3x3 ConvBNLayers) with named parameters."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        """
        Args:
            in_channels(int): channels of the block input.
            out_channels(int): channels of the block output.
            stride(int): stride of the first 3x3 convolution.
            shortcut(bool): True uses the input itself as the shortcut;
                False builds a 1x1 projection branch.
            if_first(bool): when False, the projection branch is built with
                is_vd_mode=True.
            name(str): prefix for the sub-layer names; must be a string
                because branch suffixes are concatenated to it.
        """
        super(BasicBlock, self).__init__()
        self.stride = stride
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")
        if not shortcut:
            # The projection conv itself always has stride 1.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=1,
                is_vd_mode=not if_first,
                name=name + "_branch1")
        self.shortcut = shortcut

    def forward(self, inputs):
        """Return relu(shortcut(inputs) + residual(inputs))."""
        residual = self.conv1(self.conv0(inputs))
        identity = inputs if self.shortcut else self.short(inputs)
        return F.relu(paddle.add(x=identity, y=residual))
class ResNet_SAST(nn.Layer):
    """ResNet backbone with a three-conv stem whose forward() returns the raw
    input, the stem output and every stage output."""

    def __init__(self, in_channels=3, layers=50, **kwargs):
        """
        Args:
            in_channels(int): channels of the input image.
            layers(int): network depth; one of 18, 34, 50, 101, 152, 200.
                Depths 34 and 50 are built with five stages, the others with
                four.
            **kwargs: accepted and ignored.
        """
        super(ResNet_SAST, self).__init__()
        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]

        use_bottleneck = layers >= 50
        block_cls = BottleneckBlock if use_bottleneck else BasicBlock
        # Bottleneck blocks output 4x their width, basic blocks 1x.
        expansion = 4 if use_bottleneck else 1
        if use_bottleneck:
            num_channels = [64, 256, 512, 1024, 2048]
        else:
            # The fifth entry (512, the output width of stage 3) is needed by
            # layers=34, which has five stages; without it construction
            # raised IndexError.
            num_channels = [64, 64, 128, 256, 512]
        num_filters = [64, 128, 256, 512, 512]

        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=32,
            kernel_size=3,
            stride=2,
            act='relu',
            name="conv1_1")
        self.conv1_2 = ConvBNLayer(
            in_channels=32,
            out_channels=32,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_2")
        self.conv1_3 = ConvBNLayer(
            in_channels=32,
            out_channels=64,
            kernel_size=3,
            stride=1,
            act='relu',
            name="conv1_3")
        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

        self.stages = []
        # forward() returns [inputs, stem output, stage outputs...], so the
        # first entry follows the real input channel count (3 by default).
        self.out_channels = [in_channels, 64]
        for stage_idx, num_blocks in enumerate(depth):
            width = num_filters[stage_idx]
            units = []
            for i in range(num_blocks):
                # Parameter-name scheme: "res<stage><letter>", except the
                # third stage of the 101/152 nets which uses "a", "b1", "b2"...
                if layers in [101, 152] and stage_idx == 2:
                    suffix = "a" if i == 0 else "b" + str(i)
                else:
                    suffix = chr(97 + i)
                conv_name = "res" + str(stage_idx + 2) + suffix
                unit = block_cls(
                    in_channels=num_channels[stage_idx]
                    if i == 0 else width * expansion,
                    out_channels=width,
                    # Every stage but the first downsamples in its first block.
                    stride=2 if i == 0 and stage_idx != 0 else 1,
                    # Only the first block of a stage needs a projection.
                    shortcut=i != 0,
                    if_first=stage_idx == i == 0,
                    name=conv_name)
                units.append(
                    self.add_sublayer('bb_%d_%d' % (stage_idx, i), unit))
            self.out_channels.append(width * expansion)
            self.stages.append(nn.Sequential(*units))

    def forward(self, inputs):
        """Return [inputs, stem output, output of each stage]."""
        out = [inputs]
        y = self.conv1_3(self.conv1_2(self.conv1_1(inputs)))
        out.append(y)
        y = self.pool2d_max(y)
        for stage in self.stages:
            y = stage(y)
            out.append(y)
        return out
\ No newline at end of file
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import ParamAttr
import paddle.nn as nn
import paddle.nn.functional as F
__all__ = ["ResNet"]
class ConvBNLayer(nn.Layer):
    """Bias-free Conv2D followed by BatchNorm, with explicitly named
    parameters derived from ``name``."""

    def __init__(
            self,
            in_channels,
            out_channels,
            kernel_size,
            stride=1,
            groups=1,
            is_vd_mode=False,
            act=None,
            name=None, ):
        """
        Args:
            in_channels(int): input channels.
            out_channels(int): output channels.
            kernel_size(int): kernel size; padding is (kernel_size - 1) // 2.
            stride(int): convolution stride.
            groups(int): convolution groups.
            is_vd_mode(bool): stored on the layer; forward() does not read it.
            act(str|None): activation name handed to BatchNorm.
            name(str): prefix for parameter names. Although the default is
                None, a string is required: it is concatenated with
                "_weights" and sliced.
        """
        super(ConvBNLayer, self).__init__()
        self.is_vd_mode = is_vd_mode
        # Created but never applied by forward().
        self._pool2d_avg = nn.AvgPool2D(
            kernel_size=2, stride=2, padding=0, ceil_mode=True)
        conv_weight = ParamAttr(name=name + "_weights")
        self._conv = nn.Conv2D(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
            weight_attr=conv_weight,
            bias_attr=False)
        # BN names: "bn_conv1" for the layer named "conv1", otherwise "bn"
        # followed by the name without its first three characters.
        bn_name = "bn_" + name if name == "conv1" else "bn" + name[3:]
        self._batch_norm = nn.BatchNorm(
            out_channels,
            act=act,
            param_attr=ParamAttr(name=bn_name + '_scale'),
            bias_attr=ParamAttr(name=bn_name + '_offset'),
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')

    def forward(self, inputs):
        """Apply conv -> batch norm."""
        return self._batch_norm(self._conv(inputs))
class BottleneckBlock(nn.Layer):
    """Bottleneck residual block (1x1 -> 3x3 -> 1x1, 4x wider) with named
    parameters and a strided projection shortcut."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        """
        Args:
            in_channels(int): channels of the block input.
            out_channels(int): bottleneck width; the block outputs
                out_channels * 4 channels.
            stride(int): stride of the 3x3 convolution and of the projection
                shortcut.
            shortcut(bool): True uses the input itself as the shortcut;
                False builds a 1x1 projection branch.
            if_first(bool): only used to set is_vd_mode on the projection
                branch.
            name(str): prefix for the sub-layer names; must be a string
                because branch suffixes are concatenated to it.
        """
        super(BottleneckBlock, self).__init__()
        expanded = out_channels * 4
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=1,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2b")
        self.conv2 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=expanded,
            kernel_size=1,
            act=None,
            name=name + "_branch2c")
        if not shortcut:
            # The projection carries the block stride itself.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=expanded,
                kernel_size=1,
                stride=stride,
                is_vd_mode=not if_first,
                name=name + "_branch1")
        self.shortcut = shortcut

    def forward(self, inputs):
        """Return relu(shortcut(inputs) + residual(inputs))."""
        residual = self.conv2(self.conv1(self.conv0(inputs)))
        identity = inputs if self.shortcut else self.short(inputs)
        return F.relu(paddle.add(x=identity, y=residual))
class BasicBlock(nn.Layer):
    """Basic residual block (two 3x3 ConvBNLayers) with named parameters."""

    def __init__(self,
                 in_channels,
                 out_channels,
                 stride,
                 shortcut=True,
                 if_first=False,
                 name=None):
        """
        Args:
            in_channels(int): channels of the block input.
            out_channels(int): channels of the block output.
            stride(int): stride of the first 3x3 convolution and of the
                projection shortcut.
            shortcut(bool): True uses the input itself as the shortcut;
                False builds a 1x1 projection branch.
            if_first(bool): only used to set is_vd_mode on the projection
                branch.
            name(str): prefix for the sub-layer names; must be a string
                because branch suffixes are concatenated to it.
        """
        super(BasicBlock, self).__init__()
        self.stride = stride
        self.conv0 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=3,
            stride=stride,
            act='relu',
            name=name + "_branch2a")
        self.conv1 = ConvBNLayer(
            in_channels=out_channels,
            out_channels=out_channels,
            kernel_size=3,
            act=None,
            name=name + "_branch2b")
        if not shortcut:
            # ConvBNLayer in this file never average-pools, so the projection
            # must carry the block stride itself (as BottleneckBlock does);
            # with stride=1 here a stride-2 block would add tensors of
            # different spatial size.
            self.short = ConvBNLayer(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=1,
                stride=stride,
                is_vd_mode=False if if_first else True,
                name=name + "_branch1")
        self.shortcut = shortcut

    def forward(self, inputs):
        """Return relu(shortcut(inputs) + residual(inputs))."""
        residual = self.conv1(self.conv0(inputs))
        identity = inputs if self.shortcut else self.short(inputs)
        return F.relu(paddle.add(x=identity, y=residual))
class ResNet(nn.Layer):
    """ResNet backbone with a single 7x7 stem conv whose forward() returns the
    raw input, the stem output and every stage output."""

    def __init__(self, in_channels=3, layers=50, **kwargs):
        """
        Args:
            in_channels(int): channels of the input image.
            layers(int): network depth; one of 18, 34, 50, 101, 152, 200.
                Depths 34 and 50 are built with five stages, the others with
                four.
            **kwargs: accepted and ignored.
        """
        super(ResNet, self).__init__()
        self.layers = layers
        supported_layers = [18, 34, 50, 101, 152, 200]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(
                supported_layers, layers)

        if layers == 18:
            depth = [2, 2, 2, 2]
        elif layers == 34 or layers == 50:
            depth = [3, 4, 6, 3, 3]
        elif layers == 101:
            depth = [3, 4, 23, 3]
        elif layers == 152:
            depth = [3, 8, 36, 3]
        elif layers == 200:
            depth = [3, 12, 48, 3]

        use_bottleneck = layers >= 50
        block_cls = BottleneckBlock if use_bottleneck else BasicBlock
        # Bottleneck blocks output 4x their width, basic blocks 1x.
        expansion = 4 if use_bottleneck else 1
        if use_bottleneck:
            num_channels = [64, 256, 512, 1024, 2048]
        else:
            # The fifth entry (512, the output width of stage 3) is needed by
            # layers=34, which has five stages; without it construction
            # raised IndexError.
            num_channels = [64, 64, 128, 256, 512]
        num_filters = [64, 128, 256, 512, 512]

        self.conv1_1 = ConvBNLayer(
            in_channels=in_channels,
            out_channels=64,
            kernel_size=7,
            stride=2,
            act='relu',
            name="conv1_1")
        self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

        self.stages = []
        # forward() returns [inputs, stem output, stage outputs...], so the
        # first entry follows the real input channel count (3 by default).
        self.out_channels = [in_channels, 64]
        for stage_idx, num_blocks in enumerate(depth):
            width = num_filters[stage_idx]
            units = []
            for i in range(num_blocks):
                # Parameter-name scheme: "res<stage><letter>", except the
                # third stage of the 101/152 nets which uses "a", "b1", "b2"...
                if layers in [101, 152] and stage_idx == 2:
                    suffix = "a" if i == 0 else "b" + str(i)
                else:
                    suffix = chr(97 + i)
                conv_name = "res" + str(stage_idx + 2) + suffix
                unit = block_cls(
                    in_channels=num_channels[stage_idx]
                    if i == 0 else width * expansion,
                    out_channels=width,
                    # Every stage but the first downsamples in its first block.
                    stride=2 if i == 0 and stage_idx != 0 else 1,
                    # Only the first block of a stage needs a projection.
                    shortcut=i != 0,
                    if_first=stage_idx == i == 0,
                    name=conv_name)
                units.append(
                    self.add_sublayer('bb_%d_%d' % (stage_idx, i), unit))
            self.out_channels.append(width * expansion)
            self.stages.append(nn.Sequential(*units))

    def forward(self, inputs):
        """Return [inputs, stem output, output of each stage]."""
        out = [inputs]
        y = self.conv1_1(inputs)
        out.append(y)
        y = self.pool2d_max(y)
        for stage in self.stages:
            y = stage(y)
            out.append(y)
        return out
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
from paddle import nn
import numpy as np
import cv2
__all__ = ["Kie_backbone"]
class Encoder(nn.Layer):
    """Two 3x3 conv + BatchNorm(relu) layers followed by a stride-2 max pool."""

    def __init__(self, num_channels, num_filters):
        """
        Args:
            num_channels(int): input channels.
            num_filters(int): output channels of both convolutions.
        """
        super(Encoder, self).__init__()
        conv_cfg = dict(kernel_size=3, stride=1, padding=1, bias_attr=False)
        self.conv1 = nn.Conv2D(num_channels, num_filters, **conv_cfg)
        self.bn1 = nn.BatchNorm(num_filters, act='relu')
        self.conv2 = nn.Conv2D(num_filters, num_filters, **conv_cfg)
        self.bn2 = nn.BatchNorm(num_filters, act='relu')
        self.pool = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)

    def forward(self, inputs):
        """Return (features before pooling, features after pooling)."""
        feat = self.bn1(self.conv1(inputs))
        feat = self.bn2(self.conv2(feat))
        return feat, self.pool(feat)
class Decoder(nn.Layer):
    """Reduces and upsamples the deeper feature, concatenates it with the
    skip feature, then applies two 3x3 conv + BatchNorm(relu) layers."""

    def __init__(self, num_channels, num_filters):
        """
        Args:
            num_channels(int): channels of the deeper input, and also of the
                concatenated tensor fed to the first 3x3 conv.
            num_filters(int): output channels.
        """
        super(Decoder, self).__init__()
        conv3x3 = dict(kernel_size=3, stride=1, padding=1, bias_attr=False)
        self.conv1 = nn.Conv2D(num_channels, num_filters, **conv3x3)
        self.bn1 = nn.BatchNorm(num_filters, act='relu')
        self.conv2 = nn.Conv2D(num_filters, num_filters, **conv3x3)
        self.bn2 = nn.BatchNorm(num_filters, act='relu')
        # 1x1 reduction applied to the deeper feature before upsampling.
        self.conv0 = nn.Conv2D(
            num_channels,
            num_filters,
            kernel_size=1,
            stride=1,
            padding=0,
            bias_attr=False)
        self.bn0 = nn.BatchNorm(num_filters, act='relu')

    def forward(self, inputs_prev, inputs):
        """
        Args:
            inputs_prev: skip feature that is concatenated first.
            inputs: deeper feature that is reduced and upsampled by 2.
        """
        upsampled = paddle.nn.functional.interpolate(
            self.bn0(self.conv0(inputs)),
            scale_factor=2,
            mode='bilinear',
            align_corners=False)
        merged = paddle.concat([inputs_prev, upsampled], axis=1)
        merged = self.bn1(self.conv1(merged))
        return self.bn2(self.conv2(merged))
class UNet(nn.Layer):
    """Five-encoder / four-decoder U-Net producing a 16-channel feature map."""

    def __init__(self):
        super(UNet, self).__init__()
        self.down1 = Encoder(num_channels=3, num_filters=16)
        self.down2 = Encoder(num_channels=16, num_filters=32)
        self.down3 = Encoder(num_channels=32, num_filters=64)
        self.down4 = Encoder(num_channels=64, num_filters=128)
        self.down5 = Encoder(num_channels=128, num_filters=256)

        self.up1 = Decoder(32, 16)
        self.up2 = Decoder(64, 32)
        self.up3 = Decoder(128, 64)
        self.up4 = Decoder(256, 128)
        self.out_channels = 16

    def forward(self, inputs):
        """Encode then decode, combining each decoder with its skip feature."""
        # The first encoder contributes its un-pooled output; the remaining
        # encoders contribute their pooled output.
        x1 = self.down1(inputs)[0]
        x2 = self.down2(x1)[1]
        x3 = self.down3(x2)[1]
        x4 = self.down4(x3)[1]
        x5 = self.down5(x4)[1]

        y = self.up4(x4, x5)
        y = self.up3(x3, y)
        y = self.up2(x2, y)
        return self.up1(x1, y)
class Kie_backbone(nn.Layer):
    """Extracts one 16-channel visual feature per ground-truth box using a
    U-Net, RoI-align and a 7x7 max pool."""

    def __init__(self, in_channels, **kwargs):
        """
        Args:
            in_channels: accepted but not used by this layer.
            **kwargs: accepted and ignored.
        """
        super(Kie_backbone, self).__init__()
        self.out_channels = 16
        self.img_feat = UNet()
        self.maxpool = nn.MaxPool2D(kernel_size=7)

    def bbox2roi(self, bbox_list):
        """Concatenate per-image boxes along axis 0.

        Returns:
            (rois, rois_num): the concatenated boxes and an int32 tensor
            holding the number of boxes of each image.
        """
        per_image_boxes = list(bbox_list)
        counts = [boxes.shape[0] for boxes in per_image_boxes]
        rois = paddle.concat(per_image_boxes, 0)
        rois_num = paddle.to_tensor(counts, dtype='int32')
        return rois, rois_num

    def pre_process(self, img, relations, texts, gt_bboxes, tag, img_size):
        """Crop the image batch and cut per-sample annotations to size.

        Each row of ``tag`` supplies (num, recoder_len), which slice that
        sample's relations, texts and boxes.

        Returns:
            (img, relations, texts, gt_bboxes): the cropped image tensor and
            three lists of float32 tensors, one entry per sample.
        """
        img_np = img.numpy()
        relations_np = relations.numpy()
        texts_np = texts.numpy()
        boxes_np = gt_bboxes.numpy()
        tag_rows = tag.numpy().tolist()
        size_np = img_size.numpy()

        # Crop the batch to the largest height/width listed in img_size.
        h = int(np.max(size_np[:, 0]))
        w = int(np.max(size_np[:, 1]))
        img = paddle.to_tensor(img_np[:, :, :h, :w])

        temp_relations, temp_texts, temp_gt_bboxes = [], [], []
        for i, row in enumerate(tag_rows):
            num, recoder_len = row[0], row[1]
            temp_relations.append(
                paddle.to_tensor(
                    relations_np[i, :num, :num, :], dtype='float32'))
            temp_texts.append(
                paddle.to_tensor(
                    texts_np[i, :num, :recoder_len], dtype='float32'))
            temp_gt_bboxes.append(
                paddle.to_tensor(
                    boxes_np[i, :num, ...], dtype='float32'))
        return img, temp_relations, temp_texts, temp_gt_bboxes

    def forward(self, inputs):
        """
        Args:
            inputs: sequence read as inputs[0]=image, [1]=relations,
                [2]=texts, [3]=gt boxes, [5]=tag and [-1]=image sizes.

        Returns:
            [relations, texts, feats], where feats has one row per box.
        """
        img, relations, texts, gt_bboxes = self.pre_process(
            inputs[0], inputs[1], inputs[2], inputs[3], inputs[5], inputs[-1])
        feature_map = self.img_feat(img)
        boxes, rois_num = self.bbox2roi(gt_bboxes)
        feats = paddle.vision.ops.roi_align(
            feature_map,
            boxes,
            spatial_scale=1.0,
            output_size=7,
            boxes_num=rois_num)
        # Max-pool the 7x7 RoI grid to 1x1 and drop the two spatial axes.
        feats = self.maxpool(feats).squeeze(-1).squeeze(-1)
        return [relations, texts, feats]
# copyright (c) 2022 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Code is refer from:
https://github.com/RuijieJ/pren/blob/main/Nets/EfficientNet.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
from collections import namedtuple
import paddle
import paddle.nn as nn
import paddle.nn.functional as F
__all__ = ['EfficientNetb3']
class EffB3Params:
    """Static hyper-parameters of the EfficientNet-b3 architecture."""

    @staticmethod
    def get_global_params():
        """Return the network-wide EfficientNet-b3 settings.

        The following are EfficientNet-b3's architecture hyper-parameters.
        To fit the scene text recognition task, the resolution
        (``image_size``) is changed from 300 to 64.
        """
        GlobalParams = namedtuple(
            'GlobalParams',
            'drop_connect_rate width_coefficient depth_coefficient '
            'depth_divisor image_size')
        return GlobalParams(
            drop_connect_rate=0.3,
            width_coefficient=1.2,
            depth_coefficient=1.4,
            depth_divisor=8,
            image_size=64)

    @staticmethod
    def get_block_params():
        """Return the list of per-stage block settings, in network order."""
        BlockParams = namedtuple(
            'BlockParams',
            'kernel_size num_repeat input_filters output_filters '
            'expand_ratio id_skip se_ratio stride')
        # One row per stage, columns in the field order declared above.
        stage_rows = (
            (3, 1, 32, 16, 1, True, 0.25, 1),
            (3, 2, 16, 24, 6, True, 0.25, 2),
            (5, 2, 24, 40, 6, True, 0.25, 2),
            (3, 3, 40, 80, 6, True, 0.25, 2),
            (5, 3, 80, 112, 6, True, 0.25, 1),
            (5, 4, 112, 192, 6, True, 0.25, 2),
            (3, 1, 192, 320, 6, True, 0.25, 1), )
        return [BlockParams(*row) for row in stage_rows]
class EffUtils:
    """Helpers that scale base EfficientNet sizes by the global multipliers."""

    @staticmethod
    def round_filters(filters, global_params):
        """Scale a filter count by the width multiplier and round it.

        The scaled value is rounded to the nearest multiple of
        ``global_params.depth_divisor``; if that rounding loses more than
        10% of the scaled value, one more divisor step is added. When the
        width multiplier is falsy, ``filters`` is returned unchanged.
        """
        width_mult = global_params.width_coefficient
        if not width_mult:
            return filters
        step = global_params.depth_divisor
        scaled = filters * width_mult
        rounded = int(scaled + step / 2) // step * step
        if rounded < 0.9 * scaled:
            rounded += step
        return int(rounded)

    @staticmethod
    def round_repeats(repeats, global_params):
        """Scale a block repeat count by the depth multiplier, rounding up.

        When the depth multiplier is falsy, ``repeats`` is returned unchanged.
        """
        depth_mult = global_params.depth_coefficient
        return int(math.ceil(depth_mult * repeats)) if depth_mult else repeats
class ConvBlock(nn.Layer):
    """Inverted-residual block: expand -> depthwise -> optional SE -> project.

    Args:
        block_params: object exposing ``kernel_size``, ``input_filters``,
            ``output_filters``, ``expand_ratio``, ``id_skip``, ``se_ratio``
            and ``stride`` (a ``BlockParams`` tuple in this file).
    """

    def __init__(self, block_params):
        super(ConvBlock, self).__init__()
        args = block_params
        self.block_args = args
        se_ratio = args.se_ratio
        self.has_se = (se_ratio is not None) and (0 < se_ratio <= 1)
        self.id_skip = args.id_skip
        self.input_filters = args.input_filters
        self.final_oup = args.output_filters

        # Expansion phase: skipped entirely when expand_ratio is 1.
        expanded = args.input_filters * args.expand_ratio
        if args.expand_ratio != 1:
            self.expand_conv = nn.Conv2D(
                self.input_filters, expanded, 1, bias_attr=False)
            self.bn0 = nn.BatchNorm(expanded)

        # Depthwise phase: one group per channel.
        self.depthwise_conv = nn.Conv2D(
            expanded,
            expanded,
            groups=expanded,
            kernel_size=args.kernel_size,
            stride=args.stride,
            padding='same',
            bias_attr=False)
        self.bn1 = nn.BatchNorm(expanded)

        # Squeeze-and-excitation; the bottleneck width is derived from the
        # block's *input* filters, with a floor of one channel.
        if self.has_se:
            squeezed = max(1, int(args.input_filters * args.se_ratio))
            self.se_reduce = nn.Conv2D(expanded, squeezed, 1)
            self.se_expand = nn.Conv2D(squeezed, expanded, 1)

        # Projection phase.
        self.project_conv = nn.Conv2D(
            expanded, self.final_oup, 1, bias_attr=False)
        self.bn2 = nn.BatchNorm(self.final_oup)
        self.swish = nn.Swish()

    def drop_connect(self, inputs, p, training):
        """Zero whole samples with probability ``p`` and rescale the rest.

        Returns ``inputs`` untouched when ``training`` is falsy.
        """
        if not training:
            return inputs
        keep_prob = 1 - p
        noise = paddle.rand([inputs.shape[0], 1, 1, 1], dtype=inputs.dtype)
        random_tensor = keep_prob + noise
        random_tensor = paddle.to_tensor(random_tensor, place=inputs.place)
        # floor() turns keep_prob + U[0, 1) into a per-sample 0/1 mask.
        keep_mask = paddle.floor(random_tensor)
        return inputs / keep_prob * keep_mask

    def forward(self, inputs, drop_connect_rate=None):
        """Run the block; adds the residual when shapes allow it."""
        x = inputs
        if self.block_args.expand_ratio != 1:
            x = self.expand_conv(inputs)
            x = self.swish(self.bn0(x))
        x = self.depthwise_conv(x)
        x = self.swish(self.bn1(x))

        if self.has_se:
            gate = F.adaptive_avg_pool2d(x, 1)
            gate = self.swish(self.se_reduce(gate))
            gate = self.se_expand(gate)
            x = F.sigmoid(gate) * x

        x = self.bn2(self.project_conv(x))

        # The skip connection needs id_skip, stride 1 and equal in/out widths.
        can_skip = (self.id_skip and self.block_args.stride == 1 and
                    self.input_filters == self.final_oup)
        if not can_skip:
            return x
        if drop_connect_rate:
            x = self.drop_connect(
                x, p=drop_connect_rate, training=self.training)
        return x + inputs
class EfficientNetb3_PREN(nn.Layer):
    """EfficientNet-b3 backbone that returns three intermediate feature maps.

    Args:
        in_channels: number of channels of the input image.

    Attributes set here: ``out_channels`` lists the output widths recorded
    while building the blocks; ``blocks`` is a plain Python list of the
    registered ``ConvBlock`` sublayers in execution order.
    """

    def __init__(self, in_channels):
        super(EfficientNetb3_PREN, self).__init__()
        self.blocks_params = EffB3Params.get_block_params()
        self.global_params = EffB3Params.get_global_params()
        self.out_channels = []

        # stem
        stem_channels = EffUtils.round_filters(32, self.global_params)
        self.conv_stem = nn.Conv2D(
            in_channels, stem_channels, 3, 2, padding='same', bias_attr=False)
        self.bn0 = nn.BatchNorm(stem_channels)

        self.blocks = []
        # to extract three feature maps for fpn based on efficientnetb3 backbone
        self.concerned_block_idxes = [7, 17, 25]
        for stage, stage_params in enumerate(self.blocks_params):
            scaled = stage_params._replace(
                input_filters=EffUtils.round_filters(
                    stage_params.input_filters, self.global_params),
                output_filters=EffUtils.round_filters(
                    stage_params.output_filters, self.global_params),
                num_repeat=EffUtils.round_repeats(stage_params.num_repeat,
                                                  self.global_params))
            # First block of the stage keeps the stage's stride and widths.
            self._append_block("{}-0".format(stage), scaled)
            # Remaining repeats map output width to itself with stride 1.
            repeated = scaled._replace(
                input_filters=scaled.output_filters, stride=1)
            for rep in range(1, scaled.num_repeat):
                self._append_block('{}-{}'.format(stage, rep), repeated)
        self.swish = nn.Swish()

    def _append_block(self, name, params):
        """Register one ConvBlock and record its width if it is a tap point."""
        self.blocks.append(self.add_sublayer(name, ConvBlock(params)))
        # The running block count (1-based) is what gets compared here.
        if len(self.blocks) in self.concerned_block_idxes:
            self.out_channels.append(params.output_filters)

    def forward(self, inputs):
        """Return the outputs of the blocks listed in concerned_block_idxes."""
        outs = []
        x = self.swish(self.bn0(self.conv_stem(inputs)))
        total = len(self.blocks)
        base_rate = self.global_params.drop_connect_rate
        for idx, block in enumerate(self.blocks):
            # Drop-connect rate grows linearly with the block's position.
            rate = base_rate
            if rate:
                rate *= float(idx) / total
            x = block(x, drop_connect_rate=rate)
            if idx in self.concerned_block_idxes:
                outs.append(x)
        return outs
# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
This code is adapted from:
https://github.com/liyunsheng13/micronet/blob/main/backbone/micronet.py
https://github.com/liyunsheng13/micronet/blob/main/backbone/activation.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.nn as nn
from ppocr.modeling.backbones.det_mobilenet_v3 import make_divisible
# Per-block configuration tables for the four MicroNet variants.
# Each row is unpacked by MicroNet.__init__ into fifteen columns:
#   s          -> stride of the first block built from the row
#   n          -> number of blocks built from the row (extras use stride 1)
#   c          -> output channels
#   ks         -> kernel_size
#   c1, c2     -> ch_exp
#   g1, g2     -> ch_per_group
#   c3, g3, g4 -> groups_1x1
#   y1, y2, y3 -> activation_cfg['dy']
#   r          -> activation_cfg['ratio']
M0_cfgs = [
# s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
[2, 1, 8, 3, 2, 2, 0, 4, 8, 2, 2, 2, 0, 1, 1],
[2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 2, 1, 1],
[2, 1, 16, 5, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
[1, 1, 32, 5, 1, 4, 4, 4, 32, 4, 4, 2, 2, 1, 1],
[2, 1, 64, 5, 1, 4, 8, 8, 64, 8, 8, 2, 2, 1, 1],
[1, 1, 96, 3, 1, 4, 8, 8, 96, 8, 8, 2, 2, 1, 2],
[1, 1, 384, 3, 1, 4, 12, 12, 0, 0, 0, 2, 2, 1, 2],
]
M1_cfgs = [
# s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
[2, 1, 8, 3, 2, 2, 0, 6, 8, 2, 2, 2, 0, 1, 1],
[2, 1, 16, 3, 2, 2, 0, 8, 16, 4, 4, 2, 2, 1, 1],
[2, 1, 16, 5, 2, 2, 0, 16, 16, 4, 4, 2, 2, 1, 1],
[1, 1, 32, 5, 1, 6, 4, 4, 32, 4, 4, 2, 2, 1, 1],
[2, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 1],
[1, 1, 96, 3, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
[1, 1, 576, 3, 1, 6, 12, 12, 0, 0, 0, 2, 2, 1, 2],
]
M2_cfgs = [
# s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
[2, 1, 12, 3, 2, 2, 0, 8, 12, 4, 4, 2, 0, 1, 1],
[2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 2, 2, 1, 1],
[1, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 2, 2, 1, 1],
[2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 2, 2, 1, 1],
[1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 2, 2, 1, 2],
[1, 1, 64, 5, 1, 6, 8, 8, 64, 8, 8, 2, 2, 1, 2],
[2, 1, 96, 5, 1, 6, 8, 8, 96, 8, 8, 2, 2, 1, 2],
[1, 1, 128, 3, 1, 6, 12, 12, 128, 8, 8, 2, 2, 1, 2],
[1, 1, 768, 3, 1, 6, 16, 16, 0, 0, 0, 2, 2, 1, 2],
]
M3_cfgs = [
# s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r
[2, 1, 16, 3, 2, 2, 0, 12, 16, 4, 4, 0, 2, 0, 1],
[2, 1, 24, 3, 2, 2, 0, 16, 24, 4, 4, 0, 2, 0, 1],
[1, 1, 24, 3, 2, 2, 0, 24, 24, 4, 4, 0, 2, 0, 1],
[2, 1, 32, 5, 1, 6, 6, 6, 32, 4, 4, 0, 2, 0, 1],
[1, 1, 32, 5, 1, 6, 8, 8, 32, 4, 4, 0, 2, 0, 2],
[1, 1, 64, 5, 1, 6, 8, 8, 48, 8, 8, 0, 2, 0, 2],
[1, 1, 80, 5, 1, 6, 8, 8, 80, 8, 8, 0, 2, 0, 2],
[1, 1, 80, 5, 1, 6, 10, 10, 80, 8, 8, 0, 2, 0, 2],
[1, 1, 120, 5, 1, 6, 10, 10, 120, 10, 10, 0, 2, 0, 2],
[1, 1, 120, 5, 1, 6, 12, 12, 120, 10, 10, 0, 2, 0, 2],
[1, 1, 144, 3, 1, 6, 12, 12, 144, 12, 12, 0, 2, 0, 2],
[1, 1, 432, 3, 1, 3, 12, 12, 0, 0, 0, 0, 2, 0, 2],
]
def get_micronet_config(mode):
    """Return the block configuration table for a MicroNet variant.

    The table is looked up in an explicit mapping instead of being resolved
    with ``eval``, so a ``mode`` string coming from a config file can never
    be executed as code.

    Args:
        mode(str): one of 'M0', 'M1', 'M2', 'M3'.

    Returns:
        list: the matching ``*_cfgs`` table (the module-level object itself,
        not a copy).

    Raises:
        NotImplementedError: if ``mode`` is not a known variant.
    """
    configs = {
        'M0': M0_cfgs,
        'M1': M1_cfgs,
        'M2': M2_cfgs,
        'M3': M3_cfgs,
    }
    if mode not in configs:
        raise NotImplementedError("mode[{}_model] is not implemented!".format(
            mode))
    return configs[mode]
class MaxGroupPooling(nn.Layer):
    """Reduce channels by taking the max over groups of adjacent channels.

    Args:
        channel_per_group(int): how many consecutive channels are merged
            into one output channel. A value of 1 makes the layer a no-op.
    """

    def __init__(self, channel_per_group=2):
        super(MaxGroupPooling, self).__init__()
        self.channel_per_group = channel_per_group

    def forward(self, x):
        group = self.channel_per_group
        if group == 1:
            return x
        b, c, h, w = x.shape
        # Expose the group members on their own axis, then reduce over it.
        grouped = paddle.reshape(x, [b, c // group, -1, h, w])
        return paddle.max(grouped, axis=2)
class SpatialSepConvSF(nn.Layer):
    """Spatially separable conv: a (k x 1) conv, a grouped (1 x k) conv, shuffle.

    Args:
        inp: input channels.
        oups: pair ``(oup1, oup2)``; the first conv outputs ``oup1`` channels
            and the second ``oup1 * oup2``.
        kernel_size: length of the 1-D kernels.
        stride: stride applied along the kernel axis of each conv.
    """

    def __init__(self, inp, oups, kernel_size, stride):
        super(SpatialSepConvSF, self).__init__()
        oup1, oup2 = oups
        half = kernel_size // 2
        wide = oup1 * oup2

        vertical = nn.Conv2D(
            inp,
            oup1,
            kernel_size=(kernel_size, 1),
            stride=(stride, 1),
            padding=(half, 0),
            groups=1,
            bias_attr=False)
        vertical_bn = nn.BatchNorm2D(oup1)
        horizontal = nn.Conv2D(
            oup1,
            wide,
            kernel_size=(1, kernel_size),
            stride=(1, stride),
            padding=(0, half),
            groups=oup1,
            bias_attr=False)
        horizontal_bn = nn.BatchNorm2D(wide)
        shuffle = ChannelShuffle(oup1)

        self.conv = nn.Sequential(vertical, vertical_bn, horizontal,
                                  horizontal_bn, shuffle)

    def forward(self, x):
        return self.conv(x)
class ChannelShuffle(nn.Layer):
    """Interleave channels across ``groups`` groups.

    The channel axis is viewed as (groups, channels_per_group), the two
    axes are swapped, and the result is flattened back to one channel axis.
    """

    def __init__(self, groups):
        super(ChannelShuffle, self).__init__()
        self.groups = groups

    def forward(self, x):
        b, c, h, w = x.shape
        per_group = c // self.groups
        grouped = paddle.reshape(x, [b, self.groups, per_group, h, w])
        swapped = paddle.transpose(grouped, (0, 2, 1, 3, 4))
        return paddle.reshape(swapped, [b, -1, h, w])
class StemLayer(nn.Layer):
    """Network stem: a separable 3-tap conv followed by an activation.

    Args:
        inp: input channels.
        oup: expected output channels; only used to choose the activation.
        stride: stride forwarded to the separable conv.
        groups: pair forwarded to ``SpatialSepConvSF`` as its ``oups``.
    """

    def __init__(self, inp, oup, stride, groups=(4, 4)):
        super(StemLayer, self).__init__()
        g1, g2 = groups
        conv = SpatialSepConvSF(inp, groups, 3, stride)
        # The conv emits g1 * g2 channels; when that is twice ``oup`` the
        # pairwise max-pool halves it, otherwise a plain ReLU6 is used.
        if g1 * g2 == 2 * oup:
            activation = MaxGroupPooling(2)
        else:
            activation = nn.ReLU6()
        self.stem = nn.Sequential(conv, activation)

    def forward(self, x):
        return self.stem(x)
class DepthSpatialSepConv(nn.Layer):
    """Depthwise spatially separable conv: (k x 1) then (1 x k), each with BN.

    Args:
        inp: input channels.
        expand: pair ``(exp1, exp2)`` of channel multipliers for the two
            convs; the output has ``inp * exp1 * exp2`` channels.
        kernel_size: length of the 1-D kernels.
        stride: stride of the first conv along its kernel axis; the second
            conv always uses stride 1.
    """

    def __init__(self, inp, expand, kernel_size, stride):
        super(DepthSpatialSepConv, self).__init__()
        exp1, exp2 = expand
        hidden_dim = inp * exp1
        oup = inp * exp1 * exp2
        half = kernel_size // 2

        vertical = nn.Conv2D(
            inp,
            inp * exp1,
            kernel_size=(kernel_size, 1),
            stride=(stride, 1),
            padding=(half, 0),
            groups=inp,
            bias_attr=False)
        vertical_bn = nn.BatchNorm2D(inp * exp1)
        horizontal = nn.Conv2D(
            hidden_dim,
            oup,
            kernel_size=(1, kernel_size),
            stride=1,
            padding=(0, half),
            groups=hidden_dim,
            bias_attr=False)
        horizontal_bn = nn.BatchNorm2D(oup)

        self.conv = nn.Sequential(vertical, vertical_bn, horizontal,
                                  horizontal_bn)

    def forward(self, x):
        return self.conv(x)
class GroupConv(nn.Layer):
    """Grouped 1x1 convolution followed by batch norm.

    Args:
        inp: input channels.
        oup: output channels.
        groups: indexable; only ``groups[0]`` is used as the conv's group
            count. NOTE: the default ``2`` is not indexable, so callers
            must pass a sequence.
    """

    def __init__(self, inp, oup, groups=2):
        super(GroupConv, self).__init__()
        self.inp = inp
        self.oup = oup
        self.groups = groups
        pointwise = nn.Conv2D(
            inp,
            oup,
            kernel_size=1,
            stride=1,
            padding=0,
            groups=self.groups[0],
            bias_attr=False)
        self.conv = nn.Sequential(pointwise, nn.BatchNorm2D(oup))

    def forward(self, x):
        return self.conv(x)
class DepthConv(nn.Layer):
    """Depthwise k x k convolution (one group per input channel) with BN.

    Args:
        inp: input channels, also used as the conv's group count.
        oup: output channels.
        kernel_size: square kernel size; padding is ``kernel_size // 2``.
        stride: conv stride.
    """

    def __init__(self, inp, oup, kernel_size, stride):
        super(DepthConv, self).__init__()
        depthwise = nn.Conv2D(
            inp,
            oup,
            kernel_size=kernel_size,
            stride=stride,
            padding=kernel_size // 2,
            groups=inp,
            bias_attr=False)
        self.conv = nn.Sequential(depthwise, nn.BatchNorm2D(oup))

    def forward(self, x):
        return self.conv(x)
class DYShiftMax(nn.Layer):
    """Dynamic Shift-Max activation.

    Per-sample coefficients are predicted from the globally pooled input
    and used to mix every channel with a channel taken from a shifted
    group. With ``act_relu`` two such mixes are computed and their
    element-wise maximum is returned; otherwise a single mix is returned.

    Args:
        inp: number of input channels.
        oup: number of channels the coefficients are predicted for.
        reduction: divisor used to size the hidden layer of the coefficient
            predictor (the hidden width is at least 4).
        act_max: half-range of the predicted coefficients before the
            ``init_a`` / ``init_b`` offsets are added.
        act_relu: if True use two mixes and a max, else a single mix.
        init_a: pair of offsets added to the coefficients of ``x``.
        init_b: pair of offsets added to the coefficients of the shifted
            tensor.
        relu_before_pool: apply ReLU before the global average pooling.
        g: indexable whose element 1 is the group count; ``None`` means a
            single group.
        expansion: if True and the group count is not 1, use
            ``inp // g[1]`` groups instead.
    """

    def __init__(self,
                 inp,
                 oup,
                 reduction=4,
                 act_max=1.0,
                 act_relu=True,
                 init_a=(0.0, 0.0),
                 init_b=(0.0, 0.0),
                 relu_before_pool=False,
                 g=None,
                 expansion=False):
        super(DYShiftMax, self).__init__()
        self.oup = oup
        self.act_max = act_max * 2
        self.act_relu = act_relu
        self.avg_pool = nn.Sequential(nn.ReLU() if relu_before_pool == True else
                                      nn.Sequential(), nn.AdaptiveAvgPool2D(1))
        self.exp = 4 if act_relu else 2
        self.init_a = init_a
        self.init_b = init_b

        # determine squeeze
        squeeze = max(make_divisible(inp // reduction, 4), 4)
        self.fc = nn.Sequential(
            nn.Linear(inp, squeeze),
            nn.ReLU(), nn.Linear(squeeze, oup * self.exp), nn.Hardsigmoid())

        # ``g=None`` used to fall through to ``g[1]`` on an int and crash;
        # treat it as a single group instead.
        self.g = 1 if g is None else g[1]
        if self.g != 1 and expansion:
            self.g = inp // self.g
        self.gc = inp // self.g

        # Channel permutation: view the channels as (g, gc), rotate the
        # group axis by one and the within-group axis by one. roll() gives
        # the same order as the split/concat form and also handles axes of
        # size 1.
        index = paddle.to_tensor([range(inp)])
        index = paddle.reshape(index, [1, self.g, self.gc, 1, 1])
        index = paddle.roll(index, shifts=-1, axis=1)
        index = paddle.roll(index, shifts=-1, axis=2)
        self.index = paddle.reshape(index, [inp])
        self.expansion = expansion

    def forward(self, x):
        b, c, _, _ = x.shape
        y = self.avg_pool(x)
        y = paddle.reshape(y, [b, c])
        y = self.fc(y)
        y = paddle.reshape(y, [b, self.oup * self.exp, 1, 1])
        # Hardsigmoid output is in [0, 1]; recentre it around zero.
        y = (y - 0.5) * self.act_max

        # Gather the shifted channels on-device. Going through numpy, as
        # before, cut the gradient through this branch and forced a host
        # round trip on every call.
        x2 = paddle.index_select(x, self.index, axis=1)

        num_coeffs = y.shape[1] // self.oup
        if self.exp == 4:
            a1, b1, a2, b2 = paddle.split(y, num_coeffs, axis=1)
            a1 = a1 + self.init_a[0]
            a2 = a2 + self.init_a[1]
            b1 = b1 + self.init_b[0]
            b2 = b2 + self.init_b[1]
            z1 = x * a1 + x2 * b1
            z2 = x * a2 + x2 * b2
            out = paddle.maximum(z1, z2)
        else:
            # self.exp is only ever 4 or 2 (set in __init__).
            a1, b1 = paddle.split(y, num_coeffs, axis=1)
            a1 = a1 + self.init_a[0]
            b1 = b1 + self.init_b[0]
            out = x * a1 + x2 * b1
        return out
class DYMicroBlock(nn.Layer):
    """One MicroNet block, assembled as a single ``nn.Sequential``.

    Three layouts exist and are selected from the arguments:

    * ``ch_per_group[0] == 0``: DepthSpatialSepConv -> activation ->
      GroupConv -> activation.
    * ``groups_1x1[2] == 0``: GroupConv -> activation only.
    * otherwise: GroupConv -> activation -> depthwise conv -> activation ->
      GroupConv -> activation.

    Optional ``ChannelShuffle`` layers are interleaved; disabled slots are
    filled with an empty ``nn.Sequential()`` so that positions inside
    ``self.layers`` stay fixed.

    Args:
        inp: input channels.
        oup: output channels.
        kernel_size: kernel size of the depthwise / separable conv.
        stride: stride of the depthwise / separable conv.
        ch_exp: pair of channel multipliers; the hidden width is
            ``inp * ch_exp[0] * ch_exp[1]``.
        ch_per_group: pair passed as ``g`` / ``groups`` to the first
            activation and GroupConv.
        groups_1x1: triple ``(hidden_fft, g1, g2)``; ``(g1, g2)`` configures
            the final GroupConv and its activation.
        depthsep: in the third layout, use DepthSpatialSepConv instead of
            DepthConv.
        shuffle: enable the ChannelShuffle layers.
        activation_cfg: dict with keys ``'dy'`` (three flags y1, y2, y3),
            ``'ratio'``, ``'init_a'`` and ``'init_b'``.

    NOTE(review): the defaults ``ch_per_group=4``, ``groups_1x1=(1, 1)`` and
    ``activation_cfg=None`` cannot actually be used -- the body indexes
    ``ch_per_group``, unpacks three values from ``groups_1x1`` and
    subscripts ``activation_cfg``. Callers must pass all three.
    """

    def __init__(self,
                 inp,
                 oup,
                 kernel_size=3,
                 stride=1,
                 ch_exp=(2, 2),
                 ch_per_group=4,
                 groups_1x1=(1, 1),
                 depthsep=True,
                 shuffle=False,
                 activation_cfg=None):
        super(DYMicroBlock, self).__init__()
        # The residual connection is only valid when the shape is unchanged.
        self.identity = stride == 1 and inp == oup
        # y1 / y2 / y3 select the activation after the first, second and
        # third stage: 0 disables DYShiftMax there, 2 turns on its act_relu.
        y1, y2, y3 = activation_cfg['dy']
        act_reduction = 8 * activation_cfg['ratio']
        init_a = activation_cfg['init_a']
        init_b = activation_cfg['init_b']
        t1 = ch_exp
        gs1 = ch_per_group
        # hidden_fft is unpacked but not used anywhere in this block.
        hidden_fft, g1, g2 = groups_1x1
        hidden_dim2 = inp * t1[0] * t1[1]
        if gs1[0] == 0:
            # Layout 1: separable depthwise conv first, then a 1x1 GroupConv.
            self.layers = nn.Sequential(
                DepthSpatialSepConv(inp, t1, kernel_size, stride),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y2 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y2 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
                ChannelShuffle(hidden_dim2 // 2)
                if shuffle and y2 != 0 else nn.Sequential(),
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction // 2,
                    init_b=[0.0, 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Sequential(),
                ChannelShuffle(g2) if shuffle else nn.Sequential(),
                ChannelShuffle(oup // 2)
                if shuffle and oup % 2 == 0 and y3 != 0 else nn.Sequential(), )
        elif g2 == 0:
            # Layout 2: a single GroupConv to hidden_dim2 channels; `oup`
            # is not used to size any layer in this branch.
            self.layers = nn.Sequential(
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction,
                    init_b=[0.0, 0.0],
                    g=gs1,
                    expansion=False) if y3 > 0 else nn.Sequential(), )
        else:
            # Layout 3: expand with GroupConv, depthwise conv, project with
            # GroupConv.
            self.layers = nn.Sequential(
                GroupConv(inp, hidden_dim2, gs1),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y1 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=False) if y1 > 0 else nn.ReLU6(),
                ChannelShuffle(gs1[1]) if shuffle else nn.Sequential(),
                DepthSpatialSepConv(hidden_dim2, (1, 1), kernel_size, stride)
                if depthsep else
                DepthConv(hidden_dim2, hidden_dim2, kernel_size, stride),
                # Empty placeholder kept in the sequence.
                nn.Sequential(),
                DYShiftMax(
                    hidden_dim2,
                    hidden_dim2,
                    act_max=2.0,
                    act_relu=True if y2 == 2 else False,
                    init_a=init_a,
                    reduction=act_reduction,
                    init_b=init_b,
                    g=gs1,
                    expansion=True) if y2 > 0 else nn.ReLU6(),
                # Chained conditional, evaluated left to right:
                #   shuffle and y1 != 0 and y2 != 0 -> shuffle by hidden_dim2 // 4
                #   else y1 == 0 and y2 == 0        -> no-op
                #   else                            -> shuffle by hidden_dim2 // 2
                # NOTE(review): the last case is also reached when `shuffle`
                # is False but y1 or y2 is non-zero -- confirm this is intended.
                ChannelShuffle(hidden_dim2 // 4)
                if shuffle and y1 != 0 and y2 != 0 else nn.Sequential()
                if y1 == 0 and y2 == 0 else ChannelShuffle(hidden_dim2 // 2),
                GroupConv(hidden_dim2, oup, (g1, g2)),
                DYShiftMax(
                    oup,
                    oup,
                    act_max=2.0,
                    act_relu=False,
                    init_a=[1.0, 0.0],
                    reduction=act_reduction // 2
                    if oup < hidden_dim2 else act_reduction,
                    init_b=[0.0, 0.0],
                    g=(g1, g2),
                    expansion=False) if y3 > 0 else nn.Sequential(),
                ChannelShuffle(g2) if shuffle else nn.Sequential(),
                ChannelShuffle(oup // 2)
                if shuffle and y3 != 0 else nn.Sequential(), )

    def forward(self, x):
        """Apply the block, adding the input back when ``self.identity`` is set."""
        identity = x
        out = self.layers(x)
        if self.identity:
            out = out + identity
        return out
class MicroNet(nn.Layer):
    """
    The MicroNet backbone network for the recognition module.

    Args:
        mode(str): {'M0', 'M1', 'M2', 'M3'}
            Four models are proposed based on four different computational
            costs (4M, 6M, 12M, 21M MAdds).
            Default: 'M3'.
    """

    def __init__(self, mode='M3', **kwargs):
        super(MicroNet, self).__init__()
        self.cfgs = get_micronet_config(mode)

        # Per-variant settings:
        # (stem output channels, stem groups, out_ch, init_a, init_b)
        presets = {
            'M0': (4, (2, 2), 384, (1.0, 1.0), (0.0, 0.0)),
            'M1': (6, (3, 2), 576, (1.0, 1.0), (0.0, 0.0)),
            'M2': (8, (4, 2), 768, (1.0, 1.0), (0.0, 0.0)),
            'M3': (12, (4, 3), 432, (1.0, 0.5), (0.0, 0.5)),
        }
        if mode not in presets:
            raise NotImplementedError("mode[" + mode +
                                      "_model] is not implemented!")
        input_channel, stem_groups, out_ch, init_a, init_b = presets[mode]
        activation_cfg = {'init_a': init_a, 'init_b': init_b}

        layers = [StemLayer(3, input_channel, stride=2, groups=stem_groups)]
        for row in self.cfgs:
            s, n, c, ks, c1, c2, g1, g2, c3, g3, g4, y1, y2, y3, r = row
            activation_cfg['dy'] = [y1, y2, y3]
            activation_cfg['ratio'] = r
            # The first block of a row uses the row's stride; any further
            # repeats keep the resolution.
            for block_stride in [s] + [1] * (n - 1):
                layers.append(
                    DYMicroBlock(
                        input_channel,
                        c,
                        kernel_size=ks,
                        stride=block_stride,
                        ch_exp=(c1, c2),
                        ch_per_group=(g1, g2),
                        groups_1x1=(c3, g3, g4),
                        depthsep=True,
                        shuffle=True,
                        activation_cfg=activation_cfg, ))
                input_channel = c

        self.features = nn.Sequential(*layers)
        self.pool = nn.MaxPool2D(kernel_size=2, stride=2, padding=0)
        self.out_channels = make_divisible(out_ch)

    def forward(self, x):
        """Run the feature stack and halve the spatial size with max-pooling."""
        return self.pool(self.features(x))
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment