Commit b3d6785d authored by myhloli's avatar myhloli
Browse files

refactor(ocr): remove unused code and simplify model architecture

- Remove unused imports and code
- Simplify model architecture by removing unnecessary components
- Update initialization and forward pass logic
- Rename variables for consistency
parent 3cb156f5
...@@ -2,7 +2,8 @@ import os, sys ...@@ -2,7 +2,8 @@ import os, sys
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
from ..common import Activation
class ConvBNLayer(nn.Module): class ConvBNLayer(nn.Module):
......
import torch
from torch import nn
class MTB(nn.Module):
def __init__(self, cnn_num, in_channels):
super(MTB, self).__init__()
self.block = nn.Sequential()
self.out_channels = in_channels
self.cnn_num = cnn_num
if self.cnn_num == 2:
for i in range(self.cnn_num):
self.block.add_module(
'conv_{}'.format(i),
nn.Conv2d(
in_channels=in_channels
if i == 0 else 32 * (2**(i - 1)),
out_channels=32 * (2**i),
kernel_size=3,
stride=2,
padding=1))
self.block.add_module('relu_{}'.format(i), nn.ReLU())
self.block.add_module('bn_{}'.format(i),
nn.BatchNorm2d(32 * (2**i)))
def forward(self, images):
x = self.block(images)
if self.cnn_num == 2:
# (b, w, h, c)
x = x.permute(0, 3, 2, 1)
x_shape = x.shape
x = torch.reshape(
x, (x_shape[0], x_shape[1], x_shape[2] * x_shape[3]))
return x
"""
This code is refer from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/layers/conv_layer.py
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/backbones/resnet31_ocr.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
# import paddle
# from paddle import ParamAttr
# import paddle.nn as nn
# import paddle.nn.functional as F
__all__ = ["ResNet31"]
def conv3x3(in_channel, out_channel, stride=1):
return nn.Conv2d(
in_channel,
out_channel,
kernel_size=3,
stride=stride,
padding=1,
bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, in_channels, channels, stride=1, downsample=False):
super().__init__()
self.conv1 = conv3x3(in_channels, channels, stride)
self.bn1 = nn.BatchNorm2d(channels)
self.relu = nn.ReLU()
self.conv2 = conv3x3(channels, channels)
self.bn2 = nn.BatchNorm2d(channels)
self.downsample = downsample
if downsample:
self.downsample = nn.Sequential(
nn.Conv2d(
in_channels,
channels * self.expansion,
1,
stride,
bias=False),
nn.BatchNorm2d(channels * self.expansion), )
else:
self.downsample = nn.Sequential()
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet31(nn.Module):
'''
Args:
in_channels (int): Number of channels of input image tensor.
layers (list[int]): List of BasicBlock number for each stage.
channels (list[int]): List of out_channels of Conv2d layer.
out_indices (None | Sequence[int]): Indices of output stages.
last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage.
'''
def __init__(self,
in_channels=3,
layers=[1, 2, 5, 3],
channels=[64, 128, 256, 256, 512, 512, 512],
out_indices=None,
last_stage_pool=False):
super(ResNet31, self).__init__()
assert isinstance(in_channels, int)
assert isinstance(last_stage_pool, bool)
self.out_indices = out_indices
self.last_stage_pool = last_stage_pool
# conv 1 (Conv Conv)
self.conv1_1 = nn.Conv2d(
in_channels, channels[0], kernel_size=3, stride=1, padding=1)
self.bn1_1 = nn.BatchNorm2d(channels[0])
self.relu1_1 = nn.ReLU(inplace=True)
self.conv1_2 = nn.Conv2d(
channels[0], channels[1], kernel_size=3, stride=1, padding=1)
self.bn1_2 = nn.BatchNorm2d(channels[1])
self.relu1_2 = nn.ReLU(inplace=True)
# conv 2 (Max-pooling, Residual block, Conv)
self.pool2 = nn.MaxPool2d(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self.block2 = self._make_layer(channels[1], channels[2], layers[0])
self.conv2 = nn.Conv2d(
channels[2], channels[2], kernel_size=3, stride=1, padding=1)
self.bn2 = nn.BatchNorm2d(channels[2])
self.relu2 = nn.ReLU(inplace=True)
# conv 3 (Max-pooling, Residual block, Conv)
self.pool3 = nn.MaxPool2d(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self.block3 = self._make_layer(channels[2], channels[3], layers[1])
self.conv3 = nn.Conv2d(
channels[3], channels[3], kernel_size=3, stride=1, padding=1)
self.bn3 = nn.BatchNorm2d(channels[3])
self.relu3 = nn.ReLU(inplace=True)
# conv 4 (Max-pooling, Residual block, Conv)
self.pool4 = nn.MaxPool2d(
kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True)
self.block4 = self._make_layer(channels[3], channels[4], layers[2])
self.conv4 = nn.Conv2d(
channels[4], channels[4], kernel_size=3, stride=1, padding=1)
self.bn4 = nn.BatchNorm2d(channels[4])
self.relu4 = nn.ReLU(inplace=True)
# conv 5 ((Max-pooling), Residual block, Conv)
self.pool5 = None
if self.last_stage_pool:
self.pool5 = nn.MaxPool2d(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self.block5 = self._make_layer(channels[4], channels[5], layers[3])
self.conv5 = nn.Conv2d(
channels[5], channels[5], kernel_size=3, stride=1, padding=1)
self.bn5 = nn.BatchNorm2d(channels[5])
self.relu5 = nn.ReLU(inplace=True)
self.out_channels = channels[-1]
def _make_layer(self, input_channels, output_channels, blocks):
layers = []
for _ in range(blocks):
downsample = None
if input_channels != output_channels:
downsample = nn.Sequential(
nn.Conv2d(
input_channels,
output_channels,
kernel_size=1,
stride=1,
bias=False),
nn.BatchNorm2d(output_channels), )
layers.append(
BasicBlock(
input_channels, output_channels, downsample=downsample))
input_channels = output_channels
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1_1(x)
x = self.bn1_1(x)
x = self.relu1_1(x)
x = self.conv1_2(x)
x = self.bn1_2(x)
x = self.relu1_2(x)
outs = []
for i in range(4):
layer_index = i + 2
pool_layer = getattr(self, 'pool{}'.format(layer_index))
block_layer = getattr(self, 'block{}'.format(layer_index))
conv_layer = getattr(self, 'conv{}'.format(layer_index))
bn_layer = getattr(self, 'bn{}'.format(layer_index))
relu_layer = getattr(self, 'relu{}'.format(layer_index))
if pool_layer is not None:
x = pool_layer(x)
x = block_layer(x)
x = conv_layer(x)
x = bn_layer(x)
x = relu_layer(x)
outs.append(x)
if self.out_indices is not None:
return tuple([outs[i] for i in self.out_indices])
return x
\ No newline at end of file
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os, sys
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
__all__ = ["ResNetFPN"]
class ResNetFPN(nn.Module):
def __init__(self, in_channels=1, layers=50, **kwargs):
super(ResNetFPN, self).__init__()
supported_layers = {
18: {
'depth': [2, 2, 2, 2],
'block_class': BasicBlock
},
34: {
'depth': [3, 4, 6, 3],
'block_class': BasicBlock
},
50: {
'depth': [3, 4, 6, 3],
'block_class': BottleneckBlock
},
101: {
'depth': [3, 4, 23, 3],
'block_class': BottleneckBlock
},
152: {
'depth': [3, 8, 36, 3],
'block_class': BottleneckBlock
}
}
stride_list = [(2, 2), (2, 2), (1, 1), (1, 1)]
num_filters = [64, 128, 256, 512]
self.depth = supported_layers[layers]['depth']
self.conv = ConvBNLayer(
in_channels=in_channels,
out_channels=64,
kernel_size=7,
stride=2,
act="relu",
name="conv1")
self.block_list = nn.ModuleList()
in_ch = 64
if layers >= 50:
for block in range(len(self.depth)):
for i in range(self.depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
bottlenectBlock = BottleneckBlock(
in_channels=in_ch,
out_channels=num_filters[block],
stride=stride_list[block] if i == 0 else 1,
name=conv_name)
in_ch = num_filters[block] * 4
self.block_list.add_module("bottleneckBlock_{}_{}".format(block, i), bottlenectBlock)
else:
for block in range(len(self.depth)):
for i in range(self.depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
if i == 0 and block != 0:
stride = (2, 1)
else:
stride = (1, 1)
basicBlock = BasicBlock(
in_channels=in_ch,
out_channels=num_filters[block],
stride=stride_list[block] if i == 0 else 1,
is_first=block == i == 0,
name=conv_name)
in_ch = basicBlock.out_channels
self.block_list.add_module(conv_name, basicBlock)
out_ch_list = [in_ch // 4, in_ch // 2, in_ch]
self.base_block = nn.ModuleList()
self.conv_trans = []
self.bn_block = []
for i in [-2, -3]:
in_channels = out_ch_list[i + 1] + out_ch_list[i]
bb_0 = nn.Conv2d(
in_channels=in_channels,
out_channels=out_ch_list[i],
kernel_size=1,
bias=True)
self.base_block.add_module("F_{}_base_block_0".format(i), bb_0)
bb_1 = nn.Conv2d(
in_channels=out_ch_list[i],
out_channels=out_ch_list[i],
kernel_size=3,
padding=1,
bias=True)
self.base_block.add_module("F_{}_base_block_1".format(i), bb_1)
bb_2 = nn.Sequential(
nn.BatchNorm2d(out_ch_list[i]),
Activation("relu")
)
self.base_block.add_module("F_{}_base_block_2".format(i), bb_2)
bb_3 = nn.Conv2d(
in_channels=out_ch_list[i],
out_channels=512,
kernel_size=1,
bias=True)
self.base_block.add_module("F_{}_base_block_3".format(i), bb_3)
self.out_channels = 512
def __call__(self, x):
x = self.conv(x)
fpn_list = []
F = []
for i in range(len(self.depth)):
fpn_list.append(np.sum(self.depth[:i + 1]))
for i, block in enumerate(self.block_list):
x = block(x)
for number in fpn_list:
if i + 1 == number:
F.append(x)
base = F[-1]
j = 0
for i, block in enumerate(self.base_block):
if i % 3 == 0 and i < 6:
j = j + 1
b, c, w, h = F[-j - 1].shape
if [w, h] == list(base.shape[2:]):
base = base
else:
base = self.conv_trans[j - 1](base)
base = self.bn_block[j - 1](base)
base = torch.cat([base, F[-j - 1]], dim=1)
base = block(base)
return base
class ConvBNLayer(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
self.conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=2 if stride == (1, 1) else kernel_size,
dilation=2 if stride == (1, 1) else 1,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
bias=False, )
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
self.bn = nn.BatchNorm2d(out_channels)
self.act = act
if self.act is not None:
self._act = Activation(act_type=self.act, inplace=True)
def __call__(self, x):
x = self.conv(x)
x = self.bn(x)
if self.act is not None:
x = self._act(x)
return x
class ShortCut(nn.Module):
def __init__(self, in_channels, out_channels, stride, name, is_first=False):
super(ShortCut, self).__init__()
self.use_conv = True
if in_channels != out_channels or stride != 1 or is_first == True:
if stride == (1, 1):
self.conv = ConvBNLayer(
in_channels, out_channels, 1, 1, name=name)
else: # stride==(2,2)
self.conv = ConvBNLayer(
in_channels, out_channels, 1, stride, name=name)
else:
self.use_conv = False
def forward(self, x):
if self.use_conv:
x = self.conv(x)
return x
class BottleneckBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride, name):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
self.conv2 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels * 4,
kernel_size=1,
act=None,
name=name + "_branch2c")
self.short = ShortCut(
in_channels=in_channels,
out_channels=out_channels * 4,
stride=stride,
is_first=False,
name=name + "_branch1")
self.out_channels = out_channels * 4
def forward(self, x):
y = self.conv0(x)
y = self.conv1(y)
y = self.conv2(y)
y = y + self.short(x)
y = F.relu(y)
return y
class BasicBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride, name, is_first):
super(BasicBlock, self).__init__()
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
act='relu',
stride=stride,
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
act=None,
name=name + "_branch2b")
self.short = ShortCut(
in_channels=in_channels,
out_channels=out_channels,
stride=stride,
is_first=is_first,
name=name + "_branch1")
self.out_channels = out_channels
def forward(self, x):
y = self.conv0(x)
y = self.conv1(y)
y = y + self.short(x)
return F.relu(y)
import os, sys
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
class ConvBNLayer(nn.Module):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
is_vd_mode=False,
act=None,
name=None, ):
super(ConvBNLayer, self).__init__()
self.act = act
self.is_vd_mode = is_vd_mode
self._pool2d_avg = nn.AvgPool2d(
kernel_size=stride, stride=stride, padding=0, ceil_mode=True)
self._conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=1 if is_vd_mode else stride,
padding=(kernel_size - 1) // 2,
groups=groups,
bias=False)
self._batch_norm = nn.BatchNorm2d(
out_channels,)
if self.act is not None:
self._act = Activation(act_type=act, inplace=True)
def forward(self, inputs):
if self.is_vd_mode:
inputs = self._pool2d_avg(inputs)
y = self._conv(inputs)
y = self._batch_norm(y)
if self.act is not None:
y = self._act(y)
return y
class BottleneckBlock(nn.Module):
def __init__(self,
in_channels,
out_channels,
stride,
shortcut=True,
if_first=False,
name=None):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
self.conv2 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels * 4,
kernel_size=1,
act=None,
name=name + "_branch2c")
if not shortcut:
self.short = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels * 4,
kernel_size=1,
stride=stride,
is_vd_mode=not if_first and stride[0] != 1,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = short + conv2
y = F.relu(y)
return y
class BasicBlock(nn.Module):
def __init__(self,
in_channels,
out_channels,
stride,
shortcut=True,
if_first=False,
name=None):
super(BasicBlock, self).__init__()
self.stride = stride
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
act=None,
name=name + "_branch2b")
if not shortcut:
self.short = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=stride,
is_vd_mode=not if_first and stride[0] != 1,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = short + conv1
y = F.relu(y)
return y
class ResNet(nn.Module):
def __init__(self, in_channels=3, layers=50, **kwargs):
super(ResNet, self).__init__()
self.layers = layers
supported_layers = [18, 34, 50, 101, 152, 200]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
if layers == 18:
depth = [2, 2, 2, 2]
elif layers == 34 or layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
elif layers == 200:
depth = [3, 12, 48, 3]
num_channels = [64, 256, 512,
1024] if layers >= 50 else [64, 64, 128, 256]
num_filters = [64, 128, 256, 512]
self.conv1_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=32,
kernel_size=3,
stride=1,
act='relu',
name="conv1_1")
self.conv1_2 = ConvBNLayer(
in_channels=32,
out_channels=32,
kernel_size=3,
stride=1,
act='relu',
name="conv1_2")
self.conv1_3 = ConvBNLayer(
in_channels=32,
out_channels=64,
kernel_size=3,
stride=1,
act='relu',
name="conv1_3")
self.pool2d_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
# self.block_list = list()
self.block_list = nn.Sequential()
if layers >= 50:
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
if layers in [101, 152, 200] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
if i == 0 and block != 0:
stride = (2, 1)
else:
stride = (1, 1)
bottleneck_block = BottleneckBlock(in_channels=num_channels[block] if i == 0 else num_filters[block] * 4,
out_channels=num_filters[block],
stride=stride,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name)
shortcut = True
# self.block_list.append(bottleneck_block)
self.block_list.add_module('bb_%d_%d' % (block, i), bottleneck_block)
self.out_channels = num_filters[block]
else:
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
if i == 0 and block != 0:
stride = (2, 1)
else:
stride = (1, 1)
basic_block = BasicBlock(in_channels=num_channels[block] if i == 0 else num_filters[block],
out_channels=num_filters[block],
stride=stride,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name)
shortcut = True
# self.block_list.append(basic_block)
self.block_list.add_module('bb_%d_%d' % (block, i), basic_block)
self.out_channels = num_filters[block]
self.out_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
def forward(self, inputs):
y = self.conv1_1(inputs)
y = self.conv1_2(y)
y = self.conv1_3(y)
y = self.pool2d_max(y)
for block in self.block_list:
y = block(y)
y = self.out_pool(y)
return y
\ No newline at end of file
"""
This code is refer from:
https://github.com/roatienza/deep-text-recognition-benchmark/blob/master/modules/vitstr.py
"""
import numpy as np
import torch
import torch.nn as nn
from pytorchocr.modeling.backbones.rec_svtrnet import Block, PatchEmbed
# import paddle
# import paddle.nn as nn
# from ppocr.modeling.backbones.rec_svtrnet import Block, PatchEmbed, zeros_, trunc_normal_, ones_
scale_dim_heads = {'tiny': [192, 3], 'small': [384, 6], 'base': [768, 12]}
class ViTSTR(nn.Module):
def __init__(self,
img_size=[224, 224],
in_channels=1,
scale='tiny',
seqlen=27,
patch_size=[16, 16],
embed_dim=None,
depth=12,
num_heads=None,
mlp_ratio=4,
qkv_bias=True,
qk_scale=None,
drop_path_rate=0.,
drop_rate=0.,
attn_drop_rate=0.,
norm_layer='nn.LayerNorm',
act_layer='gelu',
epsilon=1e-6,
out_channels=None,
**kwargs):
super().__init__()
self.seqlen = seqlen
embed_dim = embed_dim if embed_dim is not None else scale_dim_heads[
scale][0]
num_heads = num_heads if num_heads is not None else scale_dim_heads[
scale][1]
out_channels = out_channels if out_channels is not None else embed_dim
self.patch_embed = PatchEmbed(
img_size=img_size,
in_channels=in_channels,
embed_dim=embed_dim,
patch_size=patch_size,
mode='linear')
num_patches = self.patch_embed.num_patches
self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
self.pos_drop = nn.Dropout(p=drop_rate)
dpr = np.linspace(0, drop_path_rate, depth)
self.blocks = nn.ModuleList([
Block(
dim=embed_dim,
num_heads=num_heads,
mlp_ratio=mlp_ratio,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop_rate,
attn_drop=attn_drop_rate,
drop_path=dpr[i],
norm_layer=norm_layer,
act_layer=act_layer,
epsilon=epsilon,
prenorm=False) for i in range(depth)
])
self.norm = eval(norm_layer)(embed_dim, eps=epsilon)
self.out_channels = out_channels
torch.nn.init.xavier_normal_(self.pos_embed)
torch.nn.init.xavier_normal_(self.cls_token)
self.apply(self._init_weights)
def _init_weights(self, m):
# weight initialization
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.ConvTranspose2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.LayerNorm):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
def forward_features(self, x):
B = x.shape[0]
x = self.patch_embed(x)
# cls_tokens = paddle.tile(self.cls_token, repeat_times=[B, 1, 1])
cls_tokens = self.cls_token.repeat(B, 1, 1)
x = torch.cat((cls_tokens, x), dim=1)
x = x + self.pos_embed
x = self.pos_drop(x)
for blk in self.blocks:
x = blk(x)
x = self.norm(x)
return x
def forward(self, x):
x = self.forward_features(x)
x = x[:, :self.seqlen]
return x.permute(0, 2, 1).unsqueeze(2)
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
__all__ = ['MobileNetV3']
def make_divisible(v, divisor=8, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
if new_v < 0.9 * v:
new_v += divisor
return new_v
def hard_sigmoid(x, slope=0.1666667, offset=0.5,):
return torch.clamp(slope * x + offset, 0., 1.)
def hard_swish(x, inplace=True):
return x * F.relu6(x + 3., inplace=inplace) / 6.
class MobileNetV3(nn.Module):
def __init__(self,
in_channels=3,
model_name='large',
scale=0.5,
disable_se=False,
**kwargs):
"""
the MobilenetV3 backbone network for detection module.
Args:
params(dict): the super parameters for build network
"""
super(MobileNetV3, self).__init__()
self.disable_se = disable_se
if model_name == "large":
cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, False, 'relu', 1],
[3, 64, 24, False, 'relu', 2],
[3, 72, 24, False, 'relu', 1],
[5, 72, 40, True, 'relu', 2],
[5, 120, 40, True, 'relu', 1],
[5, 120, 40, True, 'relu', 1],
[3, 240, 80, False, 'hardswish', 2],
[3, 200, 80, False, 'hardswish', 1],
[3, 184, 80, False, 'hardswish', 1],
[3, 184, 80, False, 'hardswish', 1],
[3, 480, 112, True, 'hardswish', 1],
[3, 672, 112, True, 'hardswish', 1],
[5, 672, 160, True, 'hardswish', 2],
[5, 960, 160, True, 'hardswish', 1],
[5, 960, 160, True, 'hardswish', 1],
]
cls_ch_squeeze = 960
elif model_name == "small":
cfg = [
# k, exp, c, se, nl, s,
[3, 16, 16, True, 'relu', 2],
[3, 72, 24, False, 'relu', 2],
[3, 88, 24, False, 'relu', 1],
[5, 96, 40, True, 'hardswish', 2],
[5, 240, 40, True, 'hardswish', 1],
[5, 240, 40, True, 'hardswish', 1],
[5, 120, 48, True, 'hardswish', 1],
[5, 144, 48, True, 'hardswish', 1],
[5, 288, 96, True, 'hardswish', 2],
[5, 576, 96, True, 'hardswish', 1],
[5, 576, 96, True, 'hardswish', 1],
]
cls_ch_squeeze = 576
else:
raise NotImplementedError("mode[" + model_name +
"_model] is not implemented!")
supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
assert scale in supported_scale, \
"supported scale are {} but input scale is {}".format(supported_scale, scale)
inplanes = 16
# conv1
self.conv = ConvBNLayer(
in_channels=in_channels,
out_channels=make_divisible(inplanes * scale),
kernel_size=3,
stride=2,
padding=1,
groups=1,
if_act=True,
act='hardswish',
name='conv1')
self.stages = nn.ModuleList()
self.out_channels = []
block_list = []
i = 0
inplanes = make_divisible(inplanes * scale)
for (k, exp, c, se, nl, s) in cfg:
se = se and not self.disable_se
start_idx = 2 if model_name == 'large' else 0
if s == 2 and i > start_idx:
self.out_channels.append(inplanes)
self.stages.append(nn.Sequential(*block_list))
block_list = []
block_list.append(
ResidualUnit(
in_channels=inplanes,
mid_channels=make_divisible(scale * exp),
out_channels=make_divisible(scale * c),
kernel_size=k,
stride=s,
use_se=se,
act=nl,
name="conv" + str(i + 2)))
inplanes = make_divisible(scale * c)
i += 1
block_list.append(
ConvBNLayer(
in_channels=inplanes,
out_channels=make_divisible(scale * cls_ch_squeeze),
kernel_size=1,
stride=1,
padding=0,
groups=1,
if_act=True,
act='hardswish',
name='conv_last'))
self.stages.append(nn.Sequential(*block_list))
self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
# for i, stage in enumerate(self.stages):
# self.add_module(module=stage, name="stage{}".format(i))
def forward(self, x):
x = self.conv(x)
out_list = []
for stage in self.stages:
x = stage(x)
out_list.append(x)
return out_list
class ConvBNLayer(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
padding,
groups=1,
if_act=True,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
self.if_act = if_act
self.act = act
self.conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
bias=False)
self.bn = nn.BatchNorm2d(
out_channels,
)
def forward(self, x):
x = self.conv(x)
x = self.bn(x)
if self.if_act:
if self.act == "relu":
x = F.relu(x)
elif self.act == "hardswish":
x = hard_swish(x)
else:
print("The activation function({}) is selected incorrectly.".
format(self.act))
exit()
return x
class ResidualUnit(nn.Module):
def __init__(self,
in_channels,
mid_channels,
out_channels,
kernel_size,
stride,
use_se,
act=None,
name=''):
super(ResidualUnit, self).__init__()
self.if_shortcut = stride == 1 and in_channels == out_channels
self.if_se = use_se
self.expand_conv = ConvBNLayer(
in_channels=in_channels,
out_channels=mid_channels,
kernel_size=1,
stride=1,
padding=0,
if_act=True,
act=act,
name=name + "_expand")
self.bottleneck_conv = ConvBNLayer(
in_channels=mid_channels,
out_channels=mid_channels,
kernel_size=kernel_size,
stride=stride,
padding=int((kernel_size - 1) // 2),
groups=mid_channels,
if_act=True,
act=act,
name=name + "_depthwise")
if self.if_se:
self.mid_se = SEModule(mid_channels, name=name + "_se")
self.linear_conv = ConvBNLayer(
in_channels=mid_channels,
out_channels=out_channels,
kernel_size=1,
stride=1,
padding=0,
if_act=False,
act=None,
name=name + "_linear")
def forward(self, inputs):
x = self.expand_conv(inputs)
x = self.bottleneck_conv(x)
if self.if_se:
x = self.mid_se(x)
x = self.linear_conv(x)
if self.if_shortcut:
x = torch.add(inputs, x)
return x
class SEModule(nn.Module):
def __init__(self, in_channels, reduction=4, name=""):
super(SEModule, self).__init__()
self.avg_pool = nn.AdaptiveAvgPool2d(1)
self.conv1 = nn.Conv2d(
in_channels=in_channels,
out_channels=in_channels // reduction,
kernel_size=1,
stride=1,
padding=0,
bias=True)
self.conv2 = nn.Conv2d(
in_channels=in_channels // reduction,
out_channels=in_channels,
kernel_size=1,
stride=1,
padding=0,
bias=True)
def forward(self, inputs):
outputs = self.avg_pool(inputs)
outputs = self.conv1(outputs)
outputs = F.relu(outputs)
outputs = self.conv2(outputs)
outputs = hard_sigmoid(outputs, slope=0.2, offset=0.5)
return inputs * outputs
\ No newline at end of file
import torch import torch
import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from torch import nn
class Hswish(nn.Module): class Hswish(nn.Module):
def __init__(self, inplace=True): def __init__(self, inplace=True):
...@@ -10,7 +9,8 @@ class Hswish(nn.Module): ...@@ -10,7 +9,8 @@ class Hswish(nn.Module):
self.inplace = inplace self.inplace = inplace
def forward(self, x): def forward(self, x):
return x * F.relu6(x + 3., inplace=self.inplace) / 6. return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0
# out = max(0, min(1, slop*x+offset)) # out = max(0, min(1, slop*x+offset))
# paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None) # paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None)
...@@ -22,7 +22,8 @@ class Hsigmoid(nn.Module): ...@@ -22,7 +22,8 @@ class Hsigmoid(nn.Module):
def forward(self, x): def forward(self, x):
# torch: F.relu6(x + 3., inplace=self.inplace) / 6. # torch: F.relu6(x + 3., inplace=self.inplace) / 6.
# paddle: F.relu6(1.2 * x + 3., inplace=self.inplace) / 6. # paddle: F.relu6(1.2 * x + 3., inplace=self.inplace) / 6.
return F.relu6(1.2 * x + 3., inplace=self.inplace) / 6. return F.relu6(1.2 * x + 3.0, inplace=self.inplace) / 6.0
class GELU(nn.Module): class GELU(nn.Module):
def __init__(self, inplace=True): def __init__(self, inplace=True):
...@@ -43,28 +44,30 @@ class Swish(nn.Module): ...@@ -43,28 +44,30 @@ class Swish(nn.Module):
x.mul_(torch.sigmoid(x)) x.mul_(torch.sigmoid(x))
return x return x
else: else:
return x*torch.sigmoid(x) return x * torch.sigmoid(x)
class Activation(nn.Module): class Activation(nn.Module):
def __init__(self, act_type, inplace=True): def __init__(self, act_type, inplace=True):
super(Activation, self).__init__() super(Activation, self).__init__()
act_type = act_type.lower() act_type = act_type.lower()
if act_type == 'relu': if act_type == "relu":
self.act = nn.ReLU(inplace=inplace) self.act = nn.ReLU(inplace=inplace)
elif act_type == 'relu6': elif act_type == "relu6":
self.act = nn.ReLU6(inplace=inplace) self.act = nn.ReLU6(inplace=inplace)
elif act_type == 'sigmoid': elif act_type == "sigmoid":
raise NotImplementedError raise NotImplementedError
elif act_type == 'hard_sigmoid': elif act_type == "hard_sigmoid":
self.act = Hsigmoid(inplace)#nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)# self.act = Hsigmoid(
elif act_type == 'hard_swish' or act_type == 'hswish': inplace
) # nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)#
elif act_type == "hard_swish" or act_type == "hswish":
self.act = Hswish(inplace=inplace) self.act = Hswish(inplace=inplace)
elif act_type == 'leakyrelu': elif act_type == "leakyrelu":
self.act = nn.LeakyReLU(inplace=inplace) self.act = nn.LeakyReLU(inplace=inplace)
elif act_type == 'gelu': elif act_type == "gelu":
self.act = GELU(inplace=inplace) self.act = GELU(inplace=inplace)
elif act_type == 'swish': elif act_type == "swish":
self.act = Swish(inplace=inplace) self.act = Swish(inplace=inplace)
else: else:
raise NotImplementedError raise NotImplementedError
......
...@@ -12,40 +12,32 @@ ...@@ -12,40 +12,32 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
__all__ = ['build_head'] __all__ = ["build_head"]
def build_head(config, **kwargs): def build_head(config, **kwargs):
# det head # det head
from .det_db_head import DBHead, PFHeadLocal from .det_db_head import DBHead, PFHeadLocal
from .det_east_head import EASTHead
from .det_sast_head import SASTHead
from .det_pse_head import PSEHead
from .det_fce_head import FCEHead
from .e2e_pg_head import PGHead
# rec head # rec head
from .rec_ctc_head import CTCHead from .rec_ctc_head import CTCHead
from .rec_att_head import AttentionHead
from .rec_srn_head import SRNHead
from .rec_nrtr_head import Transformer
from .rec_sar_head import SARHead
from .rec_can_head import CANHead
from .rec_multi_head import MultiHead from .rec_multi_head import MultiHead
# cls head # cls head
from .cls_head import ClsHead from .cls_head import ClsHead
support_dict = [
'DBHead', 'PSEHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead', 'AttentionHead',
'SRNHead', 'PGHead', 'Transformer', 'TableAttentionHead','SARHead', 'FCEHead',
'CANHead', 'MultiHead', 'PFHeadLocal',
support_dict = [
"DBHead",
"CTCHead",
"ClsHead",
"MultiHead",
"PFHeadLocal",
] ]
from .table_att_head import TableAttentionHead module_name = config.pop("name")
char_num = config.pop("char_num", 6625)
module_name = config.pop('name') assert module_name in support_dict, Exception(
assert module_name in support_dict, Exception('head only support {}'.format( "head only support {}".format(support_dict)
support_dict)) )
module_class = eval(module_name)(**config, **kwargs) module_class = eval(module_name)(**config, **kwargs)
return module_class return module_class
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment