Commit b3d6785d authored by myhloli

refactor(ocr): remove unused code and simplify model architecture

- Remove unused imports and code
- Simplify model architecture by removing unnecessary components
- Update initialization and forward pass logic
- Rename variables for consistency
parent 3cb156f5
@@ -2,7 +2,8 @@ import os, sys
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from pytorchocr.modeling.common import Activation
+from ..common import Activation
 class ConvBNLayer(nn.Module):
 ...
import torch
from torch import nn
class MTB(nn.Module):
def __init__(self, cnn_num, in_channels):
super(MTB, self).__init__()
self.block = nn.Sequential()
self.out_channels = in_channels
self.cnn_num = cnn_num
if self.cnn_num == 2:
for i in range(self.cnn_num):
self.block.add_module(
'conv_{}'.format(i),
nn.Conv2d(
in_channels=in_channels
if i == 0 else 32 * (2**(i - 1)),
out_channels=32 * (2**i),
kernel_size=3,
stride=2,
padding=1))
self.block.add_module('relu_{}'.format(i), nn.ReLU())
self.block.add_module('bn_{}'.format(i),
nn.BatchNorm2d(32 * (2**i)))
def forward(self, images):
x = self.block(images)
if self.cnn_num == 2:
# (b, w, h, c)
x = x.permute(0, 3, 2, 1)
x_shape = x.shape
x = torch.reshape(
x, (x_shape[0], x_shape[1], x_shape[2] * x_shape[3]))
return x
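# Illustrative sketch (not part of the committed file): a quick shape check for MTB.
# The input size is assumed; with cnn_num=2 the two stride-2 convs end at 64 channels,
# and forward() folds (h, c) into the last dimension.
if __name__ == "__main__":
    mtb = MTB(cnn_num=2, in_channels=3)
    feats = mtb(torch.randn(1, 3, 32, 100))  # conv stack -> (1, 64, 8, 25)
    print(feats.shape)                       # torch.Size([1, 25, 512]), i.e. (b, w, h*c)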
"""
This code is adapted from:
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/layers/conv_layer.py
https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/backbones/resnet31_ocr.py
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
# import paddle
# from paddle import ParamAttr
# import paddle.nn as nn
# import paddle.nn.functional as F
__all__ = ["ResNet31"]
def conv3x3(in_channel, out_channel, stride=1):
return nn.Conv2d(
in_channel,
out_channel,
kernel_size=3,
stride=stride,
padding=1,
bias=False)
class BasicBlock(nn.Module):
expansion = 1
def __init__(self, in_channels, channels, stride=1, downsample=False):
super().__init__()
self.conv1 = conv3x3(in_channels, channels, stride)
self.bn1 = nn.BatchNorm2d(channels)
self.relu = nn.ReLU()
self.conv2 = conv3x3(channels, channels)
self.bn2 = nn.BatchNorm2d(channels)
self.downsample = downsample
if downsample:
self.downsample = nn.Sequential(
nn.Conv2d(
in_channels,
channels * self.expansion,
1,
stride,
bias=False),
nn.BatchNorm2d(channels * self.expansion), )
else:
self.downsample = nn.Sequential()
self.stride = stride
def forward(self, x):
residual = x
out = self.conv1(x)
out = self.bn1(out)
out = self.relu(out)
out = self.conv2(out)
out = self.bn2(out)
if self.downsample:
residual = self.downsample(x)
out += residual
out = self.relu(out)
return out
class ResNet31(nn.Module):
'''
Args:
in_channels (int): Number of channels of input image tensor.
layers (list[int]): List of BasicBlock number for each stage.
channels (list[int]): List of out_channels of Conv2d layer.
out_indices (None | Sequence[int]): Indices of output stages.
last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage.
'''
def __init__(self,
in_channels=3,
layers=[1, 2, 5, 3],
channels=[64, 128, 256, 256, 512, 512, 512],
out_indices=None,
last_stage_pool=False):
super(ResNet31, self).__init__()
assert isinstance(in_channels, int)
assert isinstance(last_stage_pool, bool)
self.out_indices = out_indices
self.last_stage_pool = last_stage_pool
# conv 1 (Conv Conv)
self.conv1_1 = nn.Conv2d(
in_channels, channels[0], kernel_size=3, stride=1, padding=1)
self.bn1_1 = nn.BatchNorm2d(channels[0])
self.relu1_1 = nn.ReLU(inplace=True)
self.conv1_2 = nn.Conv2d(
channels[0], channels[1], kernel_size=3, stride=1, padding=1)
self.bn1_2 = nn.BatchNorm2d(channels[1])
self.relu1_2 = nn.ReLU(inplace=True)
# conv 2 (Max-pooling, Residual block, Conv)
self.pool2 = nn.MaxPool2d(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self.block2 = self._make_layer(channels[1], channels[2], layers[0])
self.conv2 = nn.Conv2d(
channels[2], channels[2], kernel_size=3, stride=1, padding=1)
self.bn2 = nn.BatchNorm2d(channels[2])
self.relu2 = nn.ReLU(inplace=True)
# conv 3 (Max-pooling, Residual block, Conv)
self.pool3 = nn.MaxPool2d(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self.block3 = self._make_layer(channels[2], channels[3], layers[1])
self.conv3 = nn.Conv2d(
channels[3], channels[3], kernel_size=3, stride=1, padding=1)
self.bn3 = nn.BatchNorm2d(channels[3])
self.relu3 = nn.ReLU(inplace=True)
# conv 4 (Max-pooling, Residual block, Conv)
self.pool4 = nn.MaxPool2d(
kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True)
self.block4 = self._make_layer(channels[3], channels[4], layers[2])
self.conv4 = nn.Conv2d(
channels[4], channels[4], kernel_size=3, stride=1, padding=1)
self.bn4 = nn.BatchNorm2d(channels[4])
self.relu4 = nn.ReLU(inplace=True)
# conv 5 ((Max-pooling), Residual block, Conv)
self.pool5 = None
if self.last_stage_pool:
self.pool5 = nn.MaxPool2d(
kernel_size=2, stride=2, padding=0, ceil_mode=True)
self.block5 = self._make_layer(channels[4], channels[5], layers[3])
self.conv5 = nn.Conv2d(
channels[5], channels[5], kernel_size=3, stride=1, padding=1)
self.bn5 = nn.BatchNorm2d(channels[5])
self.relu5 = nn.ReLU(inplace=True)
self.out_channels = channels[-1]
def _make_layer(self, input_channels, output_channels, blocks):
layers = []
for _ in range(blocks):
downsample = None
if input_channels != output_channels:
downsample = nn.Sequential(
nn.Conv2d(
input_channels,
output_channels,
kernel_size=1,
stride=1,
bias=False),
nn.BatchNorm2d(output_channels), )
layers.append(
BasicBlock(
input_channels, output_channels, downsample=downsample))
input_channels = output_channels
return nn.Sequential(*layers)
def forward(self, x):
x = self.conv1_1(x)
x = self.bn1_1(x)
x = self.relu1_1(x)
x = self.conv1_2(x)
x = self.bn1_2(x)
x = self.relu1_2(x)
outs = []
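        # stages 2-5: (optional max-pool) -> residual blocks -> 3x3 conv -> BN -> ReLU,
        # each sub-module fetched by name via getattr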
for i in range(4):
layer_index = i + 2
pool_layer = getattr(self, 'pool{}'.format(layer_index))
block_layer = getattr(self, 'block{}'.format(layer_index))
conv_layer = getattr(self, 'conv{}'.format(layer_index))
bn_layer = getattr(self, 'bn{}'.format(layer_index))
relu_layer = getattr(self, 'relu{}'.format(layer_index))
if pool_layer is not None:
x = pool_layer(x)
x = block_layer(x)
x = conv_layer(x)
x = bn_layer(x)
x = relu_layer(x)
outs.append(x)
if self.out_indices is not None:
return tuple([outs[i] for i in self.out_indices])
return x
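# Illustrative sketch (not part of the committed file): ResNet31 on an assumed 32x100 crop.
# pool2 and pool3 halve H and W, pool4 halves only H, and last_stage_pool defaults to False.
if __name__ == "__main__":
    backbone = ResNet31(in_channels=3)
    feat = backbone(torch.randn(1, 3, 32, 100))
    print(feat.shape)  # torch.Size([1, 512, 4, 25])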
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os, sys
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
__all__ = ["ResNetFPN"]
class ResNetFPN(nn.Module):
def __init__(self, in_channels=1, layers=50, **kwargs):
super(ResNetFPN, self).__init__()
supported_layers = {
18: {
'depth': [2, 2, 2, 2],
'block_class': BasicBlock
},
34: {
'depth': [3, 4, 6, 3],
'block_class': BasicBlock
},
50: {
'depth': [3, 4, 6, 3],
'block_class': BottleneckBlock
},
101: {
'depth': [3, 4, 23, 3],
'block_class': BottleneckBlock
},
152: {
'depth': [3, 8, 36, 3],
'block_class': BottleneckBlock
}
}
stride_list = [(2, 2), (2, 2), (1, 1), (1, 1)]
num_filters = [64, 128, 256, 512]
self.depth = supported_layers[layers]['depth']
self.conv = ConvBNLayer(
in_channels=in_channels,
out_channels=64,
kernel_size=7,
stride=2,
act="relu",
name="conv1")
self.block_list = nn.ModuleList()
in_ch = 64
if layers >= 50:
for block in range(len(self.depth)):
for i in range(self.depth[block]):
if layers in [101, 152] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
                    bottleneck_block = BottleneckBlock(
                        in_channels=in_ch,
                        out_channels=num_filters[block],
                        stride=stride_list[block] if i == 0 else 1,
                        name=conv_name)
                    in_ch = num_filters[block] * 4
                    self.block_list.add_module("bottleneckBlock_{}_{}".format(block, i), bottleneck_block)
else:
for block in range(len(self.depth)):
for i in range(self.depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
if i == 0 and block != 0:
stride = (2, 1)
else:
stride = (1, 1)
basicBlock = BasicBlock(
in_channels=in_ch,
out_channels=num_filters[block],
stride=stride_list[block] if i == 0 else 1,
is_first=block == i == 0,
name=conv_name)
in_ch = basicBlock.out_channels
self.block_list.add_module(conv_name, basicBlock)
out_ch_list = [in_ch // 4, in_ch // 2, in_ch]
self.base_block = nn.ModuleList()
self.conv_trans = []
self.bn_block = []
for i in [-2, -3]:
in_channels = out_ch_list[i + 1] + out_ch_list[i]
bb_0 = nn.Conv2d(
in_channels=in_channels,
out_channels=out_ch_list[i],
kernel_size=1,
bias=True)
self.base_block.add_module("F_{}_base_block_0".format(i), bb_0)
bb_1 = nn.Conv2d(
in_channels=out_ch_list[i],
out_channels=out_ch_list[i],
kernel_size=3,
padding=1,
bias=True)
self.base_block.add_module("F_{}_base_block_1".format(i), bb_1)
bb_2 = nn.Sequential(
nn.BatchNorm2d(out_ch_list[i]),
Activation("relu")
)
self.base_block.add_module("F_{}_base_block_2".format(i), bb_2)
bb_3 = nn.Conv2d(
in_channels=out_ch_list[i],
out_channels=512,
kernel_size=1,
bias=True)
self.base_block.add_module("F_{}_base_block_3".format(i), bb_3)
self.out_channels = 512
def __call__(self, x):
x = self.conv(x)
fpn_list = []
F = []
for i in range(len(self.depth)):
fpn_list.append(np.sum(self.depth[:i + 1]))
for i, block in enumerate(self.block_list):
x = block(x)
for number in fpn_list:
if i + 1 == number:
F.append(x)
base = F[-1]
j = 0
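        # Note (not in the original source): with stride_list as defined in __init__
        # ([(2, 2), (2, 2), (1, 1), (1, 1)]), F[-2] and F[-3] already match base's
        # spatial size, so the conv_trans / bn_block fallback below is not exercised.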
for i, block in enumerate(self.base_block):
if i % 3 == 0 and i < 6:
j = j + 1
b, c, w, h = F[-j - 1].shape
if [w, h] == list(base.shape[2:]):
base = base
else:
base = self.conv_trans[j - 1](base)
base = self.bn_block[j - 1](base)
base = torch.cat([base, F[-j - 1]], dim=1)
base = block(base)
return base
class ConvBNLayer(nn.Module):
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
act=None,
name=None):
super(ConvBNLayer, self).__init__()
self.conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=2 if stride == (1, 1) else kernel_size,
dilation=2 if stride == (1, 1) else 1,
stride=stride,
padding=(kernel_size - 1) // 2,
groups=groups,
bias=False, )
if name == "conv1":
bn_name = "bn_" + name
else:
bn_name = "bn" + name[3:]
self.bn = nn.BatchNorm2d(out_channels)
self.act = act
if self.act is not None:
self._act = Activation(act_type=self.act, inplace=True)
def __call__(self, x):
x = self.conv(x)
x = self.bn(x)
if self.act is not None:
x = self._act(x)
return x
class ShortCut(nn.Module):
def __init__(self, in_channels, out_channels, stride, name, is_first=False):
super(ShortCut, self).__init__()
self.use_conv = True
if in_channels != out_channels or stride != 1 or is_first == True:
if stride == (1, 1):
self.conv = ConvBNLayer(
in_channels, out_channels, 1, 1, name=name)
else: # stride==(2,2)
self.conv = ConvBNLayer(
in_channels, out_channels, 1, stride, name=name)
else:
self.use_conv = False
def forward(self, x):
if self.use_conv:
x = self.conv(x)
return x
class BottleneckBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride, name):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
self.conv2 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels * 4,
kernel_size=1,
act=None,
name=name + "_branch2c")
self.short = ShortCut(
in_channels=in_channels,
out_channels=out_channels * 4,
stride=stride,
is_first=False,
name=name + "_branch1")
self.out_channels = out_channels * 4
def forward(self, x):
y = self.conv0(x)
y = self.conv1(y)
y = self.conv2(y)
y = y + self.short(x)
y = F.relu(y)
return y
class BasicBlock(nn.Module):
def __init__(self, in_channels, out_channels, stride, name, is_first):
super(BasicBlock, self).__init__()
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
act='relu',
stride=stride,
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
act=None,
name=name + "_branch2b")
self.short = ShortCut(
in_channels=in_channels,
out_channels=out_channels,
stride=stride,
is_first=is_first,
name=name + "_branch1")
self.out_channels = out_channels
def forward(self, x):
y = self.conv0(x)
y = self.conv1(y)
y = y + self.short(x)
return F.relu(y)
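# Illustrative sketch (not part of the committed file): ResNetFPN on an assumed 64x256
# grayscale crop, as used by SRN-style recognizers.
if __name__ == "__main__":
    fpn = ResNetFPN(in_channels=1, layers=50)
    out = fpn(torch.randn(1, 1, 64, 256))
    # the stem and the first two stages each stride by 2, and the top-down fusion
    # projects the result to fpn.out_channels == 512
    print(out.shape)  # torch.Size([1, 512, 8, 32])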
import os, sys
import torch
import torch.nn as nn
import torch.nn.functional as F
from pytorchocr.modeling.common import Activation
class ConvBNLayer(nn.Module):
def __init__(
self,
in_channels,
out_channels,
kernel_size,
stride=1,
groups=1,
is_vd_mode=False,
act=None,
name=None, ):
super(ConvBNLayer, self).__init__()
self.act = act
self.is_vd_mode = is_vd_mode
self._pool2d_avg = nn.AvgPool2d(
kernel_size=stride, stride=stride, padding=0, ceil_mode=True)
self._conv = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=1 if is_vd_mode else stride,
padding=(kernel_size - 1) // 2,
groups=groups,
bias=False)
self._batch_norm = nn.BatchNorm2d(
out_channels,)
if self.act is not None:
self._act = Activation(act_type=act, inplace=True)
def forward(self, inputs):
if self.is_vd_mode:
inputs = self._pool2d_avg(inputs)
y = self._conv(inputs)
y = self._batch_norm(y)
if self.act is not None:
y = self._act(y)
return y
class BottleneckBlock(nn.Module):
def __init__(self,
in_channels,
out_channels,
stride,
shortcut=True,
if_first=False,
name=None):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2b")
self.conv2 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels * 4,
kernel_size=1,
act=None,
name=name + "_branch2c")
if not shortcut:
self.short = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels * 4,
kernel_size=1,
stride=stride,
is_vd_mode=not if_first and stride[0] != 1,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = short + conv2
y = F.relu(y)
return y
class BasicBlock(nn.Module):
def __init__(self,
in_channels,
out_channels,
stride,
shortcut=True,
if_first=False,
name=None):
super(BasicBlock, self).__init__()
self.stride = stride
self.conv0 = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=3,
stride=stride,
act='relu',
name=name + "_branch2a")
self.conv1 = ConvBNLayer(
in_channels=out_channels,
out_channels=out_channels,
kernel_size=3,
act=None,
name=name + "_branch2b")
if not shortcut:
self.short = ConvBNLayer(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=1,
stride=stride,
is_vd_mode=not if_first and stride[0] != 1,
name=name + "_branch1")
self.shortcut = shortcut
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = short + conv1
y = F.relu(y)
return y
class ResNet(nn.Module):
def __init__(self, in_channels=3, layers=50, **kwargs):
super(ResNet, self).__init__()
self.layers = layers
supported_layers = [18, 34, 50, 101, 152, 200]
assert layers in supported_layers, \
"supported layers are {} but input layer is {}".format(
supported_layers, layers)
if layers == 18:
depth = [2, 2, 2, 2]
elif layers == 34 or layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
elif layers == 200:
depth = [3, 12, 48, 3]
num_channels = [64, 256, 512,
1024] if layers >= 50 else [64, 64, 128, 256]
num_filters = [64, 128, 256, 512]
self.conv1_1 = ConvBNLayer(
in_channels=in_channels,
out_channels=32,
kernel_size=3,
stride=1,
act='relu',
name="conv1_1")
self.conv1_2 = ConvBNLayer(
in_channels=32,
out_channels=32,
kernel_size=3,
stride=1,
act='relu',
name="conv1_2")
self.conv1_3 = ConvBNLayer(
in_channels=32,
out_channels=64,
kernel_size=3,
stride=1,
act='relu',
name="conv1_3")
self.pool2d_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
# self.block_list = list()
self.block_list = nn.Sequential()
if layers >= 50:
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
if layers in [101, 152, 200] and block == 2:
if i == 0:
conv_name = "res" + str(block + 2) + "a"
else:
conv_name = "res" + str(block + 2) + "b" + str(i)
else:
conv_name = "res" + str(block + 2) + chr(97 + i)
if i == 0 and block != 0:
stride = (2, 1)
else:
stride = (1, 1)
bottleneck_block = BottleneckBlock(in_channels=num_channels[block] if i == 0 else num_filters[block] * 4,
out_channels=num_filters[block],
stride=stride,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name)
shortcut = True
# self.block_list.append(bottleneck_block)
self.block_list.add_module('bb_%d_%d' % (block, i), bottleneck_block)
self.out_channels = num_filters[block]
else:
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
conv_name = "res" + str(block + 2) + chr(97 + i)
if i == 0 and block != 0:
stride = (2, 1)
else:
stride = (1, 1)
basic_block = BasicBlock(in_channels=num_channels[block] if i == 0 else num_filters[block],
out_channels=num_filters[block],
stride=stride,
shortcut=shortcut,
if_first=block == i == 0,
name=conv_name)
shortcut = True
# self.block_list.append(basic_block)
self.block_list.add_module('bb_%d_%d' % (block, i), basic_block)
self.out_channels = num_filters[block]
self.out_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
def forward(self, inputs):
y = self.conv1_1(inputs)
y = self.conv1_2(y)
y = self.conv1_3(y)
y = self.pool2d_max(y)
for block in self.block_list:
y = block(y)
y = self.out_pool(y)
return y
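# Illustrative sketch (not part of the committed file): the recognition ResNet-vd on an
# assumed 32x320 crop. Stages after the stem use stride (2, 1), so only H keeps shrinking,
# and the final 2x2 max-pool brings H down to 1 for the sequence decoder.
if __name__ == "__main__":
    backbone = ResNet(in_channels=3, layers=34)
    feat = backbone(torch.randn(1, 3, 32, 320))
    print(feat.shape)  # torch.Size([1, 512, 1, 80])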
"""
This code is adapted from:
https://github.com/roatienza/deep-text-recognition-benchmark/blob/master/modules/vitstr.py
"""
import numpy as np
import torch
import torch.nn as nn
from pytorchocr.modeling.backbones.rec_svtrnet import Block, PatchEmbed
# import paddle
# import paddle.nn as nn
# from ppocr.modeling.backbones.rec_svtrnet import Block, PatchEmbed, zeros_, trunc_normal_, ones_
scale_dim_heads = {'tiny': [192, 3], 'small': [384, 6], 'base': [768, 12]}
class ViTSTR(nn.Module):
def __init__(self,
img_size=[224, 224],
in_channels=1,
scale='tiny',
seqlen=27,
patch_size=[16, 16],
embed_dim=None,
depth=12,
num_heads=None,
mlp_ratio=4,
qkv_bias=True,
qk_scale=None,
drop_path_rate=0.,
drop_rate=0.,
attn_drop_rate=0.,
norm_layer='nn.LayerNorm',
act_layer='gelu',
epsilon=1e-6,
out_channels=None,
**kwargs):
super().__init__()
self.seqlen = seqlen
embed_dim = embed_dim if embed_dim is not None else scale_dim_heads[
scale][0]
num_heads = num_heads if num_heads is not None else scale_dim_heads[
scale][1]
out_channels = out_channels if out_channels is not None else embed_dim
self.patch_embed = PatchEmbed(
img_size=img_size,
in_channels=in_channels,
embed_dim=embed_dim,
patch_size=patch_size,
mode='linear')
num_patches = self.patch_embed.num_patches
self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
self.pos_drop = nn.Dropout(p=drop_rate)
dpr = np.linspace(0, drop_path_rate, depth)
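        # per-block drop-path (stochastic depth) rates, increasing linearly from 0 to drop_path_rate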
self.blocks = nn.ModuleList([
Block(
dim=embed_dim,
num_heads=num_heads,
mlp_ratio=mlp_ratio,
qkv_bias=qkv_bias,
qk_scale=qk_scale,
drop=drop_rate,
attn_drop=attn_drop_rate,
drop_path=dpr[i],
norm_layer=norm_layer,
act_layer=act_layer,
epsilon=epsilon,
prenorm=False) for i in range(depth)
])
self.norm = eval(norm_layer)(embed_dim, eps=epsilon)
self.out_channels = out_channels
torch.nn.init.xavier_normal_(self.pos_embed)
torch.nn.init.xavier_normal_(self.cls_token)
self.apply(self._init_weights)
def _init_weights(self, m):
# weight initialization
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.BatchNorm2d):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.ConvTranspose2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out')
if m.bias is not None:
nn.init.zeros_(m.bias)
elif isinstance(m, nn.LayerNorm):
nn.init.ones_(m.weight)
nn.init.zeros_(m.bias)
def forward_features(self, x):
B = x.shape[0]
x = self.patch_embed(x)
# cls_tokens = paddle.tile(self.cls_token, repeat_times=[B, 1, 1])
cls_tokens = self.cls_token.repeat(B, 1, 1)
x = torch.cat((cls_tokens, x), dim=1)
x = x + self.pos_embed
x = self.pos_drop(x)
for blk in self.blocks:
x = blk(x)
x = self.norm(x)
return x
def forward(self, x):
x = self.forward_features(x)
x = x[:, :self.seqlen]
return x.permute(0, 2, 1).unsqueeze(2)
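# Illustrative sketch (not part of the committed file): ViTSTR-tiny on a 224x224
# single-channel input, assuming the SVTR PatchEmbed in 'linear' mode yields
# 14x14 = 196 patch tokens (plus the cls token) of dimension 192.
if __name__ == "__main__":
    model = ViTSTR(scale='tiny', in_channels=1, seqlen=27)
    out = model(torch.randn(1, 1, 224, 224))
    print(out.shape)  # torch.Size([1, 192, 1, 27])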
 import torch
-import torch.nn as nn
 import torch.nn.functional as F
+from torch import nn
 class Hswish(nn.Module):
     def __init__(self, inplace=True):
@@ -10,7 +9,8 @@ class Hswish(nn.Module):
         self.inplace = inplace
     def forward(self, x):
-        return x * F.relu6(x + 3., inplace=self.inplace) / 6.
+        return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0
 # out = max(0, min(1, slop*x+offset))
 # paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None)
@@ -22,7 +22,8 @@ class Hsigmoid(nn.Module):
     def forward(self, x):
         # torch: F.relu6(x + 3., inplace=self.inplace) / 6.
         # paddle: F.relu6(1.2 * x + 3., inplace=self.inplace) / 6.
-        return F.relu6(1.2 * x + 3., inplace=self.inplace) / 6.
+        return F.relu6(1.2 * x + 3.0, inplace=self.inplace) / 6.0
 class GELU(nn.Module):
     def __init__(self, inplace=True):
@@ -43,31 +44,33 @@ class Swish(nn.Module):
             x.mul_(torch.sigmoid(x))
             return x
         else:
-            return x*torch.sigmoid(x)
+            return x * torch.sigmoid(x)
 class Activation(nn.Module):
     def __init__(self, act_type, inplace=True):
         super(Activation, self).__init__()
         act_type = act_type.lower()
-        if act_type == 'relu':
+        if act_type == "relu":
             self.act = nn.ReLU(inplace=inplace)
-        elif act_type == 'relu6':
+        elif act_type == "relu6":
             self.act = nn.ReLU6(inplace=inplace)
-        elif act_type == 'sigmoid':
+        elif act_type == "sigmoid":
             raise NotImplementedError
-        elif act_type == 'hard_sigmoid':
-            self.act = Hsigmoid(inplace)#nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)#
-        elif act_type == 'hard_swish' or act_type == 'hswish':
+        elif act_type == "hard_sigmoid":
+            self.act = Hsigmoid(
+                inplace
+            )  # nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)#
+        elif act_type == "hard_swish" or act_type == "hswish":
             self.act = Hswish(inplace=inplace)
-        elif act_type == 'leakyrelu':
+        elif act_type == "leakyrelu":
             self.act = nn.LeakyReLU(inplace=inplace)
-        elif act_type == 'gelu':
+        elif act_type == "gelu":
             self.act = GELU(inplace=inplace)
-        elif act_type == 'swish':
+        elif act_type == "swish":
             self.act = Swish(inplace=inplace)
         else:
             raise NotImplementedError
     def forward(self, inputs):
         return self.act(inputs)
@@ -12,40 +12,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__all__ = ['build_head']
+__all__ = ["build_head"]
 def build_head(config, **kwargs):
     # det head
     from .det_db_head import DBHead, PFHeadLocal
-    from .det_east_head import EASTHead
-    from .det_sast_head import SASTHead
-    from .det_pse_head import PSEHead
-    from .det_fce_head import FCEHead
-    from .e2e_pg_head import PGHead
     # rec head
     from .rec_ctc_head import CTCHead
-    from .rec_att_head import AttentionHead
-    from .rec_srn_head import SRNHead
-    from .rec_nrtr_head import Transformer
-    from .rec_sar_head import SARHead
-    from .rec_can_head import CANHead
     from .rec_multi_head import MultiHead
     # cls head
     from .cls_head import ClsHead
-    support_dict = [
-        'DBHead', 'PSEHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead', 'AttentionHead',
-        'SRNHead', 'PGHead', 'Transformer', 'TableAttentionHead','SARHead', 'FCEHead',
-        'CANHead', 'MultiHead', 'PFHeadLocal',
-    ]
-    from .table_att_head import TableAttentionHead
-    module_name = config.pop('name')
-    assert module_name in support_dict, Exception('head only support {}'.format(
-        support_dict))
+    support_dict = [
+        "DBHead",
+        "CTCHead",
+        "ClsHead",
+        "MultiHead",
+        "PFHeadLocal",
+    ]
+    module_name = config.pop("name")
+    char_num = config.pop("char_num", 6625)
+    assert module_name in support_dict, Exception(
+        "head only support {}".format(support_dict)
+    )
     module_class = eval(module_name)(**config, **kwargs)
     return module_class
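# Illustrative sketch (not part of the commit): the trimmed registry is invoked the same
# way as before; the config keys below are assumed from the existing CTCHead signature.
if __name__ == "__main__":
    from pytorchocr.modeling.heads import build_head

    head = build_head({"name": "CTCHead", "in_channels": 96, "out_channels": 6625})
    print(type(head).__name__)  # CTCHead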
-import os, sys
 import torch
-import torch.nn as nn
 import torch.nn.functional as F
+from torch import nn
 class ClsHead(nn.Module):
     """
@@ -12,17 +12,12 @@ class ClsHead(nn.Module):
     def __init__(self, in_channels, class_dim, **kwargs):
         super(ClsHead, self).__init__()
-        self.training = False
         self.pool = nn.AdaptiveAvgPool2d(1)
-        self.fc = nn.Linear(
-            in_channels,
-            class_dim,
-            bias=True)
+        self.fc = nn.Linear(in_channels, class_dim, bias=True)
     def forward(self, x):
         x = self.pool(x)
         x = torch.reshape(x, shape=[x.shape[0], x.shape[1]])
         x = self.fc(x)
-        if not self.training:
-            x = F.softmax(x, dim=1)
+        x = F.softmax(x, dim=1)
         return x
-import os, sys
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from pytorchocr.modeling.common import Activation
-from pytorchocr.modeling.backbones.det_mobilenet_v3 import ConvBNLayer
+from ..common import Activation
+from ..backbones.det_mobilenet_v3 import ConvBNLayer
 class Head(nn.Module):
     def __init__(self, in_channels, **kwargs):
@@ -76,13 +75,8 @@ class DBHead(nn.Module):
     def forward(self, x):
         shrink_maps = self.binarize(x)
-        if not self.training:
-            return {'maps': shrink_maps}
-        threshold_maps = self.thresh(x)
-        binary_maps = self.step_function(shrink_maps, threshold_maps)
-        y = torch.cat([shrink_maps, threshold_maps, binary_maps], dim=1)
-        return {'maps': y}
+        return {'maps': shrink_maps}
 class LocalModule(nn.Module):
     def __init__(self, in_c, mid_c, use_distance=True):
@@ -101,7 +95,7 @@ class PFHeadLocal(DBHead):
         super(PFHeadLocal, self).__init__(in_channels, k, **kwargs)
         self.mode = mode
-        self.up_conv = nn.interpolate(scale_factor=2, mode="nearest")
+        self.up_conv = nn.Upsample(scale_factor=2, mode="nearest")
         if self.mode == 'large':
             self.cbn_layer = LocalModule(in_channels // 4, in_channels // 4)
         elif self.mode == 'small':
@@ -112,10 +106,4 @@ class PFHeadLocal(DBHead):
         base_maps = shrink_maps
         cbn_maps = self.cbn_layer(self.up_conv(f), shrink_maps, None)
         cbn_maps = F.sigmoid(cbn_maps)
-        if not self.training:
-            return {'maps': 0.5 * (base_maps + cbn_maps), 'cbn_maps': cbn_maps}
-        threshold_maps = self.thresh(x)
-        binary_maps = self.step_function(shrink_maps, threshold_maps)
-        y = torch.cat([cbn_maps, threshold_maps, binary_maps], dim=1)
-        return {'maps': y, 'distance_maps': cbn_maps, 'cbn_maps': binary_maps}
+        return {'maps': 0.5 * (base_maps + cbn_maps), 'cbn_maps': cbn_maps}