refactor(ocr): remove unused code and simplify model architecture

- Remove unused imports and code
- Simplify model architecture by removing unnecessary components
- Update initialization and forward pass logic
- Rename variables for consistency

refactor(ocr): remove unused code and simplify model architecture
- Remove unused imports and code
- Simplify model architecture by removing unnecessary components
- Update initialization and forward pass logic
- Rename variables for consistency
b3d6785d · myhloli · 3cb156f5 · b3d6785d · 3cb156f5 · 3cb156f5
Commit b3d6785d authored Apr 01, 2025 by myhloli
20 changed files
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py
@@ -2,7 +2,8 @@ import os, sys
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from pytorchocr.modeling.common import Activation
+from ..common import Activation
 class ConvBNLayer(nn.Module):

--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_nrtr_mtb.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_nrtr_mtb.py
-import torch
-from torch import nn
-class MTB(nn.Module):
-    def __init__(self, cnn_num, in_channels):
-        super(MTB, self).__init__()
-        self.block = nn.Sequential()
-        self.out_channels = in_channels
-        self.cnn_num = cnn_num
-        if self.cnn_num == 2:
-            for i in range(self.cnn_num):
-                self.block.add_module(
-                    'conv_{}'.format(i),
-                    nn.Conv2d(
-                        in_channels=in_channels
-                        if i == 0 else 32 * (2**(i - 1)),
-                        out_channels=32 * (2**i),
-                        kernel_size=3,
-                        stride=2,
-                        padding=1))
-                self.block.add_module('relu_{}'.format(i), nn.ReLU())
-                self.block.add_module('bn_{}'.format(i),
-                                        nn.BatchNorm2d(32 * (2**i)))
-    def forward(self, images):
-        x = self.block(images)
-        if self.cnn_num == 2:
-            # (b, w, h, c)
-            x = x.permute(0, 3, 2, 1)
-            x_shape = x.shape
-            x = torch.reshape(
-                x, (x_shape[0], x_shape[1], x_shape[2] * x_shape[3]))
-        return x
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_resnet_31.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_resnet_31.py
-"""
-This code is refer from:
-https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/layers/conv_layer.py
-https://github.com/open-mmlab/mmocr/blob/main/mmocr/models/textrecog/backbones/resnet31_ocr.py
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-# import paddle
-# from paddle import ParamAttr
-# import paddle.nn as nn
-# import paddle.nn.functional as F
-__all__ = ["ResNet31"]
-def conv3x3(in_channel, out_channel, stride=1):
-    return nn.Conv2d(
-        in_channel,
-        out_channel,
-        kernel_size=3,
-        stride=stride,
-        padding=1,
-        bias=False)
-class BasicBlock(nn.Module):
-    expansion = 1
-    def __init__(self, in_channels, channels, stride=1, downsample=False):
-        super().__init__()
-        self.conv1 = conv3x3(in_channels, channels, stride)
-        self.bn1 = nn.BatchNorm2d(channels)
-        self.relu = nn.ReLU()
-        self.conv2 = conv3x3(channels, channels)
-        self.bn2 = nn.BatchNorm2d(channels)
-        self.downsample = downsample
-        if downsample:
-            self.downsample = nn.Sequential(
-                nn.Conv2d(
-                    in_channels,
-                    channels * self.expansion,
-                    1,
-                    stride,
-                    bias=False),
-                nn.BatchNorm2d(channels * self.expansion), )
-        else:
-            self.downsample = nn.Sequential()
-        self.stride = stride
-    def forward(self, x):
-        residual = x
-        out = self.conv1(x)
-        out = self.bn1(out)
-        out = self.relu(out)
-        out = self.conv2(out)
-        out = self.bn2(out)
-        if self.downsample:
-            residual = self.downsample(x)
-        out += residual
-        out = self.relu(out)
-        return out
-class ResNet31(nn.Module):
-    '''
-    Args:
-        in_channels (int): Number of channels of input image tensor.
-        layers (list[int]): List of BasicBlock number for each stage.
-        channels (list[int]): List of out_channels of Conv2d layer.
-        out_indices (None | Sequence[int]): Indices of output stages.
-        last_stage_pool (bool): If True, add `MaxPool2d` layer to last stage.
-    '''
-    def __init__(self,
-                 in_channels=3,
-                 layers=[1, 2, 5, 3],
-                 channels=[64, 128, 256, 256, 512, 512, 512],
-                 out_indices=None,
-                 last_stage_pool=False):
-        super(ResNet31, self).__init__()
-        assert isinstance(in_channels, int)
-        assert isinstance(last_stage_pool, bool)
-        self.out_indices = out_indices
-        self.last_stage_pool = last_stage_pool
-        # conv 1 (Conv Conv)
-        self.conv1_1 = nn.Conv2d(
-            in_channels, channels[0], kernel_size=3, stride=1, padding=1)
-        self.bn1_1 = nn.BatchNorm2d(channels[0])
-        self.relu1_1 = nn.ReLU(inplace=True)
-        self.conv1_2 = nn.Conv2d(
-            channels[0], channels[1], kernel_size=3, stride=1, padding=1)
-        self.bn1_2 = nn.BatchNorm2d(channels[1])
-        self.relu1_2 = nn.ReLU(inplace=True)
-        # conv 2 (Max-pooling, Residual block, Conv)
-        self.pool2 = nn.MaxPool2d(
-            kernel_size=2, stride=2, padding=0, ceil_mode=True)
-        self.block2 = self._make_layer(channels[1], channels[2], layers[0])
-        self.conv2 = nn.Conv2d(
-            channels[2], channels[2], kernel_size=3, stride=1, padding=1)
-        self.bn2 = nn.BatchNorm2d(channels[2])
-        self.relu2 = nn.ReLU(inplace=True)
-        # conv 3 (Max-pooling, Residual block, Conv)
-        self.pool3 = nn.MaxPool2d(
-            kernel_size=2, stride=2, padding=0, ceil_mode=True)
-        self.block3 = self._make_layer(channels[2], channels[3], layers[1])
-        self.conv3 = nn.Conv2d(
-            channels[3], channels[3], kernel_size=3, stride=1, padding=1)
-        self.bn3 = nn.BatchNorm2d(channels[3])
-        self.relu3 = nn.ReLU(inplace=True)
-        # conv 4 (Max-pooling, Residual block, Conv)
-        self.pool4 = nn.MaxPool2d(
-            kernel_size=(2, 1), stride=(2, 1), padding=0, ceil_mode=True)
-        self.block4 = self._make_layer(channels[3], channels[4], layers[2])
-        self.conv4 = nn.Conv2d(
-            channels[4], channels[4], kernel_size=3, stride=1, padding=1)
-        self.bn4 = nn.BatchNorm2d(channels[4])
-        self.relu4 = nn.ReLU(inplace=True)
-        # conv 5 ((Max-pooling), Residual block, Conv)
-        self.pool5 = None
-        if self.last_stage_pool:
-            self.pool5 = nn.MaxPool2d(
-                kernel_size=2, stride=2, padding=0, ceil_mode=True)
-        self.block5 = self._make_layer(channels[4], channels[5], layers[3])
-        self.conv5 = nn.Conv2d(
-            channels[5], channels[5], kernel_size=3, stride=1, padding=1)
-        self.bn5 = nn.BatchNorm2d(channels[5])
-        self.relu5 = nn.ReLU(inplace=True)
-        self.out_channels = channels[-1]
-    def _make_layer(self, input_channels, output_channels, blocks):
-        layers = []
-        for _ in range(blocks):
-            downsample = None
-            if input_channels != output_channels:
-                downsample = nn.Sequential(
-                    nn.Conv2d(
-                        input_channels,
-                        output_channels,
-                        kernel_size=1,
-                        stride=1,
-                        bias=False),
-                    nn.BatchNorm2d(output_channels), )
-            layers.append(
-                BasicBlock(
-                    input_channels, output_channels, downsample=downsample))
-            input_channels = output_channels
-        return nn.Sequential(*layers)
-    def forward(self, x):
-        x = self.conv1_1(x)
-        x = self.bn1_1(x)
-        x = self.relu1_1(x)
-        x = self.conv1_2(x)
-        x = self.bn1_2(x)
-        x = self.relu1_2(x)
-        outs = []
-        for i in range(4):
-            layer_index = i + 2
-            pool_layer = getattr(self, 'pool{}'.format(layer_index))
-            block_layer = getattr(self, 'block{}'.format(layer_index))
-            conv_layer = getattr(self, 'conv{}'.format(layer_index))
-            bn_layer = getattr(self, 'bn{}'.format(layer_index))
-            relu_layer = getattr(self, 'relu{}'.format(layer_index))
-            if pool_layer is not None:
-                x = pool_layer(x)
-            x = block_layer(x)
-            x = conv_layer(x)
-            x = bn_layer(x)
-            x = relu_layer(x)
-            outs.append(x)
-        if self.out_indices is not None:
-            return tuple([outs[i] for i in self.out_indices])
-        return x
\ No newline at end of file
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_resnet_fpn.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_resnet_fpn.py
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import os, sys
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from pytorchocr.modeling.common import Activation
-__all__ = ["ResNetFPN"]
-class ResNetFPN(nn.Module):
-    def __init__(self, in_channels=1, layers=50, **kwargs):
-        super(ResNetFPN, self).__init__()
-        supported_layers = {
-            18: {
-                'depth': [2, 2, 2, 2],
-                'block_class': BasicBlock
-            },
-            34: {
-                'depth': [3, 4, 6, 3],
-                'block_class': BasicBlock
-            },
-            50: {
-                'depth': [3, 4, 6, 3],
-                'block_class': BottleneckBlock
-            },
-            101: {
-                'depth': [3, 4, 23, 3],
-                'block_class': BottleneckBlock
-            },
-            152: {
-                'depth': [3, 8, 36, 3],
-                'block_class': BottleneckBlock
-            }
-        }
-        stride_list = [(2, 2), (2, 2), (1, 1), (1, 1)]
-        num_filters = [64, 128, 256, 512]
-        self.depth = supported_layers[layers]['depth']
-        self.conv = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=64,
-            kernel_size=7,
-            stride=2,
-            act="relu",
-            name="conv1")
-        self.block_list = nn.ModuleList()
-        in_ch = 64
-        if layers >= 50:
-            for block in range(len(self.depth)):
-                for i in range(self.depth[block]):
-                    if layers in [101, 152] and block == 2:
-                        if i == 0:
-                            conv_name = "res" + str(block + 2) + "a"
-                        else:
-                            conv_name = "res" + str(block + 2) + "b" + str(i)
-                    else:
-                        conv_name = "res" + str(block + 2) + chr(97 + i)
-                    bottlenectBlock = BottleneckBlock(
-                        in_channels=in_ch,
-                        out_channels=num_filters[block],
-                        stride=stride_list[block] if i == 0 else 1,
-                        name=conv_name)
-                    in_ch = num_filters[block] * 4
-                    self.block_list.add_module("bottleneckBlock_{}_{}".format(block, i), bottlenectBlock)
-        else:
-            for block in range(len(self.depth)):
-                for i in range(self.depth[block]):
-                    conv_name = "res" + str(block + 2) + chr(97 + i)
-                    if i == 0 and block != 0:
-                        stride = (2, 1)
-                    else:
-                        stride = (1, 1)
-                    basicBlock = BasicBlock(
-                            in_channels=in_ch,
-                            out_channels=num_filters[block],
-                            stride=stride_list[block] if i == 0 else 1,
-                            is_first=block == i == 0,
-                            name=conv_name)
-                    in_ch = basicBlock.out_channels
-                    self.block_list.add_module(conv_name, basicBlock)
-        out_ch_list = [in_ch // 4, in_ch // 2, in_ch]
-        self.base_block = nn.ModuleList()
-        self.conv_trans = []
-        self.bn_block = []
-        for i in [-2, -3]:
-            in_channels = out_ch_list[i + 1] + out_ch_list[i]
-            bb_0 = nn.Conv2d(
-                        in_channels=in_channels,
-                        out_channels=out_ch_list[i],
-                        kernel_size=1,
-                        bias=True)
-            self.base_block.add_module("F_{}_base_block_0".format(i), bb_0)
-            bb_1 = nn.Conv2d(
-                        in_channels=out_ch_list[i],
-                        out_channels=out_ch_list[i],
-                        kernel_size=3,
-                        padding=1,
-                        bias=True)
-            self.base_block.add_module("F_{}_base_block_1".format(i), bb_1)
-            bb_2 = nn.Sequential(
-                nn.BatchNorm2d(out_ch_list[i]),
-                Activation("relu")
-            )
-            self.base_block.add_module("F_{}_base_block_2".format(i), bb_2)
-        bb_3 = nn.Conv2d(
-                    in_channels=out_ch_list[i],
-                    out_channels=512,
-                    kernel_size=1,
-                    bias=True)
-        self.base_block.add_module("F_{}_base_block_3".format(i), bb_3)
-        self.out_channels = 512
-    def __call__(self, x):
-        x = self.conv(x)
-        fpn_list = []
-        F = []
-        for i in range(len(self.depth)):
-            fpn_list.append(np.sum(self.depth[:i + 1]))
-        for i, block in enumerate(self.block_list):
-            x = block(x)
-            for number in fpn_list:
-                if i + 1 == number:
-                    F.append(x)
-        base = F[-1]
-        j = 0
-        for i, block in enumerate(self.base_block):
-            if i % 3 == 0 and i < 6:
-                j = j + 1
-                b, c, w, h = F[-j - 1].shape
-                if [w, h] == list(base.shape[2:]):
-                    base = base
-                else:
-                    base = self.conv_trans[j - 1](base)
-                    base = self.bn_block[j - 1](base)
-                base = torch.cat([base, F[-j - 1]], dim=1)
-            base = block(base)
-        return base
-class ConvBNLayer(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride=1,
-                 groups=1,
-                 act=None,
-                 name=None):
-        super(ConvBNLayer, self).__init__()
-        self.conv = nn.Conv2d(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=2 if stride == (1, 1) else kernel_size,
-            dilation=2 if stride == (1, 1) else 1,
-            stride=stride,
-            padding=(kernel_size - 1) // 2,
-            groups=groups,
-            bias=False, )
-        if name == "conv1":
-            bn_name = "bn_" + name
-        else:
-            bn_name = "bn" + name[3:]
-        self.bn = nn.BatchNorm2d(out_channels)
-        self.act = act
-        if self.act is not None:
-            self._act = Activation(act_type=self.act, inplace=True)
-    def __call__(self, x):
-        x = self.conv(x)
-        x = self.bn(x)
-        if self.act is not None:
-            x = self._act(x)
-        return x
-class ShortCut(nn.Module):
-    def __init__(self, in_channels, out_channels, stride, name, is_first=False):
-        super(ShortCut, self).__init__()
-        self.use_conv = True
-        if in_channels != out_channels or stride != 1 or is_first == True:
-            if stride == (1, 1):
-                self.conv = ConvBNLayer(
-                    in_channels, out_channels, 1, 1, name=name)
-            else:  # stride==(2,2)
-                self.conv = ConvBNLayer(
-                    in_channels, out_channels, 1, stride, name=name)
-        else:
-            self.use_conv = False
-    def forward(self, x):
-        if self.use_conv:
-            x = self.conv(x)
-        return x
-class BottleneckBlock(nn.Module):
-    def __init__(self, in_channels, out_channels, stride, name):
-        super(BottleneckBlock, self).__init__()
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=1,
-            act='relu',
-            name=name + "_branch2a")
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2b")
-        self.conv2 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels * 4,
-            kernel_size=1,
-            act=None,
-            name=name + "_branch2c")
-        self.short = ShortCut(
-            in_channels=in_channels,
-            out_channels=out_channels * 4,
-            stride=stride,
-            is_first=False,
-            name=name + "_branch1")
-        self.out_channels = out_channels * 4
-    def forward(self, x):
-        y = self.conv0(x)
-        y = self.conv1(y)
-        y = self.conv2(y)
-        y = y + self.short(x)
-        y = F.relu(y)
-        return y
-class BasicBlock(nn.Module):
-    def __init__(self, in_channels, out_channels, stride, name, is_first):
-        super(BasicBlock, self).__init__()
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            act='relu',
-            stride=stride,
-            name=name + "_branch2a")
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            act=None,
-            name=name + "_branch2b")
-        self.short = ShortCut(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            stride=stride,
-            is_first=is_first,
-            name=name + "_branch1")
-        self.out_channels = out_channels
-    def forward(self, x):
-        y = self.conv0(x)
-        y = self.conv1(y)
-        y = y + self.short(x)
-        return F.relu(y)
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_resnet_vd.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_resnet_vd.py
-import os, sys
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from pytorchocr.modeling.common import Activation
-class ConvBNLayer(nn.Module):
-    def __init__(
-            self,
-            in_channels,
-            out_channels,
-            kernel_size,
-            stride=1,
-            groups=1,
-            is_vd_mode=False,
-            act=None,
-            name=None, ):
-        super(ConvBNLayer, self).__init__()
-        self.act = act
-        self.is_vd_mode = is_vd_mode
-        self._pool2d_avg = nn.AvgPool2d(
-            kernel_size=stride, stride=stride, padding=0, ceil_mode=True)
-        self._conv = nn.Conv2d(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=kernel_size,
-            stride=1 if is_vd_mode else stride,
-            padding=(kernel_size - 1) // 2,
-            groups=groups,
-            bias=False)
-        self._batch_norm = nn.BatchNorm2d(
-            out_channels,)
-        if self.act is not None:
-            self._act = Activation(act_type=act, inplace=True)
-    def forward(self, inputs):
-        if self.is_vd_mode:
-            inputs = self._pool2d_avg(inputs)
-        y = self._conv(inputs)
-        y = self._batch_norm(y)
-        if self.act is not None:
-            y = self._act(y)
-        return y
-class BottleneckBlock(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 stride,
-                 shortcut=True,
-                 if_first=False,
-                 name=None):
-        super(BottleneckBlock, self).__init__()
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=1,
-            act='relu',
-            name=name + "_branch2a")
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2b")
-        self.conv2 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels * 4,
-            kernel_size=1,
-            act=None,
-            name=name + "_branch2c")
-        if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels * 4,
-                kernel_size=1,
-                stride=stride,
-                is_vd_mode=not if_first and stride[0] != 1,
-                name=name + "_branch1")
-        self.shortcut = shortcut
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-        conv1 = self.conv1(y)
-        conv2 = self.conv2(conv1)
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-        y = short + conv2
-        y = F.relu(y)
-        return y
-class BasicBlock(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 stride,
-                 shortcut=True,
-                 if_first=False,
-                 name=None):
-        super(BasicBlock, self).__init__()
-        self.stride = stride
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2a")
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            act=None,
-            name=name + "_branch2b")
-        if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=1,
-                stride=stride,
-                is_vd_mode=not if_first and stride[0] != 1,
-                name=name + "_branch1")
-        self.shortcut = shortcut
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-        conv1 = self.conv1(y)
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-        y = short + conv1
-        y = F.relu(y)
-        return y
-class ResNet(nn.Module):
-    def __init__(self, in_channels=3, layers=50, **kwargs):
-        super(ResNet, self).__init__()
-        self.layers = layers
-        supported_layers = [18, 34, 50, 101, 152, 200]
-        assert layers in supported_layers, \
-            "supported layers are {} but input layer is {}".format(
-                supported_layers, layers)
-        if layers == 18:
-            depth = [2, 2, 2, 2]
-        elif layers == 34 or layers == 50:
-            depth = [3, 4, 6, 3]
-        elif layers == 101:
-            depth = [3, 4, 23, 3]
-        elif layers == 152:
-            depth = [3, 8, 36, 3]
-        elif layers == 200:
-            depth = [3, 12, 48, 3]
-        num_channels = [64, 256, 512,
-                        1024] if layers >= 50 else [64, 64, 128, 256]
-        num_filters = [64, 128, 256, 512]
-        self.conv1_1 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=32,
-            kernel_size=3,
-            stride=1,
-            act='relu',
-            name="conv1_1")
-        self.conv1_2 = ConvBNLayer(
-            in_channels=32,
-            out_channels=32,
-            kernel_size=3,
-            stride=1,
-            act='relu',
-            name="conv1_2")
-        self.conv1_3 = ConvBNLayer(
-            in_channels=32,
-            out_channels=64,
-            kernel_size=3,
-            stride=1,
-            act='relu',
-            name="conv1_3")
-        self.pool2d_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-        # self.block_list = list()
-        self.block_list = nn.Sequential()
-        if layers >= 50:
-            for block in range(len(depth)):
-                shortcut = False
-                for i in range(depth[block]):
-                    if layers in [101, 152, 200] and block == 2:
-                        if i == 0:
-                            conv_name = "res" + str(block + 2) + "a"
-                        else:
-                            conv_name = "res" + str(block + 2) + "b" + str(i)
-                    else:
-                        conv_name = "res" + str(block + 2) + chr(97 + i)
-                    if i == 0 and block != 0:
-                        stride = (2, 1)
-                    else:
-                        stride = (1, 1)
-                    bottleneck_block = BottleneckBlock(in_channels=num_channels[block] if i == 0 else num_filters[block] * 4,
-                                                       out_channels=num_filters[block],
-                                                       stride=stride,
-                                                       shortcut=shortcut,
-                                                       if_first=block == i == 0,
-                                                       name=conv_name)
-                    shortcut = True
-                    # self.block_list.append(bottleneck_block)
-                    self.block_list.add_module('bb_%d_%d' % (block, i), bottleneck_block)
-                self.out_channels = num_filters[block]
-        else:
-            for block in range(len(depth)):
-                shortcut = False
-                for i in range(depth[block]):
-                    conv_name = "res" + str(block + 2) + chr(97 + i)
-                    if i == 0 and block != 0:
-                        stride = (2, 1)
-                    else:
-                        stride = (1, 1)
-                    basic_block = BasicBlock(in_channels=num_channels[block] if i == 0 else num_filters[block],
-                                             out_channels=num_filters[block],
-                                             stride=stride,
-                                             shortcut=shortcut,
-                                             if_first=block == i == 0,
-                                             name=conv_name)
-                    shortcut = True
-                    # self.block_list.append(basic_block)
-                    self.block_list.add_module('bb_%d_%d' % (block, i), basic_block)
-                self.out_channels = num_filters[block]
-        self.out_pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
-    def forward(self, inputs):
-        y = self.conv1_1(inputs)
-        y = self.conv1_2(y)
-        y = self.conv1_3(y)
-        y = self.pool2d_max(y)
-        for block in self.block_list:
-            y = block(y)
-        y = self.out_pool(y)
-        return y
\ No newline at end of file
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_vitstr.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_vitstr.py
-"""
-This code is refer from:
-https://github.com/roatienza/deep-text-recognition-benchmark/blob/master/modules/vitstr.py
-"""
-import numpy as np
-import torch
-import torch.nn as nn
-from pytorchocr.modeling.backbones.rec_svtrnet import Block, PatchEmbed
-# import paddle
-# import paddle.nn as nn
-# from ppocr.modeling.backbones.rec_svtrnet import Block, PatchEmbed, zeros_, trunc_normal_, ones_
-scale_dim_heads = {'tiny': [192, 3], 'small': [384, 6], 'base': [768, 12]}
-class ViTSTR(nn.Module):
-    def __init__(self,
-                 img_size=[224, 224],
-                 in_channels=1,
-                 scale='tiny',
-                 seqlen=27,
-                 patch_size=[16, 16],
-                 embed_dim=None,
-                 depth=12,
-                 num_heads=None,
-                 mlp_ratio=4,
-                 qkv_bias=True,
-                 qk_scale=None,
-                 drop_path_rate=0.,
-                 drop_rate=0.,
-                 attn_drop_rate=0.,
-                 norm_layer='nn.LayerNorm',
-                 act_layer='gelu',
-                 epsilon=1e-6,
-                 out_channels=None,
-                 **kwargs):
-        super().__init__()
-        self.seqlen = seqlen
-        embed_dim = embed_dim if embed_dim is not None else scale_dim_heads[
-            scale][0]
-        num_heads = num_heads if num_heads is not None else scale_dim_heads[
-            scale][1]
-        out_channels = out_channels if out_channels is not None else embed_dim
-        self.patch_embed = PatchEmbed(
-            img_size=img_size,
-            in_channels=in_channels,
-            embed_dim=embed_dim,
-            patch_size=patch_size,
-            mode='linear')
-        num_patches = self.patch_embed.num_patches
-        self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim))
-        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
-        self.pos_drop = nn.Dropout(p=drop_rate)
-        dpr = np.linspace(0, drop_path_rate, depth)
-        self.blocks = nn.ModuleList([
-            Block(
-                dim=embed_dim,
-                num_heads=num_heads,
-                mlp_ratio=mlp_ratio,
-                qkv_bias=qkv_bias,
-                qk_scale=qk_scale,
-                drop=drop_rate,
-                attn_drop=attn_drop_rate,
-                drop_path=dpr[i],
-                norm_layer=norm_layer,
-                act_layer=act_layer,
-                epsilon=epsilon,
-                prenorm=False) for i in range(depth)
-        ])
-        self.norm = eval(norm_layer)(embed_dim, eps=epsilon)
-        self.out_channels = out_channels
-        torch.nn.init.xavier_normal_(self.pos_embed)
-        torch.nn.init.xavier_normal_(self.cls_token)
-        self.apply(self._init_weights)
-    def _init_weights(self, m):
-        """Re-initialize the parameters of a single sub-module ``m``.
-        The constructor hands this method to ``self.apply``. Conv2d and
-        ConvTranspose2d weights are drawn with Kaiming-normal (fan_out),
-        Linear weights with mean 0 and std 0.01, and BatchNorm2d/LayerNorm
-        are reset to weight=1, bias=0. A bias present on a conv or linear
-        layer is zeroed. Every other module type is left untouched.
-        """
-        # Normalization layers: identity scale, zero shift.
-        if isinstance(m, (nn.BatchNorm2d, nn.LayerNorm)):
-            nn.init.ones_(m.weight)
-            nn.init.zeros_(m.bias)
-            return
-        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
-            nn.init.kaiming_normal_(m.weight, mode='fan_out')
-        elif isinstance(m, nn.Linear):
-            nn.init.normal_(m.weight, 0, 0.01)
-        else:
-            return
-        # Conv and linear layers may have been built with bias=False.
-        if m.bias is not None:
-            nn.init.zeros_(m.bias)
-    def forward_features(self, x):
-        """Embed ``x`` into tokens and run them through the transformer blocks.
-        Steps: patch embedding, one copy of ``self.cls_token`` prepended per
-        sample, ``self.pos_embed`` added, dropout, every block in
-        ``self.blocks`` in order, then the final ``self.norm``.
-        """
-        batch_size = x.shape[0]
-        tokens = self.patch_embed(x)
-        # Tile the single class token across the batch before concatenating.
-        cls_per_sample = self.cls_token.repeat(batch_size, 1, 1)
-        tokens = torch.cat((cls_per_sample, tokens), dim=1)
-        tokens = self.pos_drop(tokens + self.pos_embed)
-        for block in self.blocks:
-            tokens = block(tokens)
-        return self.norm(tokens)
-    def forward(self, x):
-        """Return the first ``self.seqlen`` tokens as a 4-D feature map.
-        The 3-D output of ``forward_features`` is cut along dim 1, its last
-        two dims are swapped, and a singleton dim is inserted at index 2.
-        """
-        tokens = self.forward_features(x)
-        kept = tokens[:, :self.seqlen]
-        channels_first = kept.permute(0, 2, 1)
-        return channels_first.unsqueeze(2)
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/table_mobilenet_v3.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/table_mobilenet_v3.py
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-__all__ = ['MobileNetV3']
-def make_divisible(v, divisor=8, min_value=None):
-    """Round ``v`` to a nearby multiple of ``divisor``.
-    The result is never below ``min_value`` (which defaults to ``divisor``).
-    If it falls under 90% of ``v``, one more ``divisor`` is added.
-    """
-    lower_bound = divisor if min_value is None else min_value
-    rounded = int(v + divisor / 2) // divisor * divisor
-    result = max(lower_bound, rounded)
-    # Do not let rounding shrink the value by more than 10%.
-    return result + divisor if result < 0.9 * v else result
-def hard_sigmoid(x, slope=0.1666667, offset=0.5):
-    """Piecewise-linear sigmoid: ``slope * x + offset`` clamped to [0, 1]."""
-    linear = slope * x + offset
-    return torch.clamp(linear, min=0., max=1.)
-def hard_swish(x, inplace=True):
-    """Hard-swish activation: ``x * relu6(x + 3) / 6``.
-    ``inplace`` is forwarded to ``F.relu6``, which operates on the temporary
-    ``x + 3.``.
-    """
-    gate = F.relu6(x + 3., inplace=inplace)
-    return x * gate / 6.
-class MobileNetV3(nn.Module):
-    """MobileNetV3 backbone that returns one feature map per stage.
-    A stride-2 stem convolution is followed by the ``ResidualUnit`` blocks
-    listed in the per-variant table. A new stage starts whenever a stride-2
-    block is reached past the variant's start index, and a final 1x1
-    ``conv_last`` layer closes the last stage. ``self.out_channels`` lists
-    the channel count of each stage output, in order.
-    """
-    def __init__(self,
-                 in_channels=3,
-                 model_name='large',
-                 scale=0.5,
-                 disable_se=False,
-                 **kwargs):
-        """
-        Args:
-            in_channels: channel count of the input tensor.
-            model_name: selects the ``"large"`` or ``"small"`` block table.
-            scale: width multiplier; one of 0.35, 0.5, 0.75, 1.0, 1.25.
-            disable_se: when true, no block gets a squeeze-excite module.
-            **kwargs: accepted and ignored.
-        Raises:
-            NotImplementedError: if ``model_name`` is not recognised.
-            AssertionError: if ``scale`` is not a supported multiplier.
-        """
-        super(MobileNetV3, self).__init__()
-        self.disable_se = disable_se
-        # Row layout: kernel, expansion channels, output channels, use SE,
-        # activation name, stride.
-        if model_name == "large":
-            cfg = (
-                (3, 16, 16, False, 'relu', 1),
-                (3, 64, 24, False, 'relu', 2),
-                (3, 72, 24, False, 'relu', 1),
-                (5, 72, 40, True, 'relu', 2),
-                (5, 120, 40, True, 'relu', 1),
-                (5, 120, 40, True, 'relu', 1),
-                (3, 240, 80, False, 'hardswish', 2),
-                (3, 200, 80, False, 'hardswish', 1),
-                (3, 184, 80, False, 'hardswish', 1),
-                (3, 184, 80, False, 'hardswish', 1),
-                (3, 480, 112, True, 'hardswish', 1),
-                (3, 672, 112, True, 'hardswish', 1),
-                (5, 672, 160, True, 'hardswish', 2),
-                (5, 960, 160, True, 'hardswish', 1),
-                (5, 960, 160, True, 'hardswish', 1),
-            )
-            cls_ch_squeeze = 960
-        elif model_name == "small":
-            cfg = (
-                (3, 16, 16, True, 'relu', 2),
-                (3, 72, 24, False, 'relu', 2),
-                (3, 88, 24, False, 'relu', 1),
-                (5, 96, 40, True, 'hardswish', 2),
-                (5, 240, 40, True, 'hardswish', 1),
-                (5, 240, 40, True, 'hardswish', 1),
-                (5, 120, 48, True, 'hardswish', 1),
-                (5, 144, 48, True, 'hardswish', 1),
-                (5, 288, 96, True, 'hardswish', 2),
-                (5, 576, 96, True, 'hardswish', 1),
-                (5, 576, 96, True, 'hardswish', 1),
-            )
-            cls_ch_squeeze = 576
-        else:
-            raise NotImplementedError("mode[" + model_name +
-                                      "_model] is not implemented!")
-        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
-        assert scale in supported_scale, \
-            "supported scale are {} but input scale is {}".format(supported_scale, scale)
-        stem_channels = make_divisible(16 * scale)
-        last_channels = make_divisible(scale * cls_ch_squeeze)
-        # Stem convolution.
-        self.conv = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=stem_channels,
-            kernel_size=3,
-            stride=2,
-            padding=1,
-            groups=1,
-            if_act=True,
-            act='hardswish',
-            name='conv1')
-        self.stages = nn.ModuleList()
-        self.out_channels = []
-        # Stride-2 blocks at or before this index do not open a new stage.
-        start_idx = 2 if model_name == 'large' else 0
-        pending = []
-        channels = stem_channels
-        for idx, (k, exp, c, se, nl, s) in enumerate(cfg):
-            if s == 2 and idx > start_idx:
-                # Close the current stage before the downsampling block.
-                self.out_channels.append(channels)
-                self.stages.append(nn.Sequential(*pending))
-                pending = []
-            block_channels = make_divisible(scale * c)
-            pending.append(
-                ResidualUnit(
-                    in_channels=channels,
-                    mid_channels=make_divisible(scale * exp),
-                    out_channels=block_channels,
-                    kernel_size=k,
-                    stride=s,
-                    use_se=se and not self.disable_se,
-                    act=nl,
-                    name="conv" + str(idx + 2)))
-            channels = block_channels
-        # The closing 1x1 convolution belongs to the last stage.
-        pending.append(
-            ConvBNLayer(
-                in_channels=channels,
-                out_channels=last_channels,
-                kernel_size=1,
-                stride=1,
-                padding=0,
-                groups=1,
-                if_act=True,
-                act='hardswish',
-                name='conv_last'))
-        self.stages.append(nn.Sequential(*pending))
-        self.out_channels.append(last_channels)
-    def forward(self, x):
-        """Apply the stem, then return the output of every stage as a list."""
-        features = []
-        hidden = self.conv(x)
-        for stage in self.stages:
-            hidden = stage(hidden)
-            features.append(hidden)
-        return features
-class ConvBNLayer(nn.Module):
-    """Bias-free Conv2d followed by BatchNorm2d and an optional activation.
-    Args:
-        in_channels, out_channels, kernel_size, stride, padding, groups:
-            forwarded to ``nn.Conv2d``.
-        if_act: apply an activation after batch norm when true.
-        act: ``"relu"`` or ``"hardswish"``; only consulted when ``if_act``.
-        name: accepted but not used by this layer.
-    """
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride,
-                 padding,
-                 groups=1,
-                 if_act=True,
-                 act=None,
-                 name=None):
-        super(ConvBNLayer, self).__init__()
-        self.if_act = if_act
-        self.act = act
-        self.conv = nn.Conv2d(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=padding,
-            groups=groups,
-            bias=False)
-        self.bn = nn.BatchNorm2d(
-            out_channels,
-        )
-    def forward(self, x):
-        """Run conv -> batch norm -> activation (if enabled).
-        Raises:
-            ValueError: if ``if_act`` is true and ``act`` is neither
-                ``"relu"`` nor ``"hardswish"``.
-        """
-        x = self.conv(x)
-        x = self.bn(x)
-        if not self.if_act:
-            return x
-        if self.act == "relu":
-            return F.relu(x)
-        if self.act == "hardswish":
-            return hard_swish(x)
-        # Raise instead of print + exit(): a library layer must not terminate
-        # the whole interpreter because of a bad configuration value.
-        raise ValueError(
-            "The activation function({}) is selected incorrectly.".format(
-                self.act))
-class ResidualUnit(nn.Module):
-    """Inverted-residual block built from three ``ConvBNLayer`` stages.
-    Layout: 1x1 expansion -> depthwise ``kernel_size`` conv (groups equal to
-    ``mid_channels``) -> optional ``SEModule`` -> 1x1 projection without
-    activation. The input is added back when ``stride == 1`` and the input
-    and output channel counts match.
-    """
-    def __init__(self,
-                 in_channels,
-                 mid_channels,
-                 out_channels,
-                 kernel_size,
-                 stride,
-                 use_se,
-                 act=None,
-                 name=''):
-        super(ResidualUnit, self).__init__()
-        self.if_se = use_se
-        # A skip connection is only shape-compatible without downsampling
-        # and without a channel change.
-        self.if_shortcut = stride == 1 and in_channels == out_channels
-        same_padding = int((kernel_size - 1) // 2)
-        self.expand_conv = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=mid_channels,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            if_act=True,
-            act=act,
-            name=name + "_expand")
-        self.bottleneck_conv = ConvBNLayer(
-            in_channels=mid_channels,
-            out_channels=mid_channels,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=same_padding,
-            groups=mid_channels,
-            if_act=True,
-            act=act,
-            name=name + "_depthwise")
-        if use_se:
-            self.mid_se = SEModule(mid_channels, name=name + "_se")
-        self.linear_conv = ConvBNLayer(
-            in_channels=mid_channels,
-            out_channels=out_channels,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            if_act=False,
-            act=None,
-            name=name + "_linear")
-    def forward(self, inputs):
-        """Apply expand -> depthwise -> (SE) -> project, plus the skip if enabled."""
-        hidden = self.bottleneck_conv(self.expand_conv(inputs))
-        if self.if_se:
-            hidden = self.mid_se(hidden)
-        hidden = self.linear_conv(hidden)
-        if not self.if_shortcut:
-            return hidden
-        return torch.add(inputs, hidden)
-class SEModule(nn.Module):
-    """Squeeze-and-excitation gate that rescales each input channel.
-    Global average pooling is followed by a 1x1 conv that reduces the
-    channels by ``reduction``, ReLU, a 1x1 conv that restores them, and
-    ``hard_sigmoid`` with slope 0.2 and offset 0.5. ``name`` is accepted
-    but not used.
-    """
-    def __init__(self, in_channels, reduction=4, name=""):
-        super(SEModule, self).__init__()
-        squeezed = in_channels // reduction
-        self.avg_pool = nn.AdaptiveAvgPool2d(1)
-        self.conv1 = nn.Conv2d(
-            in_channels=in_channels,
-            out_channels=squeezed,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            bias=True)
-        self.conv2 = nn.Conv2d(
-            in_channels=squeezed,
-            out_channels=in_channels,
-            kernel_size=1,
-            stride=1,
-            padding=0,
-            bias=True)
-    def forward(self, inputs):
-        """Return ``inputs`` multiplied by the per-channel gate."""
-        gate = self.avg_pool(inputs)
-        gate = F.relu(self.conv1(gate))
-        gate = hard_sigmoid(self.conv2(gate), slope=0.2, offset=0.5)
-        return inputs * gate
\ No newline at end of file
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/table_resnet_vd.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/table_resnet_vd.py
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py
 import torch
-import torch.nn as nn
 import torch.nn.functional as F
+from torch import nn
 class Hswish(nn.Module):
    def __init__(self, inplace=True):
@@ -10,7 +9,8 @@ class Hswish(nn.Module):
        self.inplace = inplace
    def forward(self, x):
-        return x * F.relu6(x + 3., inplace=self.inplace) / 6.
+        return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0
 # out = max(0, min(1, slope*x+offset))
 # paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None)
@@ -22,7 +22,8 @@ class Hsigmoid(nn.Module):
    def forward(self, x):
        # torch: F.relu6(x + 3., inplace=self.inplace) / 6.
        # paddle: F.relu6(1.2 * x + 3., inplace=self.inplace) / 6.
-        return F.relu6(1.2 * x + 3., inplace=self.inplace) / 6.
+        return F.relu6(1.2 * x + 3.0, inplace=self.inplace) / 6.0
 class GELU(nn.Module):
    def __init__(self, inplace=True):
@@ -43,28 +44,30 @@ class Swish(nn.Module):
            x.mul_(torch.sigmoid(x))
            return x
        else:
-            return x*torch.sigmoid(x)
+            return x * torch.sigmoid(x)
 class Activation(nn.Module):
    def __init__(self, act_type, inplace=True):
        super(Activation, self).__init__()
        act_type = act_type.lower()
-        if act_type == 'relu':
+        if act_type == "relu":
            self.act = nn.ReLU(inplace=inplace)
-        elif act_type == 'relu6':
+        elif act_type == "relu6":
            self.act = nn.ReLU6(inplace=inplace)
-        elif act_type == 'sigmoid':
+        elif act_type == "sigmoid":
            raise NotImplementedError
-        elif act_type == 'hard_sigmoid':
+        elif act_type == "hard_sigmoid":
-            self.act = Hsigmoid(inplace)#nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)#
+            self.act = Hsigmoid(
-        elif act_type == 'hard_swish' or act_type == 'hswish':
+                inplace
+            )  # nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)#
+        elif act_type == "hard_swish" or act_type == "hswish":
            self.act = Hswish(inplace=inplace)
-        elif act_type == 'leakyrelu':
+        elif act_type == "leakyrelu":
            self.act = nn.LeakyReLU(inplace=inplace)
-        elif act_type == 'gelu':
+        elif act_type == "gelu":
            self.act = GELU(inplace=inplace)
-        elif act_type == 'swish':
+        elif act_type == "swish":
            self.act = Swish(inplace=inplace)
        else:
            raise NotImplementedError

--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py
@@ -12,40 +12,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__all__ = ['build_head']
+__all__ = ["build_head"]
 def build_head(config, **kwargs):
    # det head
    from .det_db_head import DBHead, PFHeadLocal
-    from .det_east_head import EASTHead
-    from .det_sast_head import SASTHead
-    from .det_pse_head import PSEHead
-    from .det_fce_head import FCEHead
-    from .e2e_pg_head import PGHead
    # rec head
    from .rec_ctc_head import CTCHead
-    from .rec_att_head import AttentionHead
-    from .rec_srn_head import SRNHead
-    from .rec_nrtr_head import Transformer
-    from .rec_sar_head import SARHead
-    from .rec_can_head import CANHead
    from .rec_multi_head import MultiHead
    # cls head
    from .cls_head import ClsHead
-    support_dict = [
-        'DBHead', 'PSEHead', 'EASTHead', 'SASTHead', 'CTCHead', 'ClsHead', 'AttentionHead',
-        'SRNHead', 'PGHead', 'Transformer', 'TableAttentionHead','SARHead', 'FCEHead',
-        'CANHead', 'MultiHead', 'PFHeadLocal',
+    support_dict = [
+        "DBHead",
+        "CTCHead",
+        "ClsHead",
+        "MultiHead",
+        "PFHeadLocal",
    ]
-    from .table_att_head import TableAttentionHead
+    module_name = config.pop("name")
+    char_num = config.pop("char_num", 6625)
-    module_name = config.pop('name')
+    assert module_name in support_dict, Exception(
-    assert module_name in support_dict, Exception('head only support {}'.format(
+        "head only support {}".format(support_dict)
-        support_dict))
+    )
    module_class = eval(module_name)(**config, **kwargs)
    return module_class
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_east_head.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_east_head.py
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_fce_head.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_fce_head.py
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_pse_head.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_pse_head.py
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_sast_head.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_sast_head.py
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/e2e_pg_head.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/e2e_pg_head.py
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/multiheadAttention.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/multiheadAttention.py
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_att_head.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_att_head.py