refactor(ocr): remove unused code and simplify model architecture

- Remove unused imports and code
- Simplify model architecture by removing unnecessary components
- Update initialization and forward pass logic
- Rename variables for consistency

refactor(ocr): remove unused code and simplify model architecture
- Remove unused imports and code
- Simplify model architecture by removing unnecessary components
- Update initialization and forward pass logic
- Rename variables for consistency
b3d6785d · myhloli · 3cb156f5 · b3d6785d · b3d6785d · b3d6785d
Commit b3d6785d authored Apr 01, 2025 by myhloli
20 changed files
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py
@@ -6,10 +6,9 @@ import numpy as np
 from loguru import logger

 from magic_pdf.libs.config_reader import get_device
-from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.ocr_utils import check_img, preprocess_image, sorted_boxes, \
-    merge_det_boxes, update_det_boxes, get_rotate_crop_image
-from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.tools.infer.predict_system import TextSystem
-import tools.infer.pytorchocr_utility as utility
+from .ocr_utils import check_img, preprocess_image, sorted_boxes, merge_det_boxes, update_det_boxes, get_rotate_crop_image
+from .tools.infer.predict_system import TextSystem
+from .tools.infer import pytorchocr_utility as utility
 import argparse


@@ -20,14 +19,9 @@ class PytorchPaddleOCR(TextSystem):

        self.lang = kwargs.get('lang', 'ch')

-        # kwargs['cls_model_path'] = "/Users/myhloli/Downloads/ch_ptocr_mobile_v2.0_cls_infer.pth"
-
        if self.lang == 'ch':
            kwargs['det_model_path'] = "/Users/myhloli/Downloads/ch_ptocr_v4_det_infer.pth"
            kwargs['rec_model_path'] = "/Users/myhloli/Downloads/ch_ptocr_v4_rec_infer.pth"
-            kwargs['det_yaml_path'] = "/Users/myhloli/Downloads/PaddleOCR2Pytorch-main/configs/det/ch_PP-OCRv4/ch_PP-OCRv4_det_student.yml"
-            kwargs['rec_yaml_path'] = "/Users/myhloli/Downloads/PaddleOCR2Pytorch-main/configs/rec/PP-OCRv4/ch_PP-OCRv4_rec.yml"
-            kwargs['rec_image_shape'] = '3,48,320'

        kwargs['device'] = get_device()


--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py
-import os, sys
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from collections import OrderedDict
-import numpy as np
-import cv2
+import os
 import torch
-
-from pytorchocr.modeling.architectures.base_model import BaseModel
+from .modeling.architectures.base_model import BaseModel

 class BaseOCRV20:
    def __init__(self, config, **kwargs):
@@ -17,45 +12,6 @@ class BaseOCRV20:
    def build_net(self, **kwargs):
        self.net = BaseModel(self.config, **kwargs)

-
-    def load_paddle_weights(self, weights_path):
-        raise NotImplementedError('implemented in converter.')
-        print('paddle weights loading...')
-        import paddle.fluid as fluid
-        with fluid.dygraph.guard():
-            para_state_dict, opti_state_dict = fluid.load_dygraph(weights_path)
-
-        for k,v in self.net.state_dict().items():
-            name = k
-
-            if name.endswith('num_batches_tracked'):
-                continue
-
-            if name.endswith('running_mean'):
-                ppname = name.replace('running_mean', '_mean')
-            elif name.endswith('running_var'):
-                ppname = name.replace('running_var', '_variance')
-            elif name.endswith('bias') or name.endswith('weight'):
-                ppname = name
-            elif 'lstm' in name:
-                ppname = name
-
-            else:
-                print('Redundance:')
-                print(name)
-                raise ValueError
-            try:
-                if ppname.endswith('fc.weight'):
-                    self.net.state_dict()[k].copy_(torch.Tensor(para_state_dict[ppname].T))
-                else:
-                    self.net.state_dict()[k].copy_(torch.Tensor(para_state_dict[ppname]))
-            except Exception as e:
-                print('pytorch: {}, {}'.format(k, v.size()))
-                print('paddle: {}, {}'.format(ppname, para_state_dict[ppname].shape))
-                raise e
-
-        print('model is loaded: {}'.format(weights_path))
-
    def read_pytorch_weights(self, weights_path):
        if not os.path.exists(weights_path):
            raise FileNotFoundError('{} is not existed.'.format(weights_path))
@@ -74,38 +30,9 @@ class BaseOCRV20:
        print('weights is loaded.')

    def load_pytorch_weights(self, weights_path):
-        self.net.load_state_dict(torch.load(weights_path))
+        self.net.load_state_dict(torch.load(weights_path, weights_only=True))
        print('model is loaded: {}'.format(weights_path))

-
-    def save_pytorch_weights(self, weights_path):
-        try:
-            torch.save(self.net.state_dict(), weights_path, _use_new_zipfile_serialization=False)
-        except:
-            torch.save(self.net.state_dict(), weights_path) # _use_new_zipfile_serialization=False for torch>=1.6.0
-        print('model is saved: {}'.format(weights_path))
-
-
-    def print_pytorch_state_dict(self):
-        print('pytorch:')
-        for k,v in self.net.state_dict().items():
-            print('{}----{}'.format(k,type(v)))
-
-    def read_paddle_weights(self, weights_path):
-        import paddle.fluid as fluid
-        with fluid.dygraph.guard():
-            para_state_dict, opti_state_dict = fluid.load_dygraph(weights_path)
-        return para_state_dict, opti_state_dict
-
-    def print_paddle_state_dict(self, weights_path):
-        import paddle.fluid as fluid
-        with fluid.dygraph.guard():
-            para_state_dict, opti_state_dict = fluid.load_dygraph(weights_path)
-        print('paddle"')
-        for k,v in para_state_dict.items():
-            print('{}----{}'.format(k,type(v)))
-
-
    def inference(self, inputs):
        with torch.no_grad():
            infer = self.net(inputs)

--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py
@@ -18,7 +18,6 @@ import copy
 # from paddle.io import Dataset, DataLoader, BatchSampler, DistributedBatchSampler
 # import paddle.distributed as dist

-from pytorchocr.data.imaug import transform, create_operators
-# from pytorchocr.data.simple_dataset import SimpleDataSet
-# from pytorchocr.data.lmdb_dataset import LMDBDateSet
+from .imaug import transform, create_operators
+

--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py
@@ -15,7 +15,7 @@ from .operators import *

 # from .east_process import *
 # from .sast_process import *
-from .gen_table_mask import *
+# from .gen_table_mask import *

 def transform(data, ops=None):
    """ transform """

--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/gen_table_mask.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/gen_table_mask.py
-"""
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-import sys
-import six
-import cv2
-import numpy as np
-
-
-class GenTableMask(object):
-    """ gen table mask """
-
-    def __init__(self, shrink_h_max, shrink_w_max, mask_type=0, **kwargs):
-        self.shrink_h_max = 5
-        self.shrink_w_max = 5
-        self.mask_type = mask_type
-
-    def projection(self, erosion, h, w, spilt_threshold=0):
-        # 水平投影
-        projection_map = np.ones_like(erosion)
-        project_val_array = [0 for _ in range(0, h)]
-
-        for j in range(0, h):
-            for i in range(0, w):
-                if erosion[j, i] == 255:
-                    project_val_array[j] += 1
-        # 根据数组，获取切割点
-        start_idx = 0  # 记录进入字符区的索引
-        end_idx = 0  # 记录进入空白区域的索引
-        in_text = False  # 是否遍历到了字符区内
-        box_list = []
-        for i in range(len(project_val_array)):
-            if in_text == False and project_val_array[i] > spilt_threshold:  # 进入字符区了
-                in_text = True
-                start_idx = i
-            elif project_val_array[i] <= spilt_threshold and in_text == True:  # 进入空白区了
-                end_idx = i
-                in_text = False
-                if end_idx - start_idx <= 2:
-                    continue
-                box_list.append((start_idx, end_idx + 1))
-
-        if in_text:
-            box_list.append((start_idx, h - 1))
-        # 绘制投影直方图
-        for j in range(0, h):
-            for i in range(0, project_val_array[j]):
-                projection_map[j, i] = 0
-        return box_list, projection_map
-
-    def projection_cx(self, box_img):
-        box_gray_img = cv2.cvtColor(box_img, cv2.COLOR_BGR2GRAY)
-        h, w = box_gray_img.shape
-        # 灰度图片进行二值化处理
-        ret, thresh1 = cv2.threshold(box_gray_img, 200, 255, cv2.THRESH_BINARY_INV)
-        # 纵向腐蚀
-        if h < w:
-            kernel = np.ones((2, 1), np.uint8)
-            erode = cv2.erode(thresh1, kernel, iterations=1)
-        else:
-            erode = thresh1
-        # 水平膨胀
-        kernel = np.ones((1, 5), np.uint8)
-        erosion = cv2.dilate(erode, kernel, iterations=1)
-        # 水平投影
-        projection_map = np.ones_like(erosion)
-        project_val_array = [0 for _ in range(0, h)]
-
-        for j in range(0, h):
-            for i in range(0, w):
-                if erosion[j, i] == 255:
-                    project_val_array[j] += 1
-        # 根据数组，获取切割点
-        start_idx = 0  # 记录进入字符区的索引
-        end_idx = 0  # 记录进入空白区域的索引
-        in_text = False  # 是否遍历到了字符区内
-        box_list = []
-        spilt_threshold = 0
-        for i in range(len(project_val_array)):
-            if in_text == False and project_val_array[i] > spilt_threshold:  # 进入字符区了
-                in_text = True
-                start_idx = i
-            elif project_val_array[i] <= spilt_threshold and in_text == True:  # 进入空白区了
-                end_idx = i
-                in_text = False
-                if end_idx - start_idx <= 2:
-                    continue
-                box_list.append((start_idx, end_idx + 1))
-
-        if in_text:
-            box_list.append((start_idx, h - 1))
-        # 绘制投影直方图
-        for j in range(0, h):
-            for i in range(0, project_val_array[j]):
-                projection_map[j, i] = 0
-        split_bbox_list = []
-        if len(box_list) > 1:
-            for i, (h_start, h_end) in enumerate(box_list):
-                if i == 0:
-                    h_start = 0
-                if i == len(box_list):
-                    h_end = h
-                word_img = erosion[h_start:h_end + 1, :]
-                word_h, word_w = word_img.shape
-                w_split_list, w_projection_map = self.projection(word_img.T, word_w, word_h)
-                w_start, w_end = w_split_list[0][0], w_split_list[-1][1]
-                if h_start > 0:
-                    h_start -= 1
-                h_end += 1
-                word_img = box_img[h_start:h_end + 1:, w_start:w_end + 1, :]
-                split_bbox_list.append([w_start, h_start, w_end, h_end])
-        else:
-            split_bbox_list.append([0, 0, w, h])
-        return split_bbox_list
-
-    def shrink_bbox(self, bbox):
-        left, top, right, bottom = bbox
-        sh_h = min(max(int((bottom - top) * 0.1), 1), self.shrink_h_max)
-        sh_w = min(max(int((right - left) * 0.1), 1), self.shrink_w_max)
-        left_new = left + sh_w
-        right_new = right - sh_w
-        top_new = top + sh_h
-        bottom_new = bottom - sh_h
-        if left_new >= right_new:
-            left_new = left
-            right_new = right
-        if top_new >= bottom_new:
-            top_new = top
-            bottom_new = bottom
-        return [left_new, top_new, right_new, bottom_new]
-
-    def __call__(self, data):
-        img = data['image']
-        cells = data['cells']
-        height, width = img.shape[0:2]
-        if self.mask_type == 1:
-            mask_img = np.zeros((height, width), dtype=np.float32)
-        else:
-            mask_img = np.zeros((height, width, 3), dtype=np.float32)
-        cell_num = len(cells)
-        for cno in range(cell_num):
-            if "bbox" in cells[cno]:
-                bbox = cells[cno]['bbox']
-                left, top, right, bottom = bbox
-                box_img = img[top:bottom, left:right, :].copy()
-                split_bbox_list = self.projection_cx(box_img)
-                for sno in range(len(split_bbox_list)):
-                    split_bbox_list[sno][0] += left
-                    split_bbox_list[sno][1] += top
-                    split_bbox_list[sno][2] += left
-                    split_bbox_list[sno][3] += top
-
-                for sno in range(len(split_bbox_list)):
-                    left, top, right, bottom = split_bbox_list[sno]
-                    left, top, right, bottom = self.shrink_bbox([left, top, right, bottom])
-                    if self.mask_type == 1:
-                        mask_img[top:bottom, left:right] = 1.0
-                        data['mask_img'] = mask_img
-                    else:
-                        mask_img[top:bottom, left:right, :] = (255, 255, 255)
-                        data['image'] = mask_img
-        return data
-
-
-class ResizeTableImage(object):
-    def __init__(self, max_len, **kwargs):
-        super(ResizeTableImage, self).__init__()
-        self.max_len = max_len
-
-    def get_img_bbox(self, cells):
-        bbox_list = []
-        if len(cells) == 0:
-            return bbox_list
-        cell_num = len(cells)
-        for cno in range(cell_num):
-            if "bbox" in cells[cno]:
-                bbox = cells[cno]['bbox']
-                bbox_list.append(bbox)
-        return bbox_list
-
-    def resize_img_table(self, img, bbox_list, max_len):
-        height, width = img.shape[0:2]
-        ratio = max_len / (max(height, width) * 1.0)
-        resize_h = int(height * ratio)
-        resize_w = int(width * ratio)
-        img_new = cv2.resize(img, (resize_w, resize_h))
-        bbox_list_new = []
-        for bno in range(len(bbox_list)):
-            left, top, right, bottom = bbox_list[bno].copy()
-            left = int(left * ratio)
-            top = int(top * ratio)
-            right = int(right * ratio)
-            bottom = int(bottom * ratio)
-            bbox_list_new.append([left, top, right, bottom])
-        return img_new, bbox_list_new
-
-    def __call__(self, data):
-        img = data['image']
-        if 'cells' not in data:
-            cells = []
-        else:
-            cells = data['cells']
-        bbox_list = self.get_img_bbox(cells)
-        img_new, bbox_list_new = self.resize_img_table(img, bbox_list, self.max_len)
-        data['image'] = img_new
-        cell_num = len(cells)
-        bno = 0
-        for cno in range(cell_num):
-            if "bbox" in data['cells'][cno]:
-                data['cells'][cno]['bbox'] = bbox_list_new[bno]
-                bno += 1
-        data['max_len'] = self.max_len
-        return data
-
-
-class PaddingTableImage(object):
-    def __init__(self, **kwargs):
-        super(PaddingTableImage, self).__init__()
-
-    def __call__(self, data):
-        img = data['image']
-        max_len = data['max_len']
-        padding_img = np.zeros((max_len, max_len, 3), dtype=np.float32)
-        height, width = img.shape[0:2]
-        padding_img[0:height, 0:width, :] = img.copy()
-        data['image'] = padding_img
-        return data
\ No newline at end of file
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/arch_config.yaml
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/arch_config.yaml
+ch_ptocr_mobile_v2.0_cls_infer:
+  model_type: cls
+  algorithm: CLS
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.35
+    model_name: small
+  Neck:
+  Head:
+    name: ClsHead
+    class_dim: 2
+
+Multilingual_PP-OCRv3_det_infer:
+  model_type: det
+  algorithm: DB
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: large
+    disable_se: True
+  Neck:
+    name: RSEFPN
+    out_channels: 96
+    shortcut: True
+  Head:
+    name: DBHead
+    k: 50
+
+en_PP-OCRv3_det_infer:
+  model_type: det
+  algorithm: DB
+  Transform:
+  Backbone:
+    name: MobileNetV3
+    scale: 0.5
+    model_name: large
+    disable_se: True
+  Neck:
+    name: RSEFPN
+    out_channels: 96
+    shortcut: True
+  Head:
+    name: DBHead
+    k: 50
+
+en_PP-OCRv4_rec_infer:
+  model_type: rec
+  algorithm: SVTR_LCNet
+  Transform:
+  Backbone:
+    name: PPLCNetV3
+    scale: 0.95
+  Head:
+    name: MultiHead
+    out_channels_list:
+      CTCLabelDecode: 97 #'blank' + ...(62) + ' '
+    head_list:
+      - CTCHead:
+          Neck:
+            name: svtr
+            dims: 120
+            depth: 2
+            hidden_dims: 120
+            kernel_size: [ 1, 3 ]
+            use_guide: True
+          Head:
+            fc_decay: 0.00001
+      - NRTRHead:
+          nrtr_dim: 384
+          max_text_length: 25
+
+ch_PP-OCRv4_det_infer:
+  model_type: det
+  algorithm: DB
+  Transform: null
+  Backbone:
+    name: PPLCNetV3
+    scale: 0.75
+    det: True
+  Neck:
+    name: RSEFPN
+    out_channels: 96
+    shortcut: True
+  Head:
+    name: DBHead
+    k: 50
+
+ch_PP-OCRv4_det_server_infer:
+  model_type: det
+  algorithm: DB
+  Transform: null
+  Backbone:
+    name: PPHGNet_small
+    det: True
+  Neck:
+    name: LKPAN
+    out_channels: 256
+    intracl: true
+  Head:
+    name: PFHeadLocal
+    k: 50
+    mode: "large"
+
+ch_PP-OCRv4_rec_infer:
+  model_type: rec
+  algorithm: SVTR_LCNet
+  Transform:
+  Backbone:
+    name: PPLCNetV3
+    scale: 0.95
+  Head:
+    name: MultiHead
+    out_channels_list:
+      CTCLabelDecode: 6625 #'blank' + ...(6623) + ' '
+    head_list:
+      - CTCHead:
+          Neck:
+            name: svtr
+            dims: 120
+            depth: 2
+            hidden_dims: 120
+            kernel_size: [ 1, 3 ]
+            use_guide: True
+          Head:
+            fc_decay: 0.00001
+      - NRTRHead:
+          nrtr_dim: 384
+          max_text_length: 25
+
+ch_PP-OCRv4_rec_server_infer:
+  model_type: rec
+  algorithm: SVTR_HGNet
+  Transform:
+  Backbone:
+    name: PPHGNet_small
+  Head:
+    name: MultiHead
+    out_channels_list:
+      CTCLabelDecode: 6625 #'blank' + ...(6623) + ' '
+    head_list:
+      - CTCHead:
+          Neck:
+            name: svtr
+            dims: 120
+            depth: 2
+            hidden_dims: 120
+            kernel_size: [ 1, 3 ]
+            use_guide: True
+          Head:
+            fc_decay: 0.00001
+      - NRTRHead:
+          nrtr_dim: 384
+          max_text_length: 25
+
+chinese_cht_PP-OCRv3_rec_infer:
+  model_type: rec
+  algorithm: SVTR
+  Transform:
+  Backbone:
+    name: MobileNetV1Enhance
+    scale: 0.5
+    last_conv_stride: [1, 2]
+    last_pool_type: avg
+  Neck:
+    name: SequenceEncoder
+    encoder_type: svtr
+    dims: 64
+    depth: 2
+    hidden_dims: 120
+    use_guide: True
+  Head:
+    name: CTCHead
+    out_channels: 8423
+    fc_decay: 0.00001
+
+latin_PP-OCRv3_rec_infer:
+  model_type: rec
+  algorithm: SVTR
+  Transform:
+  Backbone:
+    name: MobileNetV1Enhance
+    scale: 0.5
+    last_conv_stride: [ 1, 2 ]
+    last_pool_type: avg
+  Neck:
+    name: SequenceEncoder
+    encoder_type: svtr
+    dims: 64
+    depth: 2
+    hidden_dims: 120
+    use_guide: True
+  Head:
+    name: CTCHead
+    out_channels: 187
+    fc_decay: 0.00001
+
+cyrillic_PP-OCRv3_rec_infer:
+  model_type: rec
+  algorithm: SVTR
+  Transform:
+  Backbone:
+    name: MobileNetV1Enhance
+    scale: 0.5
+    last_conv_stride: [ 1, 2 ]
+    last_pool_type: avg
+  Neck:
+    name: SequenceEncoder
+    encoder_type: svtr
+    dims: 64
+    depth: 2
+    hidden_dims: 120
+    use_guide: True
+  Head:
+    name: CTCHead
+    out_channels: 165
+    fc_decay: 0.00001
+
+arabic_PP-OCRv4_rec_infer:
+  model_type: rec
+  algorithm: SVTR
+  Transform:
+  Backbone:
+    name: MobileNetV1Enhance
+    scale: 0.5
+    last_conv_stride: [ 1, 2 ]
+    last_pool_type: avg
+  Neck:
+    name: SequenceEncoder
+    encoder_type: svtr
+    dims: 64
+    depth: 2
+    hidden_dims: 120
+    use_guide: True
+  Head:
+    name: CTCHead
+    out_channels: 164
+    fc_decay: 0.00001
+
+korean_PP-OCRv4_rec_infer:
+  model_type: rec
+  algorithm: SVTR
+  Transform:
+  Backbone:
+    name: MobileNetV1Enhance
+    scale: 0.5
+    last_conv_stride: [ 1, 2 ]
+    last_pool_type: avg
+  Neck:
+    name: SequenceEncoder
+    encoder_type: svtr
+    dims: 64
+    depth: 2
+    hidden_dims: 120
+    use_guide: True
+  Head:
+    name: CTCHead
+    out_channels: 3690
+    fc_decay: 0.00001
+
+japan_PP-OCRv4_rec_infer:
+  model_type: rec
+  algorithm: SVTR
+  Transform:
+  Backbone:
+    name: MobileNetV1Enhance
+    scale: 0.5
+    last_conv_stride: [ 1, 2 ]
+    last_pool_type: avg
+  Neck:
+    name: SequenceEncoder
+    encoder_type: svtr
+    dims: 64
+    depth: 2
+    hidden_dims: 120
+    use_guide: True
+  Head:
+    name: CTCHead
+    out_channels: 4401
+    fc_decay: 0.00001
+
+ta_PP-OCRv4_rec_infer:
+  model_type: rec
+  algorithm: SVTR
+  Transform:
+  Backbone:
+    name: MobileNetV1Enhance
+    scale: 0.5
+    last_conv_stride: [ 1, 2 ]
+    last_pool_type: avg
+  Neck:
+    name: SequenceEncoder
+    encoder_type: svtr
+    dims: 64
+    depth: 2
+    hidden_dims: 120
+    use_guide: True
+  Head:
+    name: CTCHead
+    out_channels: 130
+    fc_decay: 0.00001
+
+te_PP-OCRv4_rec_infer:
+  model_type: rec
+  algorithm: SVTR
+  Transform:
+  Backbone:
+    name: MobileNetV1Enhance
+    scale: 0.5
+    last_conv_stride: [ 1, 2 ]
+    last_pool_type: avg
+  Neck:
+    name: SequenceEncoder
+    encoder_type: svtr
+    dims: 64
+    depth: 2
+    hidden_dims: 120
+    use_guide: True
+  Head:
+    name: CTCHead
+    out_channels: 153
+    fc_decay: 0.00001
+
+ka_PP-OCRv4_rec_infer:
+  model_type: rec
+  algorithm: SVTR
+  Transform:
+  Backbone:
+    name: MobileNetV1Enhance
+    scale: 0.5
+    last_conv_stride: [ 1, 2 ]
+    last_pool_type: avg
+  Neck:
+    name: SequenceEncoder
+    encoder_type: svtr
+    dims: 64
+    depth: 2
+    hidden_dims: 120
+    use_guide: True
+  Head:
+    name: CTCHead
+    out_channels: 155
+    fc_decay: 0.00001
+
+devanagari_PP-OCRv4_rec_infer:
+  model_type: rec
+  algorithm: SVTR
+  Transform:
+  Backbone:
+    name: MobileNetV1Enhance
+    scale: 0.5
+    last_conv_stride: [ 1, 2 ]
+    last_pool_type: avg
+  Neck:
+    name: SequenceEncoder
+    encoder_type: svtr
+    dims: 64
+    depth: 2
+    hidden_dims: 120
+    use_guide: True
+  Head:
+    name: CTCHead
+    out_channels: 169
+    fc_decay: 0.00001
+
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py
@@ -14,7 +14,7 @@

 import copy

-__all__ = ['build_model']
+__all__ = ["build_model"]


 def build_model(config, **kwargs):
@@ -22,4 +22,4 @@ def build_model(config, **kwargs):

    config = copy.deepcopy(config)
    module_class = BaseModel(config, **kwargs)
-    return module_class
\ No newline at end of file
+    return module_class
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py
-import os, sys
-# import torch
-import torch.nn as nn
-# import torch.nn.functional as F
-# from pytorchocr.modeling.common import Activation
+from torch import nn
+
+from ..backbones import build_backbone
+from ..heads import build_head
+from ..necks import build_neck

-from pytorchocr.modeling.transforms import build_transform
-from pytorchocr.modeling.backbones import build_backbone
-from pytorchocr.modeling.necks import build_neck
-from pytorchocr.modeling.heads import build_head

 class BaseModel(nn.Module):
    def __init__(self, config, **kwargs):
@@ -18,27 +14,14 @@ class BaseModel(nn.Module):
        """
        super(BaseModel, self).__init__()

-        in_channels = config.get('in_channels', 3)
-        model_type = config['model_type']
-        # build transfrom,
-        # for rec, transfrom can be TPS,None
-        # for det and cls, transfrom shoule to be None,
-        # if you make model differently, you can use transfrom in det and cls
-        if 'Transform' not in config or config['Transform'] is None:
-            self.use_transform = False
-        else:
-            self.use_transform = True
-            config['Transform']['in_channels'] = in_channels
-            self.transform = build_transform(config['Transform'])
-            in_channels = self.transform.out_channels
-            # raise NotImplementedError
-
+        in_channels = config.get("in_channels", 3)
+        model_type = config["model_type"]
        # build backbone, backbone is need for del, rec and cls
-        if 'Backbone' not in config or config['Backbone'] is None:
+        if "Backbone" not in config or config["Backbone"] is None:
            self.use_backbone = False
        else:
            self.use_backbone = True
-            config["Backbone"]['in_channels'] = in_channels
+            config["Backbone"]["in_channels"] = in_channels
            self.backbone = build_backbone(config["Backbone"], model_type)
            in_channels = self.backbone.out_channels

@@ -46,20 +29,20 @@ class BaseModel(nn.Module):
        # for rec, neck can be cnn,rnn or reshape(None)
        # for det, neck can be FPN, BIFPN and so on.
        # for cls, neck should be none
-        if 'Neck' not in config or config['Neck'] is None:
+        if "Neck" not in config or config["Neck"] is None:
            self.use_neck = False
        else:
            self.use_neck = True
-            config['Neck']['in_channels'] = in_channels
-            self.neck = build_neck(config['Neck'])
+            config["Neck"]["in_channels"] = in_channels
+            self.neck = build_neck(config["Neck"])
            in_channels = self.neck.out_channels

        # # build head, head is need for det, rec and cls
-        if 'Head' not in config or config['Head'] is None:
+        if "Head" not in config or config["Head"] is None:
            self.use_head = False
        else:
            self.use_head = True
-            config["Head"]['in_channels'] = in_channels
+            config["Head"]["in_channels"] = in_channels
            self.head = build_head(config["Head"], **kwargs)

        self.return_all_feats = config.get("return_all_feats", False)
@@ -70,7 +53,7 @@ class BaseModel(nn.Module):
        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_normal_(m.weight, mode='fan_out')
+                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
@@ -81,15 +64,12 @@ class BaseModel(nn.Module):
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.ConvTranspose2d):
-                nn.init.kaiming_normal_(m.weight, mode='fan_out')
+                nn.init.kaiming_normal_(m.weight, mode="fan_out")
                if m.bias is not None:
                    nn.init.zeros_(m.bias)

-
    def forward(self, x):
        y = dict()
-        if self.use_transform:
-            x = self.transform(x)
        if self.use_backbone:
            x = self.backbone(x)
        if isinstance(x, dict):
@@ -107,9 +87,9 @@ class BaseModel(nn.Module):
        if self.use_head:
            x = self.head(x)
        # for multi head, save ctc neck out for udml
-        if isinstance(x, dict) and 'ctc_nect' in x.keys():
-            y['neck_out'] = x['ctc_neck']
-            y['head_out'] = x
+        if isinstance(x, dict) and "ctc_nect" in x.keys():
+            y["neck_out"] = x["ctc_neck"]
+            y["head_out"] = x
        elif isinstance(x, dict):
            y.update(x)
        else:
@@ -122,4 +102,4 @@ class BaseModel(nn.Module):
            else:
                return {final_name: x}
        else:
-            return x
\ No newline at end of file
+            return x
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py
@@ -12,45 +12,51 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-__all__ = ['build_backbone']
+__all__ = ["build_backbone"]


 def build_backbone(config, model_type):
-    if model_type == 'det':
+    if model_type == "det":
        from .det_mobilenet_v3 import MobileNetV3
-        from .det_resnet import ResNet
-        from .det_resnet_vd import ResNet_vd
-        from .det_resnet_vd_sast import ResNet_SAST
+        from .rec_hgnet import PPHGNet_small
        from .rec_lcnetv3 import PPLCNetV3
+
+        support_dict = [
+            "MobileNetV3",
+            "ResNet",
+            "ResNet_vd",
+            "ResNet_SAST",
+            "PPLCNetV3",
+            "PPHGNet_small",
+        ]
+    elif model_type == "rec" or model_type == "cls":
        from .rec_hgnet import PPHGNet_small
-        support_dict = ['MobileNetV3', 'ResNet', 'ResNet_vd', 'ResNet_SAST', 'PPLCNetV3', 'PPHGNet_small']
-    elif model_type == 'rec' or model_type == 'cls':
+        from .rec_lcnetv3 import PPLCNetV3
        from .rec_mobilenet_v3 import MobileNetV3
-        from .rec_resnet_vd import ResNet
-        from .rec_resnet_fpn import ResNetFPN
-        from .rec_mv1_enhance import MobileNetV1Enhance
-        from .rec_nrtr_mtb import MTB
-        from .rec_resnet_31 import ResNet31
        from .rec_svtrnet import SVTRNet
-        from .rec_vitstr import ViTSTR
-        from .rec_densenet import DenseNet
-        from .rec_lcnetv3 import PPLCNetV3
-        from .rec_hgnet import PPHGNet_small
-        support_dict = ['MobileNetV1Enhance', 'MobileNetV3', 'ResNet', 'ResNetFPN', 'MTB',
-                        'ResNet31', 'SVTRNet', 'ViTSTR', 'DenseNet', 'PPLCNetV3', 'PPHGNet_small']
-    elif model_type == 'e2e':
-        from .e2e_resnet_vd_pg import ResNet
-        support_dict = ['ResNet']
-    elif model_type == "table":
-        from .table_resnet_vd import ResNet
-        from .table_mobilenet_v3 import MobileNetV3
-        support_dict = ["ResNet", "MobileNetV3"]
+        from .rec_mv1_enhance import MobileNetV1Enhance
+
+        support_dict = [
+            "MobileNetV1Enhance",
+            "MobileNetV3",
+            "ResNet",
+            "ResNetFPN",
+            "MTB",
+            "ResNet31",
+            "SVTRNet",
+            "ViTSTR",
+            "DenseNet",
+            "PPLCNetV3",
+            "PPHGNet_small",
+        ]
    else:
        raise NotImplementedError

-    module_name = config.pop('name')
+    module_name = config.pop("name")
    assert module_name in support_dict, Exception(
-        'when model typs is {}, backbone only support {}'.format(model_type,
-                                                                 support_dict))
+        "when model typs is {}, backbone only support {}".format(
+            model_type, support_dict
+        )
+    )
    module_class = eval(module_name)(**config)
-    return module_class
\ No newline at end of file
+    return module_class
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py
-import os, sys
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from pytorchocr.modeling.common import Activation
+from torch import nn
+
+from ..common import Activation
+

 def make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
@@ -14,16 +13,18 @@ def make_divisible(v, divisor=8, min_value=None):


 class ConvBNLayer(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride,
-                 padding,
-                 groups=1,
-                 if_act=True,
-                 act=None,
-                 name=None):
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride,
+        padding,
+        groups=1,
+        if_act=True,
+        act=None,
+        name=None,
+    ):
        super(ConvBNLayer, self).__init__()
        self.if_act = if_act
        self.conv = nn.Conv2d(
@@ -33,11 +34,12 @@ class ConvBNLayer(nn.Module):
            stride=stride,
            padding=padding,
            groups=groups,
-            bias=False)
+            bias=False,
+        )

        self.bn = nn.BatchNorm2d(
            out_channels,
-            )
+        )
        if self.if_act:
            self.act = Activation(act_type=act, inplace=True)

@@ -59,16 +61,18 @@ class SEModule(nn.Module):
            kernel_size=1,
            stride=1,
            padding=0,
-            bias=True)
-        self.relu1 = Activation(act_type='relu', inplace=True)
+            bias=True,
+        )
+        self.relu1 = Activation(act_type="relu", inplace=True)
        self.conv2 = nn.Conv2d(
            in_channels=in_channels // reduction,
            out_channels=in_channels,
            kernel_size=1,
            stride=1,
            padding=0,
-            bias=True)
-        self.hard_sigmoid = Activation(act_type='hard_sigmoid', inplace=True)
+            bias=True,
+        )
+        self.hard_sigmoid = Activation(act_type="hard_sigmoid", inplace=True)

    def forward(self, inputs):
        outputs = self.avg_pool(inputs)
@@ -81,15 +85,17 @@ class SEModule(nn.Module):


 class ResidualUnit(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 mid_channels,
-                 out_channels,
-                 kernel_size,
-                 stride,
-                 use_se,
-                 act=None,
-                 name=''):
+    def __init__(
+        self,
+        in_channels,
+        mid_channels,
+        out_channels,
+        kernel_size,
+        stride,
+        use_se,
+        act=None,
+        name="",
+    ):
        super(ResidualUnit, self).__init__()
        self.if_shortcut = stride == 1 and in_channels == out_channels
        self.if_se = use_se
@@ -102,7 +108,8 @@ class ResidualUnit(nn.Module):
            padding=0,
            if_act=True,
            act=act,
-            name=name + "_expand")
+            name=name + "_expand",
+        )
        self.bottleneck_conv = ConvBNLayer(
            in_channels=mid_channels,
            out_channels=mid_channels,
@@ -112,7 +119,8 @@ class ResidualUnit(nn.Module):
            groups=mid_channels,
            if_act=True,
            act=act,
-            name=name + "_depthwise")
+            name=name + "_depthwise",
+        )
        if self.if_se:
            self.mid_se = SEModule(mid_channels, name=name + "_se")
        self.linear_conv = ConvBNLayer(
@@ -123,7 +131,8 @@ class ResidualUnit(nn.Module):
            padding=0,
            if_act=False,
            act=None,
-            name=name + "_linear")
+            name=name + "_linear",
+        )

    def forward(self, inputs):
        x = self.expand_conv(inputs)
@@ -137,12 +146,9 @@ class ResidualUnit(nn.Module):


 class MobileNetV3(nn.Module):
-    def __init__(self,
-                 in_channels=3,
-                 model_name='large',
-                 scale=0.5,
-                 disable_se=False,
-                 **kwargs):
+    def __init__(
+        self, in_channels=3, model_name="large", scale=0.5, disable_se=False, **kwargs
+    ):
        """
        the MobilenetV3 backbone network for detection module.
        Args:
@@ -155,46 +161,48 @@ class MobileNetV3(nn.Module):
        if model_name == "large":
            cfg = [
                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, False, 'relu', 1],
-                [3, 64, 24, False, 'relu', 2],
-                [3, 72, 24, False, 'relu', 1],
-                [5, 72, 40, True, 'relu', 2],
-                [5, 120, 40, True, 'relu', 1],
-                [5, 120, 40, True, 'relu', 1],
-                [3, 240, 80, False, 'hard_swish', 2],
-                [3, 200, 80, False, 'hard_swish', 1],
-                [3, 184, 80, False, 'hard_swish', 1],
-                [3, 184, 80, False, 'hard_swish', 1],
-                [3, 480, 112, True, 'hard_swish', 1],
-                [3, 672, 112, True, 'hard_swish', 1],
-                [5, 672, 160, True, 'hard_swish', 2],
-                [5, 960, 160, True, 'hard_swish', 1],
-                [5, 960, 160, True, 'hard_swish', 1],
+                [3, 16, 16, False, "relu", 1],
+                [3, 64, 24, False, "relu", 2],
+                [3, 72, 24, False, "relu", 1],
+                [5, 72, 40, True, "relu", 2],
+                [5, 120, 40, True, "relu", 1],
+                [5, 120, 40, True, "relu", 1],
+                [3, 240, 80, False, "hard_swish", 2],
+                [3, 200, 80, False, "hard_swish", 1],
+                [3, 184, 80, False, "hard_swish", 1],
+                [3, 184, 80, False, "hard_swish", 1],
+                [3, 480, 112, True, "hard_swish", 1],
+                [3, 672, 112, True, "hard_swish", 1],
+                [5, 672, 160, True, "hard_swish", 2],
+                [5, 960, 160, True, "hard_swish", 1],
+                [5, 960, 160, True, "hard_swish", 1],
            ]
            cls_ch_squeeze = 960
        elif model_name == "small":
            cfg = [
                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, True, 'relu', 2],
-                [3, 72, 24, False, 'relu', 2],
-                [3, 88, 24, False, 'relu', 1],
-                [5, 96, 40, True, 'hard_swish', 2],
-                [5, 240, 40, True, 'hard_swish', 1],
-                [5, 240, 40, True, 'hard_swish', 1],
-                [5, 120, 48, True, 'hard_swish', 1],
-                [5, 144, 48, True, 'hard_swish', 1],
-                [5, 288, 96, True, 'hard_swish', 2],
-                [5, 576, 96, True, 'hard_swish', 1],
-                [5, 576, 96, True, 'hard_swish', 1],
+                [3, 16, 16, True, "relu", 2],
+                [3, 72, 24, False, "relu", 2],
+                [3, 88, 24, False, "relu", 1],
+                [5, 96, 40, True, "hard_swish", 2],
+                [5, 240, 40, True, "hard_swish", 1],
+                [5, 240, 40, True, "hard_swish", 1],
+                [5, 120, 48, True, "hard_swish", 1],
+                [5, 144, 48, True, "hard_swish", 1],
+                [5, 288, 96, True, "hard_swish", 2],
+                [5, 576, 96, True, "hard_swish", 1],
+                [5, 576, 96, True, "hard_swish", 1],
            ]
            cls_ch_squeeze = 576
        else:
-            raise NotImplementedError("mode[" + model_name +
-                                      "_model] is not implemented!")
+            raise NotImplementedError(
+                "mode[" + model_name + "_model] is not implemented!"
+            )

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
-        assert scale in supported_scale, \
-            "supported scale are {} but input scale is {}".format(supported_scale, scale)
+        assert (
+            scale in supported_scale
+        ), "supported scale are {} but input scale is {}".format(supported_scale, scale)
        inplanes = 16
        # conv1
        self.conv = ConvBNLayer(
@@ -205,15 +213,16 @@ class MobileNetV3(nn.Module):
            padding=1,
            groups=1,
            if_act=True,
-            act='hard_swish',
-            name='conv1')
+            act="hard_swish",
+            name="conv1",
+        )

        self.stages = nn.ModuleList()
        self.out_channels = []
        block_list = []
        i = 0
        inplanes = make_divisible(inplanes * scale)
-        for (k, exp, c, se, nl, s) in cfg:
+        for k, exp, c, se, nl, s in cfg:
            se = se and not self.disable_se
            if s == 2 and i > 2:
                self.out_channels.append(inplanes)
@@ -228,7 +237,9 @@ class MobileNetV3(nn.Module):
                    stride=s,
                    use_se=se,
                    act=nl,
-                    name="conv" + str(i + 2)))
+                    name="conv" + str(i + 2),
+                )
+            )
            inplanes = make_divisible(scale * c)
            i += 1
        block_list.append(
@@ -240,8 +251,10 @@ class MobileNetV3(nn.Module):
                padding=0,
                groups=1,
                if_act=True,
-                act='hard_swish',
-                name='conv_last'))
+                act="hard_swish",
+                name="conv_last",
+            )
+        )
        self.stages.append(nn.Sequential(*block_list))
        self.out_channels.append(make_divisible(scale * cls_ch_squeeze))
        # for i, stage in enumerate(self.stages):

--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_resnet.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_resnet.py
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from .det_resnet_vd import DeformableConvV2, ConvBNLayer
-
-
-class BottleneckBlock(nn.Module):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 stride,
-                 shortcut=True,
-                 is_dcn=False):
-        super(BottleneckBlock, self).__init__()
-
-        self.conv0 = ConvBNLayer(
-            in_channels=num_channels,
-            out_channels=num_filters,
-            kernel_size=1,
-            act="relu", )
-        self.conv1 = ConvBNLayer(
-            in_channels=num_filters,
-            out_channels=num_filters,
-            kernel_size=3,
-            stride=stride,
-            act="relu",
-            is_dcn=is_dcn,
-            # dcn_groups=1,
-        )
-        self.conv2 = ConvBNLayer(
-            in_channels=num_filters,
-            out_channels=num_filters * 4,
-            kernel_size=1,
-            act=None, )
-
-        if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=num_channels,
-                out_channels=num_filters * 4,
-                kernel_size=1,
-                stride=stride, )
-
-        self.shortcut = shortcut
-
-        self._num_channels_out = num_filters * 4
-
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-        conv1 = self.conv1(y)
-        conv2 = self.conv2(conv1)
-
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-
-        y = torch.add(short, conv2)
-        y = F.relu(y)
-        return y
-
-
-class BasicBlock(nn.Module):
-    def __init__(self,
-                 num_channels,
-                 num_filters,
-                 stride,
-                 shortcut=True,
-                 name=None):
-        super(BasicBlock, self).__init__()
-        self.stride = stride
-        self.conv0 = ConvBNLayer(
-            in_channels=num_channels,
-            out_channels=num_filters,
-            kernel_size=3,
-            stride=stride,
-            act="relu")
-        self.conv1 = ConvBNLayer(
-            in_channels=num_filters,
-            out_channels=num_filters,
-            kernel_size=3,
-            act=None)
-
-        if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=num_channels,
-                out_channels=num_filters,
-                kernel_size=1,
-                stride=stride)
-
-        self.shortcut = shortcut
-
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-        conv1 = self.conv1(y)
-
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-        y = torch.add(short, conv1)
-        y = F.relu(y)
-        return y
-
-
-class ResNet(nn.Module):
-    def __init__(self,
-                 in_channels=3,
-                 layers=50,
-                 out_indices=None,
-                 dcn_stage=None):
-        super(ResNet, self).__init__()
-
-        self.layers = layers
-        self.input_image_channel = in_channels
-
-        supported_layers = [18, 34, 50, 101, 152]
-        assert layers in supported_layers, \
-            "supported layers are {} but input layer is {}".format(
-                supported_layers, layers)
-
-        if layers == 18:
-            depth = [2, 2, 2, 2]
-        elif layers == 34 or layers == 50:
-            depth = [3, 4, 6, 3]
-        elif layers == 101:
-            depth = [3, 4, 23, 3]
-        elif layers == 152:
-            depth = [3, 8, 36, 3]
-        num_channels = [64, 256, 512,
-                        1024] if layers >= 50 else [64, 64, 128, 256]
-        num_filters = [64, 128, 256, 512]
-
-        self.dcn_stage = dcn_stage if dcn_stage is not None else [
-            False, False, False, False
-        ]
-        self.out_indices = out_indices if out_indices is not None else [
-            0, 1, 2, 3
-        ]
-
-        self.conv = ConvBNLayer(
-            in_channels=self.input_image_channel,
-            out_channels=64,
-            kernel_size=7,
-            stride=2,
-            act="relu", )
-        self.pool2d_max = nn.MaxPool2d(
-            kernel_size=3,
-            stride=2,
-            padding=1, )
-
-        self.stages = nn.ModuleList()
-        self.out_channels = []
-        if layers >= 50:
-            for block in range(len(depth)):
-                shortcut = False
-                block_list = nn.Sequential()
-                is_dcn = self.dcn_stage[block]
-                for i in range(depth[block]):
-                    if layers in [101, 152] and block == 2:
-                        if i == 0:
-                            conv_name = "res" + str(block + 2) + "a"
-                        else:
-                            conv_name = "res" + str(block + 2) + "b" + str(i)
-                    else:
-                        conv_name = "res" + str(block + 2) + chr(97 + i)
-                    bottleneck_block = BottleneckBlock(
-                            num_channels=num_channels[block]
-                            if i == 0 else num_filters[block] * 4,
-                            num_filters=num_filters[block],
-                            stride=2 if i == 0 and block != 0 else 1,
-                            shortcut=shortcut,
-                            is_dcn=is_dcn)
-                    block_list.add_module(conv_name, bottleneck_block)
-                    shortcut = True
-                if block in self.out_indices:
-                    self.out_channels.append(num_filters[block] * 4)
-                self.stages.append(block_list)
-        else:
-            for block in range(len(depth)):
-                shortcut = False
-                block_list = nn.Sequential()
-                for i in range(depth[block]):
-                    conv_name = "res" + str(block + 2) + chr(97 + i)
-                    basic_block = BasicBlock(
-                            num_channels=num_channels[block]
-                            if i == 0 else num_filters[block],
-                            num_filters=num_filters[block],
-                            stride=2 if i == 0 and block != 0 else 1,
-                            shortcut=shortcut)
-                    block_list.add_module(conv_name, basic_block)
-                    shortcut = True
-                if block in self.out_indices:
-                    self.out_channels.append(num_filters[block])
-                self.stages.append(block_list)
-
-    def forward(self, inputs):
-        y = self.conv(inputs)
-        y = self.pool2d_max(y)
-        out = []
-        for i, block in enumerate(self.stages):
-            y = block(y)
-            if i in self.out_indices:
-                out.append(y)
-        return out
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_resnet_vd.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_resnet_vd.py
-
-
-import os, sys
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from pytorchocr.modeling.common import Activation
-import torchvision
-
-class DeformableConvV2(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride=1,
-                 padding=0,
-                 dilation=1,
-                 groups=1,
-                 weight_attr=None,
-                 bias_attr=None,
-                 lr_scale=1,
-                 regularizer=None,
-                 skip_quant=False,
-                 dcn_bias_regularizer=None,
-                 dcn_bias_lr_scale=2.):
-        super(DeformableConvV2, self).__init__()
-        self.offset_channel = 2 * kernel_size**2 * groups
-        self.mask_channel = kernel_size**2 * groups
-
-        if bias_attr:
-            # in FCOS-DCN head, specifically need learning_rate and regularizer
-            dcn_bias_attr = True
-        else:
-            # in ResNet backbone, do not need bias
-            dcn_bias_attr = False
-        self.conv_dcn = torchvision.ops.DeformConv2d(
-            in_channels,
-            out_channels,
-            kernel_size,
-            stride=stride,
-            padding=(kernel_size - 1) // 2 * dilation,
-            dilation=dilation,
-            groups=groups//2 if groups > 1 else 1,
-            bias=dcn_bias_attr)
-
-        self.conv_offset = nn.Conv2d(
-            in_channels,
-            groups * 3 * kernel_size**2,
-            kernel_size,
-            stride=stride,
-            padding=(kernel_size - 1) // 2,
-            bias=True)
-        if skip_quant:
-            self.conv_offset.skip_quant = True
-
-    def forward(self, x):
-        offset_mask = self.conv_offset(x)
-        offset, mask = torch.split(
-            offset_mask,
-            split_size_or_sections=[self.offset_channel, self.mask_channel],
-            dim=1)
-        mask = torch.sigmoid(mask)
-        y = self.conv_dcn(x, offset, mask=mask)
-        return y
-
-class ConvBNLayer(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride=1,
-                 groups=1,
-                 dcn_groups=1,
-                 is_vd_mode=False,
-                 act=None,
-                 name=None,
-                 is_dcn=False,
-                 ):
-        super(ConvBNLayer, self).__init__()
-
-        self.is_vd_mode = is_vd_mode
-        self.act = act
-        self._pool2d_avg = nn.AvgPool2d(
-            kernel_size=2, stride=2, padding=0, ceil_mode=True)
-        if not is_dcn:
-            self._conv = nn.Conv2d(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=kernel_size,
-                stride=stride,
-                padding=(kernel_size - 1) // 2,
-                groups=groups,
-                bias=False)
-        else:
-            self._conv = DeformableConvV2(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=kernel_size,
-                stride=stride,
-                padding=(kernel_size - 1) // 2,
-                groups=dcn_groups,
-                bias_attr=False)
-
-        self._batch_norm = nn.BatchNorm2d(
-            out_channels,
-            track_running_stats=True,
-            )
-
-        if act is not None:
-            self._act = Activation(act_type=act, inplace=True)
-
-
-    def forward(self, inputs):
-        if self.is_vd_mode:
-            inputs = self._pool2d_avg(inputs)
-        y = self._conv(inputs)
-        y = self._batch_norm(y)
-        if self.act is not None:
-            y = self._act(y)
-        return y
-
-
-class BottleneckBlock(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 stride,
-                 shortcut=True,
-                 if_first=False,
-                 name=None,
-                 is_dcn=False,
-                 ):
-        super(BottleneckBlock, self).__init__()
-
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=1,
-            act='relu',
-            name=name + "_branch2a")
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2b",
-            is_dcn=is_dcn,
-            dcn_groups=2,
-        )
-        self.conv2 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels * 4,
-            kernel_size=1,
-            act=None,
-            name=name + "_branch2c")
-
-        if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels * 4,
-                kernel_size=1,
-                stride=1,
-                is_vd_mode=False if if_first else True,
-                name=name + "_branch1")
-
-        self.shortcut = shortcut
-
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-        conv1 = self.conv1(y)
-        conv2 = self.conv2(conv1)
-
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-        y = torch.add(short, conv2)
-        y = F.relu(y)
-        return y
-
-
-class BasicBlock(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 stride,
-                 shortcut=True,
-                 if_first=False,
-                 name=None):
-        super(BasicBlock, self).__init__()
-        self.stride = stride
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2a")
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            act=None,
-            name=name + "_branch2b")
-
-        if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=1,
-                stride=1,
-                is_vd_mode=False if if_first else True,
-                name=name + "_branch1")
-
-        self.shortcut = shortcut
-
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-        conv1 = self.conv1(y)
-
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-        y = short + conv1
-        y = F.relu(y)
-        return y
-
-
-class ResNet_vd(nn.Module):
-    def __init__(self,
-                 in_channels=3,
-                 layers=50,
-                 dcn_stage=None,
-                 out_indices=None,
-                 **kwargs):
-        super(ResNet_vd, self).__init__()
-
-        self.layers = layers
-        supported_layers = [18, 34, 50, 101, 152, 200]
-        assert layers in supported_layers, \
-            "supported layers are {} but input layer is {}".format(
-                supported_layers, layers)
-
-        if layers == 18:
-            depth = [2, 2, 2, 2]
-        elif layers == 34 or layers == 50:
-            depth = [3, 4, 6, 3]
-        elif layers == 101:
-            depth = [3, 4, 23, 3]
-        elif layers == 152:
-            depth = [3, 8, 36, 3]
-        elif layers == 200:
-            depth = [3, 12, 48, 3]
-        num_channels = [64, 256, 512,
-                        1024] if layers >= 50 else [64, 64, 128, 256]
-        num_filters = [64, 128, 256, 512]
-
-        self.dcn_stage = dcn_stage if dcn_stage is not None else [
-            False, False, False, False
-        ]
-        self.out_indices = out_indices if out_indices is not None else [
-            0, 1, 2, 3
-        ]
-
-        self.conv1_1 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=32,
-            kernel_size=3,
-            stride=2,
-            act='relu',
-            name="conv1_1")
-        self.conv1_2 = ConvBNLayer(
-            in_channels=32,
-            out_channels=32,
-            kernel_size=3,
-            stride=1,
-            act='relu',
-            name="conv1_2")
-        self.conv1_3 = ConvBNLayer(
-            in_channels=32,
-            out_channels=64,
-            kernel_size=3,
-            stride=1,
-            act='relu',
-            name="conv1_3")
-        self.pool2d_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-
-        self.stages = nn.ModuleList()
-        self.out_channels = []
-        if layers >= 50:
-            for block in range(len(depth)):
-                # block_list = []
-                block_list = nn.Sequential()
-                shortcut = False
-                is_dcn = self.dcn_stage[block]
-                for i in range(depth[block]):
-                    if layers in [101, 152] and block == 2:
-                        if i == 0:
-                            conv_name = "res" + str(block + 2) + "a"
-                        else:
-                            conv_name = "res" + str(block + 2) + "b" + str(i)
-                    else:
-                        conv_name = "res" + str(block + 2) + chr(97 + i)
-                    bottleneck_block = BottleneckBlock(
-                            in_channels=num_channels[block]
-                            if i == 0 else num_filters[block] * 4,
-                            out_channels=num_filters[block],
-                            stride=2 if i == 0 and block != 0 else 1,
-                            shortcut=shortcut,
-                            if_first=block == i == 0,
-                            name=conv_name,
-                            is_dcn=is_dcn,
-                    )
-
-                    shortcut = True
-                    block_list.add_module('bb_%d_%d' % (block, i), bottleneck_block)
-                if block in self.out_indices:
-                    self.out_channels.append(num_filters[block] * 4)
-                # self.stages.append(nn.Sequential(*block_list))
-                self.stages.append(block_list)
-        else:
-            for block in range(len(depth)):
-                # block_list = []
-                block_list = nn.Sequential()
-                shortcut = False
-                # is_dcn = self.dcn_stage[block]
-                for i in range(depth[block]):
-                    conv_name = "res" + str(block + 2) + chr(97 + i)
-                    basic_block = BasicBlock(
-                            in_channels=num_channels[block]
-                            if i == 0 else num_filters[block],
-                            out_channels=num_filters[block],
-                            stride=2 if i == 0 and block != 0 else 1,
-                            shortcut=shortcut,
-                            if_first=block == i == 0,
-                            name=conv_name)
-
-                    shortcut = True
-                    block_list.add_module('bb_%d_%d' % (block, i), basic_block)
-                    # block_list.append(basic_block)
-                if block in self.out_indices:
-                    self.out_channels.append(num_filters[block])
-                self.stages.append(block_list)
-
-                # self.stages.append(nn.Sequential(*block_list))
-
-
-    def forward(self, inputs):
-        y = self.conv1_1(inputs)
-        y = self.conv1_2(y)
-        y = self.conv1_3(y)
-        y = self.pool2d_max(y)
-        out = []
-        for i, block in enumerate(self.stages):
-            y = block(y)
-            if i in self.out_indices:
-                out.append(y)
-        return out
\ No newline at end of file
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_resnet_vd_sast.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_resnet_vd_sast.py
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os, sys
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from pytorchocr.modeling.common import Activation
-# import paddle
-# from paddle import ParamAttr
-# import paddle.nn as nn
-# import paddle.nn.functional as F
-
-__all__ = ["ResNet_SAST"]
-
-
-class ConvBNLayer(nn.Module):
-    def __init__(
-            self,
-            in_channels,
-            out_channels,
-            kernel_size,
-            stride=1,
-            groups=1,
-            is_vd_mode=False,
-            act=None,
-            name=None, ):
-        super(ConvBNLayer, self).__init__()
-
-        self.is_vd_mode = is_vd_mode
-        self._pool2d_avg = nn.AvgPool2d(
-            kernel_size=2, stride=2, padding=0, ceil_mode=True)
-        self._conv = nn.Conv2d(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=(kernel_size - 1) // 2,
-            groups=groups,
-            bias=False)
-        if name == "conv1":
-            bn_name = "bn_" + name
-        else:
-            bn_name = "bn" + name[3:]
-        self._batch_norm = nn.BatchNorm2d(
-            out_channels,)
-        self.act = act
-        if act is not None:
-            self._act = Activation(act_type=act)
-
-
-    def forward(self, inputs):
-        if self.is_vd_mode:
-            inputs = self._pool2d_avg(inputs)
-        y = self._conv(inputs)
-        y = self._batch_norm(y)
-        if self.act:
-            y = self._act(y)
-        return y
-
-
-class BottleneckBlock(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 stride,
-                 shortcut=True,
-                 if_first=False,
-                 name=None):
-        super(BottleneckBlock, self).__init__()
-
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=1,
-            act='relu',
-            name=name + "_branch2a")
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2b")
-        self.conv2 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels * 4,
-            kernel_size=1,
-            act=None,
-            name=name + "_branch2c")
-
-        if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels * 4,
-                kernel_size=1,
-                stride=1,
-                is_vd_mode=False if if_first else True,
-                name=name + "_branch1")
-
-        self.shortcut = shortcut
-
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-        conv1 = self.conv1(y)
-        conv2 = self.conv2(conv1)
-
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-        y = torch.add(short, conv2)
-        y = F.relu(y)
-        return y
-
-
-class BasicBlock(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 stride,
-                 shortcut=True,
-                 if_first=False,
-                 name=None):
-        super(BasicBlock, self).__init__()
-        self.stride = stride
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2a")
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            act=None,
-            name=name + "_branch2b")
-
-        if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=1,
-                stride=1,
-                is_vd_mode=False if if_first else True,
-                name=name + "_branch1")
-
-        self.shortcut = shortcut
-
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-        conv1 = self.conv1(y)
-
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-        y = torch.add(short, conv1)
-        y = F.relu(y)
-        return y
-
-
-class ResNet_SAST(nn.Module):
-    def __init__(self, in_channels=3, layers=50, **kwargs):
-        super(ResNet_SAST, self).__init__()
-
-        self.layers = layers
-        supported_layers = [18, 34, 50, 101, 152, 200]
-        assert layers in supported_layers, \
-            "supported layers are {} but input layer is {}".format(
-                supported_layers, layers)
-
-        if layers == 18:
-            depth = [2, 2, 2, 2]
-        elif layers == 34 or layers == 50:
-            # depth = [3, 4, 6, 3]
-            depth = [3, 4, 6, 3, 3]
-        elif layers == 101:
-            depth = [3, 4, 23, 3]
-        elif layers == 152:
-            depth = [3, 8, 36, 3]
-        elif layers == 200:
-            depth = [3, 12, 48, 3]
-        # num_channels = [64, 256, 512,
-        #                 1024] if layers >= 50 else [64, 64, 128, 256]
-        # num_filters = [64, 128, 256, 512]
-        num_channels = [64, 256, 512,
-                        1024, 2048] if layers >= 50 else [64, 64, 128, 256]
-        num_filters = [64, 128, 256, 512, 512]
-
-        self.conv1_1 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=32,
-            kernel_size=3,
-            stride=2,
-            act='relu',
-            name="conv1_1")
-        self.conv1_2 = ConvBNLayer(
-            in_channels=32,
-            out_channels=32,
-            kernel_size=3,
-            stride=1,
-            act='relu',
-            name="conv1_2")
-        self.conv1_3 = ConvBNLayer(
-            in_channels=32,
-            out_channels=64,
-            kernel_size=3,
-            stride=1,
-            act='relu',
-            name="conv1_3")
-        # self.pool2d_max = nn.MaxPool2D(kernel_size=3, stride=2, padding=1)
-        self.pool2d_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-
-        self.stages = nn.ModuleList()
-        self.out_channels = [3, 64]
-        if layers >= 50:
-            for block in range(len(depth)):
-                # block_list = []
-                block_list = nn.Sequential()
-                shortcut = False
-                for i in range(depth[block]):
-                    if layers in [101, 152] and block == 2:
-                        if i == 0:
-                            conv_name = "res" + str(block + 2) + "a"
-                        else:
-                            conv_name = "res" + str(block + 2) + "b" + str(i)
-                    else:
-                        conv_name = "res" + str(block + 2) + chr(97 + i)
-                    bottleneck_block = BottleneckBlock(
-                        in_channels=num_channels[block] if i == 0 else num_filters[block] * 4,
-                        out_channels=num_filters[block],
-                        stride=2 if i == 0 and block != 0 else 1,
-                        shortcut=shortcut,
-                        if_first=block == i == 0,
-                        name=conv_name
-                    )
-                    shortcut = True
-                    # block_list.append(bottleneck_block)
-                    block_list.add_module('bb_%d_%d' % (block, i), bottleneck_block)
-                self.out_channels.append(num_filters[block] * 4)
-                # self.stages.append(nn.Sequential(*block_list))
-                self.stages.append(block_list)
-        else:
-            for block in range(len(depth)):
-                # block_list = []
-                block_list = nn.Sequential()
-                shortcut = False
-                for i in range(depth[block]):
-                    conv_name = "res" + str(block + 2) + chr(97 + i)
-                    basic_block = BasicBlock(
-                            in_channels=num_channels[block]
-                            if i == 0 else num_filters[block],
-                            out_channels=num_filters[block],
-                            stride=2 if i == 0 and block != 0 else 1,
-                            shortcut=shortcut,
-                            if_first=block == i == 0,
-                            name=conv_name)
-                    shortcut = True
-                    # block_list.append(basic_block)
-                    block_list.add_module('bb_%d_%d' % (block, i), basic_block)
-                self.out_channels.append(num_filters[block])
-                # self.stages.append(nn.Sequential(*block_list))
-                self.stages.append(block_list)
-
-    def forward(self, inputs):
-        out = [inputs]
-        y = self.conv1_1(inputs)
-        y = self.conv1_2(y)
-        y = self.conv1_3(y)
-        out.append(y)
-        y = self.pool2d_max(y)
-        for block in self.stages:
-            y = block(y)
-            out.append(y)
-        return out
\ No newline at end of file
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/e2e_resnet_vd_pg.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/e2e_resnet_vd_pg.py
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from pytorchocr.modeling.common import Activation
-
-__all__ = ["ResNet"]
-
-
-class ConvBNLayer(nn.Module):
-    def __init__(
-            self,
-            in_channels,
-            out_channels,
-            kernel_size,
-            stride=1,
-            groups=1,
-            is_vd_mode=False,
-            act=None,
-            name=None, ):
-        super(ConvBNLayer, self).__init__()
-
-        self.is_vd_mode = is_vd_mode
-        self._pool2d_avg = nn.AvgPool2d(
-            kernel_size=2, stride=2, padding=0, ceil_mode=True)
-        self._conv = nn.Conv2d(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=(kernel_size - 1) // 2,
-            groups=groups,
-            bias=False)
-        if name == "conv1":
-            bn_name = "bn_" + name
-        else:
-            bn_name = "bn" + name[3:]
-        self._batch_norm = nn.BatchNorm2d(out_channels)
-        self.act = act
-        if self.act is not None:
-            self._act = Activation(act_type=self.act, inplace=True)
-
-    def forward(self, inputs):
-        y = self._conv(inputs)
-        y = self._batch_norm(y)
-        if self.act is not None:
-            y = self._act(y)
-        return y
-
-
-class BottleneckBlock(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 stride,
-                 shortcut=True,
-                 if_first=False,
-                 name=None):
-        super(BottleneckBlock, self).__init__()
-
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=1,
-            act='relu',
-            name=name + "_branch2a")
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2b")
-        self.conv2 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels * 4,
-            kernel_size=1,
-            act=None,
-            name=name + "_branch2c")
-
-        if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels * 4,
-                kernel_size=1,
-                stride=stride,
-                is_vd_mode=False if if_first else True,
-                name=name + "_branch1")
-
-        self.shortcut = shortcut
-
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-        conv1 = self.conv1(y)
-        conv2 = self.conv2(conv1)
-
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-        y = torch.add(short, conv2)
-        y = F.relu(y)
-        return y
-
-
-class BasicBlock(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 stride,
-                 shortcut=True,
-                 if_first=False,
-                 name=None):
-        super(BasicBlock, self).__init__()
-        self.stride = stride
-        self.conv0 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            stride=stride,
-            act='relu',
-            name=name + "_branch2a")
-        self.conv1 = ConvBNLayer(
-            in_channels=out_channels,
-            out_channels=out_channels,
-            kernel_size=3,
-            act=None,
-            name=name + "_branch2b")
-
-        if not shortcut:
-            self.short = ConvBNLayer(
-                in_channels=in_channels,
-                out_channels=out_channels,
-                kernel_size=1,
-                stride=1,
-                is_vd_mode=False if if_first else True,
-                name=name + "_branch1")
-
-        self.shortcut = shortcut
-
-    def forward(self, inputs):
-        y = self.conv0(inputs)
-        conv1 = self.conv1(y)
-
-        if self.shortcut:
-            short = inputs
-        else:
-            short = self.short(inputs)
-        y = torch.add(short, conv1)
-        y = F.relu(y)
-        return y
-
-
-class ResNet(nn.Module):
-    def __init__(self, in_channels=3, layers=50, **kwargs):
-        super(ResNet, self).__init__()
-
-        self.layers = layers
-        supported_layers = [18, 34, 50, 101, 152, 200]
-        assert layers in supported_layers, \
-            "supported layers are {} but input layer is {}".format(
-                supported_layers, layers)
-
-        if layers == 18:
-            depth = [2, 2, 2, 2]
-        elif layers == 34 or layers == 50:
-            # depth = [3, 4, 6, 3]
-            depth = [3, 4, 6, 3, 3]
-        elif layers == 101:
-            depth = [3, 4, 23, 3]
-        elif layers == 152:
-            depth = [3, 8, 36, 3]
-        elif layers == 200:
-            depth = [3, 12, 48, 3]
-        num_channels = [64, 256, 512, 1024,
-                        2048] if layers >= 50 else [64, 64, 128, 256]
-        num_filters = [64, 128, 256, 512, 512]
-
-        self.conv1_1 = ConvBNLayer(
-            in_channels=in_channels,
-            out_channels=64,
-            kernel_size=7,
-            stride=2,
-            act='relu',
-            name="conv1_1")
-        self.pool2d_max = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-
-        self.stages =  nn.ModuleList()
-        self.out_channels = [3, 64]
-        # num_filters = [64, 128, 256, 512, 512]
-        if layers >= 50:
-            for block in range(len(depth)):
-                block_list = nn.Sequential()
-                shortcut = False
-                for i in range(depth[block]):
-                    if layers in [101, 152] and block == 2:
-                        if i == 0:
-                            conv_name = "res" + str(block + 2) + "a"
-                        else:
-                            conv_name = "res" + str(block + 2) + "b" + str(i)
-                    else:
-                        conv_name = "res" + str(block + 2) + chr(97 + i)
-                    bottleneckBlock = BottleneckBlock(
-                            in_channels=num_channels[block]
-                            if i == 0 else num_filters[block] * 4,
-                            out_channels=num_filters[block],
-                            stride=2 if i == 0 and block != 0 else 1,
-                            shortcut=shortcut,
-                            if_first=block == i == 0,
-                            name=conv_name)
-                    shortcut = True
-                    block_list.add_module('bb_%d_%d' % (block, i), bottleneckBlock)
-                self.out_channels.append(num_filters[block] * 4)
-                self.stages.append(block_list)
-        else:
-            for block in range(len(depth)):
-                block_list = nn.Sequential()
-                shortcut = False
-                for i in range(depth[block]):
-                    conv_name = "res" + str(block + 2) + chr(97 + i)
-                    basicBlock = BasicBlock(
-                            in_channels=num_channels[block]
-                            if i == 0 else num_filters[block],
-                            out_channels=num_filters[block],
-                            stride=2 if i == 0 and block != 0 else 1,
-                            shortcut=shortcut,
-                            if_first=block == i == 0,
-                            name=conv_name)
-                    shortcut = True
-                    block_list.add_module('bb_%d_%d' % (block, i), basicBlock)
-                self.out_channels.append(num_filters[block])
-                self.stages.append(block_list)
-
-
-    def forward(self, inputs):
-        out = [inputs]
-        y = self.conv1_1(inputs)
-        out.append(y)
-        y = self.pool2d_max(y)
-        for block in self.stages:
-            y = block(y)
-            out.append(y)
-        return out
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_densenet.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_densenet.py
-"""
-This code is refer from:
-https://github.com/LBH1024/CAN/models/densenet.py
-
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import math
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-
-class Bottleneck(nn.Module):
-    def __init__(self, nChannels, growthRate, use_dropout):
-        super(Bottleneck, self).__init__()
-        interChannels = 4 * growthRate
-        self.bn1 = nn.BatchNorm2d(interChannels)
-        self.conv1 = nn.Conv2d(
-            nChannels, interChannels, kernel_size=1,
-            bias=True)  # Xavier initialization
-        self.bn2 = nn.BatchNorm2d(growthRate)
-        self.conv2 = nn.Conv2d(
-            interChannels, growthRate, kernel_size=3, padding=1,
-            bias=True)  # Xavier initialization
-        self.use_dropout = use_dropout
-        self.dropout = nn.Dropout(p=0.2)
-
-    def forward(self, x):
-        out = F.relu(self.bn1(self.conv1(x)))
-        if self.use_dropout:
-            out = self.dropout(out)
-        out = F.relu(self.bn2(self.conv2(out)))
-        if self.use_dropout:
-            out = self.dropout(out)
-        out = torch.cat([x, out], 1)
-        return out
-
-
-class SingleLayer(nn.Module):
-    def __init__(self, nChannels, growthRate, use_dropout):
-        super(SingleLayer, self).__init__()
-        self.bn1 = nn.BatchNorm2d(nChannels)
-        self.conv1 = nn.Conv2d(
-            nChannels, growthRate, kernel_size=3, padding=1, bias=False)
-
-        self.use_dropout = use_dropout
-        self.dropout = nn.Dropout(p=0.2)
-
-    def forward(self, x):
-        out = self.conv1(F.relu(x))
-        if self.use_dropout:
-            out = self.dropout(out)
-
-        out = torch.cat([x, out], 1)
-        return out
-
-
-class Transition(nn.Module):
-    def __init__(self, nChannels, out_channels, use_dropout):
-        super(Transition, self).__init__()
-        self.bn1 = nn.BatchNorm2d(out_channels)
-        self.conv1 = nn.Conv2d(
-            nChannels, out_channels, kernel_size=1, bias=False)
-        self.use_dropout = use_dropout
-        self.dropout = nn.Dropout(p=0.2)
-
-    def forward(self, x):
-        out = F.relu(self.bn1(self.conv1(x)))
-        if self.use_dropout:
-            out = self.dropout(out)
-        out = F.avg_pool2d(out, 2, ceil_mode=True, count_include_pad=False)
-        return out
-
-
-class DenseNet(nn.Module):
-    def __init__(self, growthRate, reduction, bottleneck, use_dropout,
-                 input_channel, **kwargs):
-        super(DenseNet, self).__init__()
-
-        nDenseBlocks = 16
-        nChannels = 2 * growthRate
-
-        self.conv1 = nn.Conv2d(
-            input_channel,
-            nChannels,
-            kernel_size=7,
-            padding=3,
-            stride=2,
-            bias=False)
-        self.dense1 = self._make_dense(nChannels, growthRate, nDenseBlocks,
-                                       bottleneck, use_dropout)
-        nChannels += nDenseBlocks * growthRate
-        out_channels = int(math.floor(nChannels * reduction))
-        self.trans1 = Transition(nChannels, out_channels, use_dropout)
-
-        nChannels = out_channels
-        self.dense2 = self._make_dense(nChannels, growthRate, nDenseBlocks,
-                                       bottleneck, use_dropout)
-        nChannels += nDenseBlocks * growthRate
-        out_channels = int(math.floor(nChannels * reduction))
-        self.trans2 = Transition(nChannels, out_channels, use_dropout)
-
-        nChannels = out_channels
-        self.dense3 = self._make_dense(nChannels, growthRate, nDenseBlocks,
-                                       bottleneck, use_dropout)
-        self.out_channels = out_channels
-
-    def _make_dense(self, nChannels, growthRate, nDenseBlocks, bottleneck,
-                    use_dropout):
-        layers = []
-        for i in range(int(nDenseBlocks)):
-            if bottleneck:
-                layers.append(Bottleneck(nChannels, growthRate, use_dropout))
-            else:
-                layers.append(SingleLayer(nChannels, growthRate, use_dropout))
-            nChannels += growthRate
-        return nn.Sequential(*layers)
-
-    def forward(self, inputs):
-        x, x_m, y = inputs
-        out = self.conv1(x)
-        out = F.relu(out, inplace=True)
-        out = F.max_pool2d(out, 2, ceil_mode=True)
-        out = self.dense1(out)
-        out = self.trans1(out)
-        out = self.dense2(out)
-        out = self.trans2(out)
-        out = self.dense3(out)
-        return out, x_m, y
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py
 import torch
-import torch.nn as nn
 import torch.nn.functional as F
+from torch import nn


 class ConvBNAct(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride,
-                 groups=1,
-                 use_act=True):
+    def __init__(
+        self, in_channels, out_channels, kernel_size, stride, groups=1, use_act=True
+    ):
        super().__init__()
        self.use_act = use_act
        self.conv = nn.Conv2d(
@@ -20,7 +16,8 @@ class ConvBNAct(nn.Module):
            stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
-            bias=False)
+            bias=False,
+        )
        self.bn = nn.BatchNorm2d(out_channels)
        if self.use_act:
            self.act = nn.ReLU()
@@ -42,7 +39,8 @@ class ESEModule(nn.Module):
            out_channels=channels,
            kernel_size=1,
            stride=1,
-            padding=0)
+            padding=0,
+        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
@@ -55,12 +53,13 @@ class ESEModule(nn.Module):

 class HG_Block(nn.Module):
    def __init__(
-            self,
-            in_channels,
-            mid_channels,
-            out_channels,
-            layer_num,
-            identity=False, ):
+        self,
+        in_channels,
+        mid_channels,
+        out_channels,
+        layer_num,
+        identity=False,
+    ):
        super().__init__()
        self.identity = identity

@@ -70,14 +69,18 @@ class HG_Block(nn.Module):
                in_channels=in_channels,
                out_channels=mid_channels,
                kernel_size=3,
-                stride=1))
+                stride=1,
+            )
+        )
        for _ in range(layer_num - 1):
            self.layers.append(
                ConvBNAct(
                    in_channels=mid_channels,
                    out_channels=mid_channels,
                    kernel_size=3,
-                    stride=1))
+                    stride=1,
+                )
+            )

        # feature aggregation
        total_channels = in_channels + layer_num * mid_channels
@@ -85,7 +88,8 @@ class HG_Block(nn.Module):
            in_channels=total_channels,
            out_channels=out_channels,
            kernel_size=1,
-            stride=1)
+            stride=1,
+        )
        self.att = ESEModule(out_channels)

    def forward(self, x):
@@ -104,14 +108,16 @@ class HG_Block(nn.Module):


 class HG_Stage(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 mid_channels,
-                 out_channels,
-                 block_num,
-                 layer_num,
-                 downsample=True,
-                 stride=[2, 1]):
+    def __init__(
+        self,
+        in_channels,
+        mid_channels,
+        out_channels,
+        block_num,
+        layer_num,
+        downsample=True,
+        stride=[2, 1],
+    ):
        super().__init__()
        self.downsample = downsample
        if downsample:
@@ -121,24 +127,19 @@ class HG_Stage(nn.Module):
                kernel_size=3,
                stride=stride,
                groups=in_channels,
-                use_act=False)
+                use_act=False,
+            )

        blocks_list = []
        blocks_list.append(
-            HG_Block(
-                in_channels,
-                mid_channels,
-                out_channels,
-                layer_num,
-                identity=False))
+            HG_Block(in_channels, mid_channels, out_channels, layer_num, identity=False)
+        )
        for _ in range(block_num - 1):
            blocks_list.append(
                HG_Block(
-                    out_channels,
-                    mid_channels,
-                    out_channels,
-                    layer_num,
-                    identity=True))
+                    out_channels, mid_channels, out_channels, layer_num, identity=True
+                )
+            )
        self.blocks = nn.Sequential(*blocks_list)

    def forward(self, x):
@@ -164,29 +165,31 @@ class PPHGNet(nn.Module):
    """

    def __init__(
-            self,
-            stem_channels,
-            stage_config,
-            layer_num,
-            in_channels=3,
-            det=False,
-            out_indices=None):
+        self,
+        stem_channels,
+        stage_config,
+        layer_num,
+        in_channels=3,
+        det=False,
+        out_indices=None,
+    ):
        super().__init__()
        self.det = det
-        self.out_indices = out_indices if out_indices is not None else [
-            0, 1, 2, 3
-        ]
+        self.out_indices = out_indices if out_indices is not None else [0, 1, 2, 3]

        # stem
        stem_channels.insert(0, in_channels)
-        self.stem = nn.Sequential(* [
-            ConvBNAct(
-                in_channels=stem_channels[i],
-                out_channels=stem_channels[i + 1],
-                kernel_size=3,
-                stride=2 if i == 0 else 1) for i in range(
-                    len(stem_channels) - 1)
-        ])
+        self.stem = nn.Sequential(
+            *[
+                ConvBNAct(
+                    in_channels=stem_channels[i],
+                    out_channels=stem_channels[i + 1],
+                    kernel_size=3,
+                    stride=2 if i == 0 else 1,
+                )
+                for i in range(len(stem_channels) - 1)
+            ]
+        )

        if self.det:
            self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
@@ -194,11 +197,25 @@ class PPHGNet(nn.Module):
        self.stages = nn.ModuleList()
        self.out_channels = []
        for block_id, k in enumerate(stage_config):
-            in_channels, mid_channels, out_channels, block_num, downsample, stride = stage_config[
-                k]
+            (
+                in_channels,
+                mid_channels,
+                out_channels,
+                block_num,
+                downsample,
+                stride,
+            ) = stage_config[k]
            self.stages.append(
-                HG_Stage(in_channels, mid_channels, out_channels, block_num,
-                         layer_num, downsample, stride))
+                HG_Stage(
+                    in_channels,
+                    mid_channels,
+                    out_channels,
+                    block_num,
+                    layer_num,
+                    downsample,
+                    stride,
+                )
+            )
            if block_id in self.out_indices:
                self.out_channels.append(out_channels)

@@ -237,32 +254,6 @@ class PPHGNet(nn.Module):
        return x


-def PPHGNet_tiny(pretrained=False, use_ssld=False, **kwargs):
-    """
-    PPHGNet_tiny
-    Args:
-        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
-                    If str, means the path of the pretrained model.
-        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
-    Returns:
-        model: nn.Layer. Specific `PPHGNet_tiny` model depends on args.
-    """
-    stage_config = {
-        # in_channels, mid_channels, out_channels, blocks, downsample
-        "stage1": [96, 96, 224, 1, False, [2, 1]],
-        "stage2": [224, 128, 448, 1, True, [1, 2]],
-        "stage3": [448, 160, 512, 2, True, [2, 1]],
-        "stage4": [512, 192, 768, 1, True, [2, 1]],
-    }
-
-    model = PPHGNet(
-        stem_channels=[48, 48, 96],
-        stage_config=stage_config,
-        layer_num=5,
-        **kwargs)
-    return model
-
-
 def PPHGNet_small(pretrained=False, use_ssld=False, det=False, **kwargs):
    """
    PPHGNet_small
@@ -294,31 +285,6 @@ def PPHGNet_small(pretrained=False, use_ssld=False, det=False, **kwargs):
        stage_config=stage_config_det if det else stage_config_rec,
        layer_num=6,
        det=det,
-        **kwargs)
-    return model
-
-
-def PPHGNet_base(pretrained=False, use_ssld=True, **kwargs):
-    """
-    PPHGNet_base
-    Args:
-        pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise.
-                    If str, means the path of the pretrained model.
-        use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True.
-    Returns:
-        model: nn.Layer. Specific `PPHGNet_base` model depends on args.
-    """
-    stage_config = {
-        # in_channels, mid_channels, out_channels, blocks, downsample
-        "stage1": [160, 192, 320, 1, False, [2, 1]],
-        "stage2": [320, 224, 640, 2, True, [1, 2]],
-        "stage3": [640, 256, 960, 3, True, [2, 1]],
-        "stage4": [960, 288, 1280, 2, True, [2, 1]],
-    }
-
-    model = PPHGNet(
-        stem_channels=[96, 96, 160],
-        stage_config=stage_config,
-        layer_num=7,
-        **kwargs)
+        **kwargs
+    )
    return model
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py
@@ -12,43 +12,54 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+from __future__ import absolute_import, division, print_function

 import torch
-import torch.nn as nn
 import torch.nn.functional as F
-from pytorchocr.modeling.common import Activation
+from torch import nn

-# from paddle.nn.initializer import Constant, KaimingNormal
-# from paddle.nn import AdaptiveAvgPool2D, BatchNorm2D, Conv2D, Dropout, Hardsigmoid, Hardswish, Identity, Linear, ReLU
-# from paddle.regularizer import L2Decay
+from ..common import Activation

 NET_CONFIG_det = {
    "blocks2":
-    #k, in_c, out_c, s, use_se
+    # k, in_c, out_c, s, use_se
    [[3, 16, 32, 1, False]],
    "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
    "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
-    "blocks5":
-    [[3, 128, 256, 2, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False],
-     [5, 256, 256, 1, False], [5, 256, 256, 1, False]],
-    "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True],
-                [5, 512, 512, 1, False], [5, 512, 512, 1, False]]
+    "blocks5": [
+        [3, 128, 256, 2, False],
+        [5, 256, 256, 1, False],
+        [5, 256, 256, 1, False],
+        [5, 256, 256, 1, False],
+        [5, 256, 256, 1, False],
+    ],
+    "blocks6": [
+        [5, 256, 512, 2, True],
+        [5, 512, 512, 1, True],
+        [5, 512, 512, 1, False],
+        [5, 512, 512, 1, False],
+    ],
 }

 NET_CONFIG_rec = {
    "blocks2":
-    #k, in_c, out_c, s, use_se
+    # k, in_c, out_c, s, use_se
    [[3, 16, 32, 1, False]],
    "blocks3": [[3, 32, 64, 1, False], [3, 64, 64, 1, False]],
    "blocks4": [[3, 64, 128, (2, 1), False], [3, 128, 128, 1, False]],
-    "blocks5":
-    [[3, 128, 256, (1, 2), False], [5, 256, 256, 1, False],
-     [5, 256, 256, 1, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False]],
-    "blocks6": [[5, 256, 512, (2, 1), True], [5, 512, 512, 1, True],
-                [5, 512, 512, (2, 1), False], [5, 512, 512, 1, False]]
+    "blocks5": [
+        [3, 128, 256, (1, 2), False],
+        [5, 256, 256, 1, False],
+        [5, 256, 256, 1, False],
+        [5, 256, 256, 1, False],
+        [5, 256, 256, 1, False],
+    ],
+    "blocks6": [
+        [5, 256, 512, (2, 1), True],
+        [5, 512, 512, 1, True],
+        [5, 512, 512, (2, 1), False],
+        [5, 512, 512, 1, False],
+    ],
 }


@@ -62,8 +73,7 @@ def make_divisible(v, divisor=16, min_value=None):


 class LearnableAffineBlock(nn.Module):
-    def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0,
-                 lab_lr=0.1):
+    def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.1):
        super().__init__()
        self.scale = nn.Parameter(torch.Tensor([scale_value]))
        self.bias = nn.Parameter(torch.Tensor([bias_value]))
@@ -73,13 +83,9 @@ class LearnableAffineBlock(nn.Module):


 class ConvBNLayer(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride,
-                 groups=1,
-                 lr_mult=1.0):
+    def __init__(
+        self, in_channels, out_channels, kernel_size, stride, groups=1, lr_mult=1.0
+    ):
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels=in_channels,
@@ -88,7 +94,8 @@ class ConvBNLayer(nn.Module):
            stride=stride,
            padding=(kernel_size - 1) // 2,
            groups=groups,
-            bias=False)
+            bias=False,
+        )

        self.bn = nn.BatchNorm2d(
            out_channels,
@@ -115,15 +122,17 @@ class Act(nn.Module):


 class LearnableRepLayer(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride=1,
-                 groups=1,
-                 num_conv_branches=1,
-                 lr_mult=1.0,
-                 lab_lr=0.1):
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=1,
+        groups=1,
+        num_conv_branches=1,
+        lr_mult=1.0,
+        lab_lr=0.1,
+    ):
        super().__init__()
        self.is_repped = False
        self.groups = groups
@@ -134,27 +143,35 @@ class LearnableRepLayer(nn.Module):
        self.num_conv_branches = num_conv_branches
        self.padding = (kernel_size - 1) // 2

-        self.identity = nn.BatchNorm2d(
-            num_features=in_channels,
-        ) if out_channels == in_channels and stride == 1 else None
+        self.identity = (
+            nn.BatchNorm2d(
+                num_features=in_channels,
+            )
+            if out_channels == in_channels and stride == 1
+            else None
+        )
+
+        self.conv_kxk = nn.ModuleList(
+            [
+                ConvBNLayer(
+                    in_channels,
+                    out_channels,
+                    kernel_size,
+                    stride,
+                    groups=groups,
+                    lr_mult=lr_mult,
+                )
+                for _ in range(self.num_conv_branches)
+            ]
+        )

-        self.conv_kxk = nn.ModuleList([
+        self.conv_1x1 = (
            ConvBNLayer(
-                in_channels,
-                out_channels,
-                kernel_size,
-                stride,
-                groups=groups,
-                lr_mult=lr_mult) for _ in range(self.num_conv_branches)
-        ])
-
-        self.conv_1x1 = ConvBNLayer(
-            in_channels,
-            out_channels,
-            1,
-            stride,
-            groups=groups,
-            lr_mult=lr_mult) if kernel_size > 1 else None
+                in_channels, out_channels, 1, stride, groups=groups, lr_mult=lr_mult
+            )
+            if kernel_size > 1
+            else None
+        )

        self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)
        self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr)
@@ -192,7 +209,8 @@ class LearnableRepLayer(nn.Module):
            kernel_size=self.kernel_size,
            stride=self.stride,
            padding=self.padding,
-            groups=self.groups)
+            groups=self.groups,
+        )
        self.reparam_conv.weight.data = kernel
        self.reparam_conv.bias.data = bias
        self.is_repped = True
@@ -205,8 +223,9 @@ class LearnableRepLayer(nn.Module):

    def _get_kernel_bias(self):
        kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1)
-        kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk(kernel_conv_1x1,
-                                                      self.kernel_size // 2)
+        kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk(
+            kernel_conv_1x1, self.kernel_size // 2
+        )

        kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)

@@ -233,15 +252,16 @@ class LearnableRepLayer(nn.Module):
            eps = branch.bn._epsilon
        else:
            assert isinstance(branch, nn.BatchNorm2d)
-            if not hasattr(self, 'id_tensor'):
+            if not hasattr(self, "id_tensor"):
                input_dim = self.in_channels // self.groups
                kernel_value = torch.zeros(
-                    (self.in_channels, input_dim, self.kernel_size,
-                     self.kernel_size),
-                    dtype=branch.weight.dtype)
+                    (self.in_channels, input_dim, self.kernel_size, self.kernel_size),
+                    dtype=branch.weight.dtype,
+                )
                for i in range(self.in_channels):
-                    kernel_value[i, i % input_dim, self.kernel_size // 2,
-                                 self.kernel_size // 2] = 1
+                    kernel_value[
+                        i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2
+                    ] = 1
                self.id_tensor = kernel_value
            kernel = self.id_tensor
            running_mean = branch._mean
@@ -287,15 +307,17 @@ class SELayer(nn.Module):


 class LCNetV3Block(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 stride,
-                 dw_size,
-                 use_se=False,
-                 conv_kxk_num=4,
-                 lr_mult=1.0,
-                 lab_lr=0.1):
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        stride,
+        dw_size,
+        use_se=False,
+        conv_kxk_num=4,
+        lr_mult=1.0,
+        lab_lr=0.1,
+    ):
        super().__init__()
        self.use_se = use_se
        self.dw_conv = LearnableRepLayer(
@@ -306,7 +328,8 @@ class LCNetV3Block(nn.Module):
            groups=in_channels,
            num_conv_branches=conv_kxk_num,
            lr_mult=lr_mult,
-            lab_lr=lab_lr)
+            lab_lr=lab_lr,
+        )
        if use_se:
            self.se = SELayer(in_channels, lr_mult=lr_mult)
        self.pw_conv = LearnableRepLayer(
@@ -316,7 +339,8 @@ class LCNetV3Block(nn.Module):
            stride=1,
            num_conv_branches=conv_kxk_num,
            lr_mult=lr_mult,
-            lab_lr=lab_lr)
+            lab_lr=lab_lr,
+        )

    def forward(self, x):
        x = self.dw_conv(x)
@@ -327,13 +351,15 @@ class LCNetV3Block(nn.Module):


 class PPLCNetV3(nn.Module):
-    def __init__(self,
-                 scale=1.0,
-                 conv_kxk_num=4,
-                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
-                 lab_lr=0.1,
-                 det=False,
-                 **kwargs):
+    def __init__(
+        self,
+        scale=1.0,
+        conv_kxk_num=4,
+        lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
+        lab_lr=0.1,
+        det=False,
+        **kwargs
+    ):
        super().__init__()
        self.scale = scale
        self.lr_mult_list = lr_mult_list
@@ -341,90 +367,102 @@ class PPLCNetV3(nn.Module):

        self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec

-        assert isinstance(self.lr_mult_list, (
-            list, tuple
-        )), "lr_mult_list should be in (list, tuple) but got {}".format(
-            type(self.lr_mult_list))
-        assert len(self.lr_mult_list
-                   ) == 6, "lr_mult_list length should be 6 but got {}".format(
-                       len(self.lr_mult_list))
+        assert isinstance(
+            self.lr_mult_list, (list, tuple)
+        ), "lr_mult_list should be in (list, tuple) but got {}".format(
+            type(self.lr_mult_list)
+        )
+        assert (
+            len(self.lr_mult_list) == 6
+        ), "lr_mult_list length should be 6 but got {}".format(len(self.lr_mult_list))

        self.conv1 = ConvBNLayer(
            in_channels=3,
            out_channels=make_divisible(16 * scale),
            kernel_size=3,
            stride=2,
-            lr_mult=self.lr_mult_list[0])
-
-        self.blocks2 = nn.Sequential(*[
-            LCNetV3Block(
-                in_channels=make_divisible(in_c * scale),
-                out_channels=make_divisible(out_c * scale),
-                dw_size=k,
-                stride=s,
-                use_se=se,
-                conv_kxk_num=conv_kxk_num,
-                lr_mult=self.lr_mult_list[1],
-                lab_lr=lab_lr)
-            for i, (k, in_c, out_c, s, se
-                    ) in enumerate(self.net_config["blocks2"])
-        ])
-
-        self.blocks3 = nn.Sequential(*[
-            LCNetV3Block(
-                in_channels=make_divisible(in_c * scale),
-                out_channels=make_divisible(out_c * scale),
-                dw_size=k,
-                stride=s,
-                use_se=se,
-                conv_kxk_num=conv_kxk_num,
-                lr_mult=self.lr_mult_list[2],
-                lab_lr=lab_lr)
-            for i, (k, in_c, out_c, s, se
-                    ) in enumerate(self.net_config["blocks3"])
-        ])
-
-        self.blocks4 = nn.Sequential(*[
-            LCNetV3Block(
-                in_channels=make_divisible(in_c * scale),
-                out_channels=make_divisible(out_c * scale),
-                dw_size=k,
-                stride=s,
-                use_se=se,
-                conv_kxk_num=conv_kxk_num,
-                lr_mult=self.lr_mult_list[3],
-                lab_lr=lab_lr)
-            for i, (k, in_c, out_c, s, se
-                    ) in enumerate(self.net_config["blocks4"])
-        ])
-
-        self.blocks5 = nn.Sequential(*[
-            LCNetV3Block(
-                in_channels=make_divisible(in_c * scale),
-                out_channels=make_divisible(out_c * scale),
-                dw_size=k,
-                stride=s,
-                use_se=se,
-                conv_kxk_num=conv_kxk_num,
-                lr_mult=self.lr_mult_list[4],
-                lab_lr=lab_lr)
-            for i, (k, in_c, out_c, s, se
-                    ) in enumerate(self.net_config["blocks5"])
-        ])
-
-        self.blocks6 = nn.Sequential(*[
-            LCNetV3Block(
-                in_channels=make_divisible(in_c * scale),
-                out_channels=make_divisible(out_c * scale),
-                dw_size=k,
-                stride=s,
-                use_se=se,
-                conv_kxk_num=conv_kxk_num,
-                lr_mult=self.lr_mult_list[5],
-                lab_lr=lab_lr)
-            for i, (k, in_c, out_c, s, se
-                    ) in enumerate(self.net_config["blocks6"])
-        ])
+            lr_mult=self.lr_mult_list[0],
+        )
+
+        self.blocks2 = nn.Sequential(
+            *[
+                LCNetV3Block(
+                    in_channels=make_divisible(in_c * scale),
+                    out_channels=make_divisible(out_c * scale),
+                    dw_size=k,
+                    stride=s,
+                    use_se=se,
+                    conv_kxk_num=conv_kxk_num,
+                    lr_mult=self.lr_mult_list[1],
+                    lab_lr=lab_lr,
+                )
+                for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks2"])
+            ]
+        )
+
+        self.blocks3 = nn.Sequential(
+            *[
+                LCNetV3Block(
+                    in_channels=make_divisible(in_c * scale),
+                    out_channels=make_divisible(out_c * scale),
+                    dw_size=k,
+                    stride=s,
+                    use_se=se,
+                    conv_kxk_num=conv_kxk_num,
+                    lr_mult=self.lr_mult_list[2],
+                    lab_lr=lab_lr,
+                )
+                for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks3"])
+            ]
+        )
+
+        self.blocks4 = nn.Sequential(
+            *[
+                LCNetV3Block(
+                    in_channels=make_divisible(in_c * scale),
+                    out_channels=make_divisible(out_c * scale),
+                    dw_size=k,
+                    stride=s,
+                    use_se=se,
+                    conv_kxk_num=conv_kxk_num,
+                    lr_mult=self.lr_mult_list[3],
+                    lab_lr=lab_lr,
+                )
+                for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks4"])
+            ]
+        )
+
+        self.blocks5 = nn.Sequential(
+            *[
+                LCNetV3Block(
+                    in_channels=make_divisible(in_c * scale),
+                    out_channels=make_divisible(out_c * scale),
+                    dw_size=k,
+                    stride=s,
+                    use_se=se,
+                    conv_kxk_num=conv_kxk_num,
+                    lr_mult=self.lr_mult_list[4],
+                    lab_lr=lab_lr,
+                )
+                for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks5"])
+            ]
+        )
+
+        self.blocks6 = nn.Sequential(
+            *[
+                LCNetV3Block(
+                    in_channels=make_divisible(in_c * scale),
+                    out_channels=make_divisible(out_c * scale),
+                    dw_size=k,
+                    stride=s,
+                    use_se=se,
+                    conv_kxk_num=conv_kxk_num,
+                    lr_mult=self.lr_mult_list[5],
+                    lab_lr=lab_lr,
+                )
+                for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks6"])
+            ]
+        )
        self.out_channels = make_divisible(512 * scale)

        if self.det:
@@ -436,15 +474,19 @@ class PPLCNetV3(nn.Module):
                make_divisible(self.net_config["blocks6"][-1][2] * scale),
            ]

-            self.layer_list = nn.ModuleList([
-                nn.Conv2d(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0),
-                nn.Conv2d(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0),
-                nn.Conv2d(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0),
-                nn.Conv2d(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0)
-            ])
+            self.layer_list = nn.ModuleList(
+                [
+                    nn.Conv2d(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0),
+                    nn.Conv2d(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0),
+                    nn.Conv2d(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0),
+                    nn.Conv2d(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0),
+                ]
+            )
            self.out_channels = [
-                int(mv_c[0] * scale), int(mv_c[1] * scale),
-                int(mv_c[2] * scale), int(mv_c[3] * scale)
+                int(mv_c[0] * scale),
+                int(mv_c[1] * scale),
+                int(mv_c[2] * scale),
+                int(mv_c[3] * scale),
            ]

    def forward(self, x):

--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3_bak.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3_bak.py
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from pytorchocr.modeling.common import Activation
-
-NET_CONFIG_det = {
-    "blocks2":
-    # k, in_c, out_c, s, use_se
-        [[3, 16, 32, 1, False]],
-    "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]],
-    "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]],
-    "blocks5":
-        [[3, 128, 256, 2, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False],
-         [5, 256, 256, 1, False], [5, 256, 256, 1, False]],
-    "blocks6": [[5, 256, 512, 2, True], [5, 512, 512, 1, True],
-                [5, 512, 512, 1, False], [5, 512, 512, 1, False]]
-}
-
-NET_CONFIG_rec = {
-    "blocks2":
-    # k, in_c, out_c, s, use_se
-        [[3, 16, 32, 1, False]],
-    "blocks3": [[3, 32, 64, 1, False], [3, 64, 64, 1, False]],
-    "blocks4": [[3, 64, 128, (2, 1), False], [3, 128, 128, 1, False]],
-    "blocks5":
-        [[3, 128, 256, (1, 2), False], [5, 256, 256, 1, False],
-         [5, 256, 256, 1, False], [5, 256, 256, 1, False], [5, 256, 256, 1, False]],
-    "blocks6": [[5, 256, 512, (2, 1), True], [5, 512, 512, 1, True],
-                [5, 512, 512, (2, 1), False], [5, 512, 512, 1, False]]
-}
-
-
-def make_divisible(v, divisor=16, min_value=None):
-    if min_value is None:
-        min_value = divisor
-    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
-    if new_v < 0.9 * v:
-        new_v += divisor
-    return new_v
-
-
-class LearnableAffineBlock(nn.Module):
-    def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0,
-                 lab_lr=0.1):
-        super().__init__()
-        self.scale = nn.Parameter(torch.Tensor([scale_value]))
-        self.bias = nn.Parameter(torch.Tensor([bias_value]))
-
-    def forward(self, x):
-        return self.scale * x + self.bias
-
-
-class ConvBNLayer(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride,
-                 groups=1,
-                 lr_mult=1.0):
-        super().__init__()
-        self.conv = nn.Conv2d(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=kernel_size,
-            stride=stride,
-            padding=(kernel_size - 1) // 2,
-            groups=groups,
-            bias=False)
-
-        self.bn = nn.BatchNorm2d(out_channels)
-
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.bn(x)
-        return x
-
-
-class Act(nn.Module):
-    def __init__(self, act="hard_swish", lr_mult=1.0, lab_lr=0.1):
-        super().__init__()
-        assert act in ['hard_swish', 'relu']
-        self.act = Activation(act)
-        self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)
-
-    def forward(self, x):
-        return self.lab(self.act(x))
-
-
-class LearnableRepLayer(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 kernel_size,
-                 stride=1,
-                 groups=1,
-                 num_conv_branches=1,
-                 lr_mult=1.0,
-                 lab_lr=0.1):
-        super().__init__()
-        self.is_repped = False
-        self.groups = groups
-        self.stride = stride
-        self.kernel_size = kernel_size
-        self.in_channels = in_channels
-        self.out_channels = out_channels
-        self.num_conv_branches = num_conv_branches
-        self.padding = (kernel_size - 1) // 2
-        self.identity = nn.BatchNorm2d(in_channels) if out_channels == in_channels and stride == 1 else None
-
-        self.conv_kxk = nn.ModuleList([
-            ConvBNLayer(
-                in_channels,
-                out_channels,
-                kernel_size,
-                stride,
-                groups=groups,
-                lr_mult=lr_mult) for _ in range(self.num_conv_branches)
-        ])
-
-        self.conv_1x1 = ConvBNLayer(
-            in_channels,
-            out_channels,
-            1,
-            stride,
-            groups=groups,
-            lr_mult=lr_mult) if kernel_size > 1 else None
-        self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr)
-        self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr)
-
-    def forward(self, x):
-        # for export
-        if self.is_repped:
-            out = self.lab(self.reparam_conv(x))
-            if self.stride != 2:
-                out = self.act(out)
-            return out
-
-        out = 0
-        if self.identity is not None:
-            out += self.identity(x)
-
-        if self.conv_1x1 is not None:
-            out += self.conv_1x1(x)
-
-        for conv in self.conv_kxk:
-            out += conv(x)
-
-        out = self.lab(out)
-        if self.stride != 2:
-            out = self.act(out)
-        return out
-
-    def rep(self):
-        if self.is_repped:
-            return
-        kernel, bias = self._get_kernel_bias()
-        self.reparam_conv = nn.Conv2d(
-            in_channels=self.in_channels,
-            out_channels=self.out_channels,
-            kernel_size=self.kernel_size,
-            stride=self.stride,
-            padding=self.padding,
-            groups=self.groups)
-        self.reparam_conv.weight.data = kernel
-        self.reparam_conv.bias.data = bias
-        self.is_repped = True
-
-    def _pad_kernel_1x1_to_kxk(self, kernel1x1, pad):
-        if not isinstance(kernel1x1, torch.Tensor):
-            return 0
-        else:
-            return nn.functional.pad(kernel1x1, [pad, pad, pad, pad])
-
-    def _get_kernel_bias(self):
-        kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1)
-        kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk(kernel_conv_1x1,
-                                                      self.kernel_size // 2)
-
-        kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity)
-
-        kernel_conv_kxk = 0
-        bias_conv_kxk = 0
-        for conv in self.conv_kxk:
-            kernel, bias = self._fuse_bn_tensor(conv)
-            kernel_conv_kxk += kernel
-            bias_conv_kxk += bias
-
-        kernel_reparam = kernel_conv_kxk + kernel_conv_1x1 + kernel_identity
-        bias_reparam = bias_conv_kxk + bias_conv_1x1 + bias_identity
-        return kernel_reparam, bias_reparam
-
-    def _fuse_bn_tensor(self, branch):
-        if not branch:
-            return 0, 0
-        elif isinstance(branch, ConvBNLayer):
-            kernel = branch.conv.weight
-            running_mean = branch.bn.running_mean
-            running_var = branch.bn.running_var
-            gamma = branch.bn.weight
-            beta = branch.bn.bias
-            eps = branch.bn.eps
-        else:
-            assert isinstance(branch, nn.BatchNorm2d)
-            if not hasattr(self, 'id_tensor'):
-                input_dim = self.in_channels // self.groups
-                kernel_value = torch.zeros(
-                    (self.in_channels, input_dim, self.kernel_size,
-                     self.kernel_size),
-                    dtype=branch.weight.dtype)
-                for i in range(self.in_channels):
-                    kernel_value[i, i % input_dim, self.kernel_size // 2,
-                                    self.kernel_size // 2] = 1
-                self.id_tensor = kernel_value
-            kernel = self.id_tensor
-            running_mean = branch.running_mean
-            running_var = branch.running_var
-            gamma = branch.weight
-            beta = branch.bias
-            eps = branch.eps
-        std = (running_var + eps).sqrt()
-        t = (gamma / std).reshape((-1, 1, 1, 1))
-        return kernel * t, beta - running_mean * gamma / std
-
-
-class SELayer(nn.Module):
-    def __init__(self, channel, reduction=4, lr_mult=1.0):
-        super().__init__()
-        self.avg_pool = nn.AdaptiveAvgPool2d(1)
-        self.conv1 = nn.Conv2d(
-            in_channels=channel,
-            out_channels=channel // reduction,
-            kernel_size=1,
-            stride=1,
-            padding=0)
-        self.relu = nn.ReLU()
-        self.conv2 = nn.Conv2d(
-            in_channels=channel // reduction,
-            out_channels=channel,
-            kernel_size=1,
-            stride=1,
-            padding=0)
-        self.hardsigmoid = Activation('hard_sigmoid')
-
-    def forward(self, x):
-        identity = x
-        x = self.avg_pool(x)
-        x = self.conv1(x)
-        x = self.relu(x)
-        x = self.conv2(x)
-        x = self.hardsigmoid(x)
-        x = x * identity
-        return x
-
-
-class LCNetV3Block(nn.Module):
-    def __init__(self,
-                 in_channels,
-                 out_channels,
-                 stride,
-                 dw_size,
-                 use_se=False,
-                 conv_kxk_num=4,
-                 lr_mult=1.0,
-                 lab_lr=0.1):
-        super().__init__()
-        self.use_se = use_se
-        self.dw_conv = LearnableRepLayer(
-            in_channels=in_channels,
-            out_channels=in_channels,
-            kernel_size=dw_size,
-            stride=stride,
-            groups=in_channels,
-            num_conv_branches=conv_kxk_num,
-            lr_mult=lr_mult,
-            lab_lr=lab_lr)
-        if use_se:
-            self.se = SELayer(in_channels, lr_mult=lr_mult)
-        self.pw_conv = LearnableRepLayer(
-            in_channels=in_channels,
-            out_channels=out_channels,
-            kernel_size=1,
-            stride=1,
-            num_conv_branches=conv_kxk_num,
-            lr_mult=lr_mult,
-            lab_lr=lab_lr)
-
-    def forward(self, x):
-        x = self.dw_conv(x)
-        if self.use_se:
-            x = self.se(x)
-        x = self.pw_conv(x)
-        return x
-
-
-class PPLCNetV3(nn.Module):
-    def __init__(self,
-                 scale=1.0,
-                 conv_kxk_num=4,
-                 lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
-                 lab_lr=0.1,
-                 det=False,
-                 **kwargs):
-        super().__init__()
-        self.scale = scale
-        self.lr_mult_list = lr_mult_list
-        self.det = det
-
-        self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec
-
-        assert isinstance(self.lr_mult_list, (
-            list, tuple
-        )), "lr_mult_list should be in (list, tuple) but got {}".format(
-            type(self.lr_mult_list))
-        assert len(self.lr_mult_list
-                   ) == 6, "lr_mult_list length should be 6 but got {}".format(
-            len(self.lr_mult_list))
-
-        self.conv1 = ConvBNLayer(
-            in_channels=3,
-            out_channels=make_divisible(16 * scale),
-            kernel_size=3,
-            stride=2,
-            lr_mult=self.lr_mult_list[0])
-
-        self.blocks2 = nn.Sequential(*[
-            LCNetV3Block(
-                in_channels=make_divisible(in_c * scale),
-                out_channels=make_divisible(out_c * scale),
-                dw_size=k,
-                stride=s,
-                use_se=se,
-                conv_kxk_num=conv_kxk_num,
-                lr_mult=self.lr_mult_list[1],
-                lab_lr=lab_lr)
-            for i, (k, in_c, out_c, s, se
-                    ) in enumerate(self.net_config["blocks2"])
-        ])
-
-        self.blocks3 = nn.Sequential(*[
-            LCNetV3Block(
-                in_channels=make_divisible(in_c * scale),
-                out_channels=make_divisible(out_c * scale),
-                dw_size=k,
-                stride=s,
-                use_se=se,
-                conv_kxk_num=conv_kxk_num,
-                lr_mult=self.lr_mult_list[2],
-                lab_lr=lab_lr)
-            for i, (k, in_c, out_c, s, se
-                    ) in enumerate(self.net_config["blocks3"])
-        ])
-
-        self.blocks4 = nn.Sequential(*[
-            LCNetV3Block(
-                in_channels=make_divisible(in_c * scale),
-                out_channels=make_divisible(out_c * scale),
-                dw_size=k,
-                stride=s,
-                use_se=se,
-                conv_kxk_num=conv_kxk_num,
-                lr_mult=self.lr_mult_list[3],
-                lab_lr=lab_lr)
-            for i, (k, in_c, out_c, s, se
-                    ) in enumerate(self.net_config["blocks4"])
-        ])
-
-        self.blocks5 = nn.Sequential(*[
-            LCNetV3Block(
-                in_channels=make_divisible(in_c * scale),
-                out_channels=make_divisible(out_c * scale),
-                dw_size=k,
-                stride=s,
-                use_se=se,
-                conv_kxk_num=conv_kxk_num,
-                lr_mult=self.lr_mult_list[4],
-                lab_lr=lab_lr)
-            for i, (k, in_c, out_c, s, se
-                    ) in enumerate(self.net_config["blocks5"])
-        ])
-
-        self.blocks6 = nn.Sequential(*[
-            LCNetV3Block(
-                in_channels=make_divisible(in_c * scale),
-                out_channels=make_divisible(out_c * scale),
-                dw_size=k,
-                stride=s,
-                use_se=se,
-                conv_kxk_num=conv_kxk_num,
-                lr_mult=self.lr_mult_list[5],
-                lab_lr=lab_lr)
-            for i, (k, in_c, out_c, s, se
-                    ) in enumerate(self.net_config["blocks6"])
-        ])
-        self.out_channels = make_divisible(512 * scale)
-
-        if self.det:
-            mv_c = [16, 24, 56, 480]
-            self.out_channels = [
-                make_divisible(self.net_config["blocks3"][-1][2] * scale),
-                make_divisible(self.net_config["blocks4"][-1][2] * scale),
-                make_divisible(self.net_config["blocks5"][-1][2] * scale),
-                make_divisible(self.net_config["blocks6"][-1][2] * scale),
-            ]
-
-            self.layer_list = nn.ModuleList([
-                nn.Conv2d(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0),
-                nn.Conv2d(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0),
-                nn.Conv2d(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0),
-                nn.Conv2d(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0)
-            ])
-            self.out_channels = [
-                int(mv_c[0] * scale), int(mv_c[1] * scale),
-                int(mv_c[2] * scale), int(mv_c[3] * scale)
-            ]
-
-    def forward(self, x):
-        out_list = []
-        x = self.conv1(x)
-
-        x = self.blocks2(x)
-        x = self.blocks3(x)
-        out_list.append(x)
-        x = self.blocks4(x)
-        out_list.append(x)
-        x = self.blocks5(x)
-        out_list.append(x)
-        import numpy as np
-        x = torch.Tensor(np.load('../PaddleOCR4debug/tmp.npy'))
-        x = self.blocks6(x)
-        out_list.append(x)
-
-        if self.det:
-            out_list[0] = self.layer_list[0](out_list[0])
-            out_list[1] = self.layer_list[1](out_list[1])
-            out_list[2] = self.layer_list[2](out_list[2])
-            out_list[3] = self.layer_list[3](out_list[3])
-            return out_list
-
-        if self.training:
-            x = F.adaptive_avg_pool2d(x, [1, 40])
-        else:
-            x = F.avg_pool2d(x, [3, 2])
-        return x
\ No newline at end of file
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py
+++ b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py
-import os, sys
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-from pytorchocr.modeling.common import Activation
+from torch import nn
+
+from .det_mobilenet_v3 import ConvBNLayer, ResidualUnit, make_divisible

-from .det_mobilenet_v3 import ResidualUnit, ConvBNLayer, make_divisible

 class MobileNetV3(nn.Module):
-    def __init__(self,
-                 in_channels=3,
-                 model_name='small',
-                 scale=0.5,
-                 large_stride=None,
-                 small_stride=None,
-                 **kwargs):
+    def __init__(
+        self,
+        in_channels=3,
+        model_name="small",
+        scale=0.5,
+        large_stride=None,
+        small_stride=None,
+        **kwargs
+    ):
        super(MobileNetV3, self).__init__()
        if small_stride is None:
            small_stride = [2, 2, 2, 2]
        if large_stride is None:
            large_stride = [1, 2, 2, 2]

-        assert isinstance(large_stride, list), "large_stride type must " \
-                                               "be list but got {}".format(type(large_stride))
-        assert isinstance(small_stride, list), "small_stride type must " \
-                                               "be list but got {}".format(type(small_stride))
-        assert len(large_stride) == 4, "large_stride length must be " \
-                                       "4 but got {}".format(len(large_stride))
-        assert len(small_stride) == 4, "small_stride length must be " \
-                                       "4 but got {}".format(len(small_stride))
+        assert isinstance(
+            large_stride, list
+        ), "large_stride type must " "be list but got {}".format(type(large_stride))
+        assert isinstance(
+            small_stride, list
+        ), "small_stride type must " "be list but got {}".format(type(small_stride))
+        assert (
+            len(large_stride) == 4
+        ), "large_stride length must be " "4 but got {}".format(len(large_stride))
+        assert (
+            len(small_stride) == 4
+        ), "small_stride length must be " "4 but got {}".format(len(small_stride))

        if model_name == "large":
            cfg = [
                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, False, 'relu', large_stride[0]],
-                [3, 64, 24, False, 'relu', (large_stride[1], 1)],
-                [3, 72, 24, False, 'relu', 1],
-                [5, 72, 40, True, 'relu', (large_stride[2], 1)],
-                [5, 120, 40, True, 'relu', 1],
-                [5, 120, 40, True, 'relu', 1],
-                [3, 240, 80, False, 'hard_swish', 1],
-                [3, 200, 80, False, 'hard_swish', 1],
-                [3, 184, 80, False, 'hard_swish', 1],
-                [3, 184, 80, False, 'hard_swish', 1],
-                [3, 480, 112, True, 'hard_swish', 1],
-                [3, 672, 112, True, 'hard_swish', 1],
-                [5, 672, 160, True, 'hard_swish', (large_stride[3], 1)],
-                [5, 960, 160, True, 'hard_swish', 1],
-                [5, 960, 160, True, 'hard_swish', 1],
+                [3, 16, 16, False, "relu", large_stride[0]],
+                [3, 64, 24, False, "relu", (large_stride[1], 1)],
+                [3, 72, 24, False, "relu", 1],
+                [5, 72, 40, True, "relu", (large_stride[2], 1)],
+                [5, 120, 40, True, "relu", 1],
+                [5, 120, 40, True, "relu", 1],
+                [3, 240, 80, False, "hard_swish", 1],
+                [3, 200, 80, False, "hard_swish", 1],
+                [3, 184, 80, False, "hard_swish", 1],
+                [3, 184, 80, False, "hard_swish", 1],
+                [3, 480, 112, True, "hard_swish", 1],
+                [3, 672, 112, True, "hard_swish", 1],
+                [5, 672, 160, True, "hard_swish", (large_stride[3], 1)],
+                [5, 960, 160, True, "hard_swish", 1],
+                [5, 960, 160, True, "hard_swish", 1],
            ]
            cls_ch_squeeze = 960
        elif model_name == "small":
            cfg = [
                # k, exp, c,  se,     nl,  s,
-                [3, 16, 16, True, 'relu', (small_stride[0], 1)],
-                [3, 72, 24, False, 'relu', (small_stride[1], 1)],
-                [3, 88, 24, False, 'relu', 1],
-                [5, 96, 40, True, 'hard_swish', (small_stride[2], 1)],
-                [5, 240, 40, True, 'hard_swish', 1],
-                [5, 240, 40, True, 'hard_swish', 1],
-                [5, 120, 48, True, 'hard_swish', 1],
-                [5, 144, 48, True, 'hard_swish', 1],
-                [5, 288, 96, True, 'hard_swish', (small_stride[3], 1)],
-                [5, 576, 96, True, 'hard_swish', 1],
-                [5, 576, 96, True, 'hard_swish', 1],
+                [3, 16, 16, True, "relu", (small_stride[0], 1)],
+                [3, 72, 24, False, "relu", (small_stride[1], 1)],
+                [3, 88, 24, False, "relu", 1],
+                [5, 96, 40, True, "hard_swish", (small_stride[2], 1)],
+                [5, 240, 40, True, "hard_swish", 1],
+                [5, 240, 40, True, "hard_swish", 1],
+                [5, 120, 48, True, "hard_swish", 1],
+                [5, 144, 48, True, "hard_swish", 1],
+                [5, 288, 96, True, "hard_swish", (small_stride[3], 1)],
+                [5, 576, 96, True, "hard_swish", 1],
+                [5, 576, 96, True, "hard_swish", 1],
            ]
            cls_ch_squeeze = 576
        else:
-            raise NotImplementedError("mode[" + model_name +
-                                      "_model] is not implemented!")
+            raise NotImplementedError(
+                "mode[" + model_name + "_model] is not implemented!"
+            )

        supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25]
-        assert scale in supported_scale, \
-            "supported scales are {} but input scale is {}".format(supported_scale, scale)
+        assert (
+            scale in supported_scale
+        ), "supported scales are {} but input scale is {}".format(
+            supported_scale, scale
+        )

        inplanes = 16
        # conv1
@@ -83,12 +90,13 @@ class MobileNetV3(nn.Module):
            padding=1,
            groups=1,
            if_act=True,
-            act='hard_swish',
-            name='conv1')
+            act="hard_swish",
+            name="conv1",
+        )
        i = 0
        block_list = []
        inplanes = make_divisible(inplanes * scale)
-        for (k, exp, c, se, nl, s) in cfg:
+        for k, exp, c, se, nl, s in cfg:
            block_list.append(
                ResidualUnit(
                    in_channels=inplanes,
@@ -98,7 +106,9 @@ class MobileNetV3(nn.Module):
                    stride=s,
                    use_se=se,
                    act=nl,
-                    name='conv' + str(i + 2)))
+                    name="conv" + str(i + 2),
+                )
+            )
            inplanes = make_divisible(scale * c)
            i += 1
        self.blocks = nn.Sequential(*block_list)
@@ -111,8 +121,9 @@ class MobileNetV3(nn.Module):
            padding=0,
            groups=1,
            if_act=True,
-            act='hard_swish',
-            name='conv_last')
+            act="hard_swish",
+            name="conv_last",
+        )

        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.out_channels = make_divisible(scale * cls_ch_squeeze)
@@ -122,4 +133,4 @@ class MobileNetV3(nn.Module):
        x = self.blocks(x)
        x = self.conv2(x)
        x = self.pool(x)
-        return x
\ No newline at end of file
+        return x