model_init.py 5.69 KB
Newer Older
1
import torch
2
3
from loguru import logger

4
from magic_pdf.config.constants import MODEL_NAME
5
from magic_pdf.libs.config_reader import get_device
6
from magic_pdf.model.model_list import AtomicModel
7
8
9
10
from magic_pdf.model.sub_modules.layout.doclayout_yolo.DocLayoutYOLO import \
    DocLayoutYOLOModel
from magic_pdf.model.sub_modules.layout.layoutlmv3.model_init import \
    Layoutlmv3_Predictor
11
12
from magic_pdf.model.sub_modules.mfd.yolov8.YOLOv8 import YOLOv8MFDModel
from magic_pdf.model.sub_modules.mfr.unimernet.Unimernet import UnimernetModel
13
14
15
16
from magic_pdf.model.sub_modules.ocr.paddleocr.ppocr_273_mod import \
    ModifiedPaddleOCR
from magic_pdf.model.sub_modules.table.rapidtable.rapid_table import \
    RapidTableModel
17
# from magic_pdf.model.sub_modules.ocr.paddleocr.ppocr_291_mod import ModifiedPaddleOCR
18
19
20
21
from magic_pdf.model.sub_modules.table.structeqtable.struct_eqtable import \
    StructTableModel
from magic_pdf.model.sub_modules.table.tablemaster.tablemaster_paddle import \
    TableMasterPaddleModel
22
23


24
def table_model_init(table_model_type, model_path, max_time, _device_='cpu', ocr_engine=None):
    """Build the table-recognition model selected by ``table_model_type``.

    Args:
        table_model_type: One of ``MODEL_NAME.STRUCT_EQTABLE``,
            ``MODEL_NAME.TABLE_MASTER`` or ``MODEL_NAME.RAPID_TABLE``.
        model_path: Model location. Passed as the first argument to
            ``StructTableModel`` and as ``'model_dir'`` in the TableMaster
            config; not used by the RapidTable branch.
        max_time: Forwarded as ``max_time`` to ``StructTableModel`` only.
        _device_: Stored under ``'device'`` in the TableMaster config only.
        ocr_engine: Forwarded to ``RapidTableModel`` only.

    Returns:
        The constructed table model instance.

    Raises:
        SystemExit: With status 1 when ``table_model_type`` is not one of
            the supported model names.
    """
    if table_model_type == MODEL_NAME.STRUCT_EQTABLE:
        return StructTableModel(model_path, max_new_tokens=2048, max_time=max_time)

    if table_model_type == MODEL_NAME.TABLE_MASTER:
        config = {
            'model_dir': model_path,
            'device': _device_,
        }
        return TableMasterPaddleModel(config)

    if table_model_type == MODEL_NAME.RAPID_TABLE:
        return RapidTableModel(ocr_engine)

    logger.error('table model type not allow')
    # Raise SystemExit directly instead of calling the `exit()` helper: that
    # helper is injected by the `site` module for interactive use and is not
    # available when Python runs with -S or in some frozen builds.
    raise SystemExit(1)


def mfd_model_init(weight, device='cpu'):
    """Create the YOLOv8 formula-detection (MFD) model.

    Args:
        weight: Forwarded unchanged as the first argument of
            ``YOLOv8MFDModel``.
        device: Target device. If its string form starts with ``"npu"`` it
            is wrapped in ``torch.device`` before being handed to the model;
            any other value is passed through as-is.

    Returns:
        The constructed ``YOLOv8MFDModel`` instance.
    """
    # NOTE(review): presumably the YOLO wrapper needs a torch.device object
    # for NPU targets - confirm against YOLOv8MFDModel.
    targets_npu = str(device).startswith("npu")
    resolved_device = torch.device(device) if targets_npu else device
    return YOLOv8MFDModel(weight, resolved_device)


def mfr_model_init(weight_dir, cfg_path, device='cpu'):
    """Create the UniMERNet formula-recognition (MFR) model.

    All three arguments are forwarded positionally, in order, to
    ``UnimernetModel``.

    Returns:
        The constructed ``UnimernetModel`` instance.
    """
    return UnimernetModel(weight_dir, cfg_path, device)


def layout_model_init(weight, config_file, device):
    """Create the LayoutLMv3 layout predictor.

    All three arguments are forwarded positionally, in order, to
    ``Layoutlmv3_Predictor``.

    Returns:
        The constructed ``Layoutlmv3_Predictor`` instance.
    """
    return Layoutlmv3_Predictor(weight, config_file, device)


def doclayout_yolo_model_init(weight, device='cpu'):
    """Create the DocLayout-YOLO layout model.

    Args:
        weight: Forwarded unchanged as the first argument of
            ``DocLayoutYOLOModel``.
        device: Target device. If its string form starts with ``"npu"`` it
            is wrapped in ``torch.device`` before being handed to the model;
            any other value is passed through as-is.

    Returns:
        The constructed ``DocLayoutYOLOModel`` instance.
    """
    # NOTE(review): presumably the YOLO wrapper needs a torch.device object
    # for NPU targets - confirm against DocLayoutYOLOModel.
    targets_npu = str(device).startswith("npu")
    resolved_device = torch.device(device) if targets_npu else device
    return DocLayoutYOLOModel(weight, resolved_device)


def ocr_model_init(show_log: bool = False,
                   det_db_box_thresh=0.3,
                   lang=None,
                   use_dilation=True,
                   det_db_unclip_ratio=1.8,
                   ):
    """Create the PaddleOCR-based OCR model.

    Every argument is forwarded to ``ModifiedPaddleOCR`` as a keyword of the
    same name, except ``lang``: it is only passed when it is neither ``None``
    nor the empty string, so the OCR class falls back to its own default
    language otherwise.

    Returns:
        The constructed ``ModifiedPaddleOCR`` instance.
    """
    # NOTE: forwarding a `use_npu` flag (derived from get_device()) is
    # currently disabled in this function.
    lang_given = not (lang is None or lang == '')

    ocr_options = {
        'show_log': show_log,
        'det_db_box_thresh': det_db_box_thresh,
    }
    if lang_given:
        ocr_options['lang'] = lang
    ocr_options['use_dilation'] = use_dilation
    ocr_options['det_db_unclip_ratio'] = det_db_unclip_ratio

    return ModifiedPaddleOCR(**ocr_options)


class AtomModelSingleton:
    """Singleton that caches initialised atomic models.

    Every construction returns the same instance, and models are stored in a
    class-level dictionary so each distinct model is initialised only once.
    """

    _instance = None
    # Cache of initialised models, shared on the class by design.
    # Keys are built in get_atom_model().
    _models = {}

    def __new__(cls, *args, **kwargs):
        """Return the single shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def get_atom_model(self, atom_model_name: str, **kwargs):
        """Return the cached model for ``atom_model_name``, creating it if needed.

        The cache key is ``(atom_model_name, variant)`` where the variant is
        ``lang`` for OCR, ``layout_model_name`` for Layout and
        ``table_model_name`` for Table; other models are keyed by name only.
        Other keyword arguments do not take part in the key, so the first
        call for a given key decides how that model is configured.

        Args:
            atom_model_name: One of the ``AtomicModel`` names.
            **kwargs: Forwarded unchanged to ``atom_model_init`` when the
                model is not cached yet.

        Returns:
            The cached (or newly initialised) model.
        """
        if atom_model_name == AtomicModel.OCR:
            lang = kwargs.get('lang')
            # ocr_model_init builds the same default-language model for
            # lang=None and lang='', so map both to one key instead of
            # loading that model twice.
            if lang == '':
                lang = None
            key = (atom_model_name, lang)
        elif atom_model_name == AtomicModel.Layout:
            key = (atom_model_name, kwargs.get('layout_model_name'))
        elif atom_model_name == AtomicModel.Table:
            key = (atom_model_name, kwargs.get('table_model_name'))
        else:
            key = atom_model_name

        if key not in self._models:
            self._models[key] = atom_model_init(model_name=atom_model_name, **kwargs)
        return self._models[key]
125
126
127
128

def atom_model_init(model_name: str, **kwargs):
    """Initialise the atomic model identified by ``model_name``.

    Args:
        model_name: One of the ``AtomicModel`` names (Layout, MFD, MFR, OCR,
            Table).
        **kwargs: Model-specific settings, read by key:

            * Layout: ``layout_model_name``, ``device`` and either
              ``layout_weights`` + ``layout_config_file`` (LayoutLMv3) or
              ``doclayout_yolo_weights`` (DocLayout-YOLO).
            * MFD: ``mfd_weights``, ``device``.
            * MFR: ``mfr_weight_dir``, ``mfr_cfg_path``, ``device``.
            * OCR: ``ocr_show_log``, ``det_db_box_thresh``, ``lang``.
            * Table: ``table_model_name``, ``table_model_path``,
              ``table_max_time``, ``device``, ``ocr_engine``.

    Returns:
        The initialised model.

    Raises:
        SystemExit: With status 1 when ``model_name`` is not recognised or
            when no model was produced (for example an unknown
            ``layout_model_name``).
    """
    atom_model = None
    if model_name == AtomicModel.Layout:
        layout_model_name = kwargs.get('layout_model_name')
        if layout_model_name == MODEL_NAME.LAYOUTLMv3:
            atom_model = layout_model_init(
                kwargs.get('layout_weights'),
                kwargs.get('layout_config_file'),
                kwargs.get('device')
            )
        elif layout_model_name == MODEL_NAME.DocLayout_YOLO:
            atom_model = doclayout_yolo_model_init(
                kwargs.get('doclayout_yolo_weights'),
                kwargs.get('device')
            )
    elif model_name == AtomicModel.MFD:
        atom_model = mfd_model_init(
            kwargs.get('mfd_weights'),
            kwargs.get('device')
        )
    elif model_name == AtomicModel.MFR:
        atom_model = mfr_model_init(
            kwargs.get('mfr_weight_dir'),
            kwargs.get('mfr_cfg_path'),
            kwargs.get('device')
        )
    elif model_name == AtomicModel.OCR:
        # Only forward settings the caller actually supplied, so a missing
        # key falls back to ocr_model_init's declared default instead of
        # overriding it with None.
        ocr_settings = {
            'show_log': kwargs.get('ocr_show_log'),
            'det_db_box_thresh': kwargs.get('det_db_box_thresh'),
            'lang': kwargs.get('lang'),
        }
        atom_model = ocr_model_init(
            **{name: value for name, value in ocr_settings.items() if value is not None}
        )
    elif model_name == AtomicModel.Table:
        atom_model = table_model_init(
            kwargs.get('table_model_name'),
            kwargs.get('table_model_path'),
            kwargs.get('table_max_time'),
            kwargs.get('device'),
            # Forward the OCR engine so the RapidTable back end can receive
            # it; this is None when the caller does not provide one, which
            # matches the previous behaviour.
            kwargs.get('ocr_engine'),
        )
    else:
        logger.error('model name not allow')
        # Raise SystemExit directly: the `exit()` helper comes from the
        # `site` module and is not guaranteed to exist in every runtime.
        raise SystemExit(1)

    if atom_model is None:
        logger.error('model init failed')
        raise SystemExit(1)
    return atom_model