diff --git a/magic_pdf/__init__.py b/magic_pdf/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/magic_pdf/config/__init__.py b/magic_pdf/config/__init__.py
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/magic_pdf/config/constants.py b/magic_pdf/config/constants.py
deleted file mode 100644
index b18d630bbd1144816c4e0102c7f5ad53bc6d6194..0000000000000000000000000000000000000000
--- a/magic_pdf/config/constants.py
+++ /dev/null
@@ -1,60 +0,0 @@
-"""Custom fields at the span level."""
-# whether the span was merged across pages
-CROSS_PAGE = 'cross_page'
-
-"""
-Custom fields at the block level
-"""
-# whether lines in the block were deleted
-LINES_DELETED = 'lines_deleted'
-
-# table recognition max time default value
-TABLE_MAX_TIME_VALUE = 400
-
-# pp_table_result_max_length
-TABLE_MAX_LEN = 480
-
-# table master structure dict
-TABLE_MASTER_DICT = 'table_master_structure_dict.txt'
-
-# table master dir
-TABLE_MASTER_DIR = 'table_structure_tablemaster_infer/'
-
-# pp detect model dir
-DETECT_MODEL_DIR = 'ch_PP-OCRv4_det_infer'
-
-# pp rec model dir
-REC_MODEL_DIR = 'ch_PP-OCRv4_rec_infer'
-
-# pp rec char dict path
-REC_CHAR_DICT = 'ppocr_keys_v1.txt'
-
-# pp rec copy rec directory
-PP_REC_DIRECTORY = '.paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer'
-
-# pp rec copy det directory
-PP_DET_DIRECTORY = '.paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer'
-
-
-class MODEL_NAME:
-    # pp table structure algorithm
-    TABLE_MASTER = 'tablemaster'
-    # struct eqtable
-    STRUCT_EQTABLE = 'struct_eqtable'
-
-    DocLayout_YOLO = 'doclayout_yolo'
-
-    LAYOUTLMv3 = 'layoutlmv3'
-
-    YOLO_V8_MFD = 'yolo_v8_mfd'
-
-    UniMerNet_v2_Small = 'unimernet_small'
-
-    RAPID_TABLE = 'rapid_table'
-
-    YOLO_V11_LangDetect = 'yolo_v11n_langdetect'
-
-
-PARSE_TYPE_TXT = 'txt'
-PARSE_TYPE_OCR = 'ocr'
-
diff --git a/magic_pdf/config/drop_reason.py b/magic_pdf/config/drop_reason.py
deleted file mode 100644
index d75d5676b81481c987f6c4d4948aaa82e9a4c86f..0000000000000000000000000000000000000000
--- a/magic_pdf/config/drop_reason.py
+++ /dev/null
@@ -1,35 +0,0 @@
-class DropReason:
-    TEXT_BLCOK_HOR_OVERLAP = 'text_block_horizontal_overlap'  # text blocks overlap horizontally, so the reading order cannot be determined reliably
-    USEFUL_BLOCK_HOR_OVERLAP = (
-        'useful_block_horizontal_overlap'  # blocks that must be kept overlap horizontally
-    )
-    COMPLICATED_LAYOUT = 'complicated_layout'  # complicated layout, not supported yet
-    TOO_MANY_LAYOUT_COLUMNS = 'too_many_layout_columns'  # layouts with more than 2 columns are not supported yet
-    COLOR_BACKGROUND_TEXT_BOX = 'color_background_text_box'  # the PDF contains colored background blocks that change the reading order; PDFs with colored-background text blocks are not supported yet
-    HIGH_COMPUTATIONAL_lOAD_BY_IMGS = (
-        'high_computational_load_by_imgs'  # contains special images that are too costly to process, so the page is dropped
-    )
-    HIGH_COMPUTATIONAL_lOAD_BY_SVGS = (
-        'high_computational_load_by_svgs'  # special SVG images that are too costly to process, so the page is dropped
-    )
-    HIGH_COMPUTATIONAL_lOAD_BY_TOTAL_PAGES = 'high_computational_load_by_total_pages'  # total computation exceeds what the current method can handle
-    MISS_DOC_LAYOUT_RESULT = 'missing doc_layout_result'  # layout analysis failed
-    Exception = '_exception'  # an exception occurred during parsing
-    ENCRYPTED = 'encrypted'  # the PDF is encrypted
-    EMPTY_PDF = 'total_page=0'  # the PDF has zero pages
-    NOT_IS_TEXT_PDF = 'not_is_text_pdf'  # not a text-based PDF, cannot be parsed directly
-    DENSE_SINGLE_LINE_BLOCK = 'dense_single_line_block'  # paragraphs cannot be segmented cleanly
-    TITLE_DETECTION_FAILED = 'title_detection_failed'  # title detection failed
-    TITLE_LEVEL_FAILED = (
-        'title_level_failed'  # failed to determine title levels (e.g. level-1, level-2, level-3 titles)
-    )
-    PARA_SPLIT_FAILED = 'para_split_failed'  # paragraph splitting failed
-    PARA_MERGE_FAILED = 'para_merge_failed'  # paragraph merging failed
-    NOT_ALLOW_LANGUAGE = 'not_allow_language'  # unsupported language
-    SPECIAL_PDF = 'special_pdf'
-    PSEUDO_SINGLE_COLUMN = 'pseudo_single_column'  # text columns cannot be determined precisely
-    CAN_NOT_DETECT_PAGE_LAYOUT = 'can_not_detect_page_layout'  # the page layout cannot be analyzed
-    NEGATIVE_BBOX_AREA = 'negative_bbox_area'  # scaling produced a negative bbox area
-    OVERLAP_BLOCKS_CAN_NOT_SEPARATION = (
-        'overlap_blocks_can_t_separation'  # overlapping blocks cannot be separated
-    )
diff --git a/magic_pdf/config/drop_tag.py b/magic_pdf/config/drop_tag.py
deleted file mode 100644
index 51a2bc99378ddb1182a3c87de4e3623f00f93807..0000000000000000000000000000000000000000
--- a/magic_pdf/config/drop_tag.py
+++ /dev/null
@@ -1,19 +0,0 @@
-
-COLOR_BG_HEADER_TXT_BLOCK = 'color_background_header_txt_block'
-PAGE_NO = 'page-no'  # page number
-CONTENT_IN_FOOT_OR_HEADER = 'in-foot-header-area'  # text inside the header or footer area
-VERTICAL_TEXT = 'vertical-text'  # vertical text
-ROTATE_TEXT = 'rotate-text'  # rotated text
-EMPTY_SIDE_BLOCK = 'empty-side-block'  # empty block at the page margin with no content
-ON_IMAGE_TEXT = 'on-image-text'  # text on top of an image
-ON_TABLE_TEXT = 'on-table-text'  # text on top of a table
-
-
-class DropTag:
-    PAGE_NUMBER = 'page_no'
-    HEADER = 'header'
-    FOOTER = 'footer'
-    FOOTNOTE = 'footnote'
-    NOT_IN_LAYOUT = 'not_in_layout'
-    SPAN_OVERLAP = 'span_overlap'
-    BLOCK_OVERLAP = 'block_overlap'
diff --git a/magic_pdf/config/enums.py b/magic_pdf/config/enums.py
deleted file mode 100644
index 6f3e91a3227e6cb6678af0fc578a833a3d2439e3..0000000000000000000000000000000000000000
--- a/magic_pdf/config/enums.py
+++ /dev/null
@@ -1,7 +0,0 @@
-
-import enum
-
-
-class SupportedPdfParseMethod(enum.Enum):
-    OCR = 'ocr'
-    TXT = 'txt'
diff --git a/magic_pdf/config/exceptions.py b/magic_pdf/config/exceptions.py
deleted file mode 100644
index c0b7beda3409df0daaf63aac254f337186bc2999..0000000000000000000000000000000000000000
--- a/magic_pdf/config/exceptions.py
+++ /dev/null
@@ -1,39 +0,0 @@
-
-class FileNotExisted(Exception):
-
-    def __init__(self, path):
-        self.path = path
-
-    def __str__(self):
-        return f'File {self.path} does not exist.'
- - -class InvalidConfig(Exception): - def __init__(self, msg): - self.msg = msg - - def __str__(self): - return f'Invalid config: {self.msg}' - - -class InvalidParams(Exception): - def __init__(self, msg): - self.msg = msg - - def __str__(self): - return f'Invalid params: {self.msg}' - - -class EmptyData(Exception): - def __init__(self, msg): - self.msg = msg - - def __str__(self): - return f'Empty data: {self.msg}' - -class CUDA_NOT_AVAILABLE(Exception): - def __init__(self, msg): - self.msg = msg - - def __str__(self): - return f'CUDA not available: {self.msg}' \ No newline at end of file diff --git a/magic_pdf/config/make_content_config.py b/magic_pdf/config/make_content_config.py deleted file mode 100644 index abcd74a4b860f163deb484ad33797c638034fb08..0000000000000000000000000000000000000000 --- a/magic_pdf/config/make_content_config.py +++ /dev/null @@ -1,11 +0,0 @@ -class MakeMode: - MM_MD = 'mm_markdown' - NLP_MD = 'nlp_markdown' - STANDARD_FORMAT = 'standard_format' - - -class DropMode: - WHOLE_PDF = 'whole_pdf' - SINGLE_PAGE = 'single_page' - NONE = 'none' - NONE_WITH_REASON = 'none_with_reason' diff --git a/magic_pdf/config/model_block_type.py b/magic_pdf/config/model_block_type.py deleted file mode 100644 index 4ad739ac51c08071626d8badd17f43b0eb90a66c..0000000000000000000000000000000000000000 --- a/magic_pdf/config/model_block_type.py +++ /dev/null @@ -1,10 +0,0 @@ -from enum import Enum - - -class ModelBlockTypeEnum(Enum): - TITLE = 0 - PLAIN_TEXT = 1 - ABANDON = 2 - ISOLATE_FORMULA = 8 - EMBEDDING = 13 - ISOLATED = 14 diff --git a/magic_pdf/config/ocr_content_type.py b/magic_pdf/config/ocr_content_type.py deleted file mode 100644 index 30d88cfdedbf28d3552a92e1549b839bea195f5b..0000000000000000000000000000000000000000 --- a/magic_pdf/config/ocr_content_type.py +++ /dev/null @@ -1,40 +0,0 @@ -class ContentType: - Image = 'image' - Table = 'table' - Text = 'text' - InlineEquation = 'inline_equation' - InterlineEquation = 'interline_equation' - - -class BlockType: - Image = 'image' - ImageBody = 'image_body' - ImageCaption = 'image_caption' - ImageFootnote = 'image_footnote' - Table = 'table' - TableBody = 'table_body' - TableCaption = 'table_caption' - TableFootnote = 'table_footnote' - Text = 'text' - Title = 'title' - InterlineEquation = 'interline_equation' - Footnote = 'footnote' - Discarded = 'discarded' - List = 'list' - Index = 'index' - - -class CategoryId: - Title = 0 - Text = 1 - Abandon = 2 - ImageBody = 3 - ImageCaption = 4 - TableBody = 5 - TableCaption = 6 - TableFootnote = 7 - InterlineEquation_Layout = 8 - InlineEquation = 13 - InterlineEquation_YOLO = 14 - OcrText = 15 - ImageFootnote = 101 diff --git a/magic_pdf/data/__init__.py b/magic_pdf/data/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/data/batch_build_dataset.py b/magic_pdf/data/batch_build_dataset.py deleted file mode 100644 index 52d33485f1c9a624b31e16029526f0ac653a165f..0000000000000000000000000000000000000000 --- a/magic_pdf/data/batch_build_dataset.py +++ /dev/null @@ -1,167 +0,0 @@ -import concurrent.futures - -import fitz - -from magic_pdf.data.dataset import PymuDocDataset -from magic_pdf.data.utils import fitz_doc_to_image # PyMuPDF - - -def partition_array_greedy(arr, k): - """Partition an array into k parts using a simple greedy approach. 
- - Parameters: - ----------- - arr : list - The input array of integers - k : int - Number of partitions to create - - Returns: - -------- - partitions : list of lists - The k partitions of the array - """ - # Handle edge cases - if k <= 0: - raise ValueError('k must be a positive integer') - if k > len(arr): - k = len(arr) # Adjust k if it's too large - if k == 1: - return [list(range(len(arr)))] - if k == len(arr): - return [[i] for i in range(len(arr))] - - # Sort the array in descending order - sorted_indices = sorted(range(len(arr)), key=lambda i: arr[i][1], reverse=True) - - # Initialize k empty partitions - partitions = [[] for _ in range(k)] - partition_sums = [0] * k - - # Assign each element to the partition with the smallest current sum - for idx in sorted_indices: - # Find the partition with the smallest sum - min_sum_idx = partition_sums.index(min(partition_sums)) - - # Add the element to this partition - partitions[min_sum_idx].append(idx) # Store the original index - partition_sums[min_sum_idx] += arr[idx][1] - - return partitions - - -def process_pdf_batch(pdf_jobs, idx): - """Process a batch of PDF pages using multiple threads. - - Parameters: - ----------- - pdf_jobs : list of tuples - List of (pdf_path, page_num) tuples - output_dir : str or None - Directory to save images to - num_threads : int - Number of threads to use - **kwargs : - Additional arguments for process_pdf_page - - Returns: - -------- - images : list - List of processed images - """ - images = [] - - for pdf_path, _ in pdf_jobs: - doc = fitz.open(pdf_path) - tmp = [] - for page_num in range(len(doc)): - page = doc[page_num] - tmp.append(fitz_doc_to_image(page)) - images.append(tmp) - return (idx, images) - - -def batch_build_dataset(pdf_paths, k, lang=None): - """Process multiple PDFs by partitioning them into k balanced parts and - processing each part in parallel. 
- - Parameters: - ----------- - pdf_paths : list - List of paths to PDF files - k : int - Number of partitions to create - output_dir : str or None - Directory to save images to - threads_per_worker : int - Number of threads to use per worker - **kwargs : - Additional arguments for process_pdf_page - - Returns: - -------- - all_images : list - List of all processed images - """ - - results = [] - for pdf_path in pdf_paths: - with open(pdf_path, 'rb') as f: - pdf_bytes = f.read() - dataset = PymuDocDataset(pdf_bytes, lang=lang) - results.append(dataset) - return results - - - # - # # Get page counts for each PDF - # pdf_info = [] - # total_pages = 0 - # - # for pdf_path in pdf_paths: - # try: - # doc = fitz.open(pdf_path) - # num_pages = len(doc) - # pdf_info.append((pdf_path, num_pages)) - # total_pages += num_pages - # doc.close() - # except Exception as e: - # print(f'Error opening {pdf_path}: {e}') - # - # # Partition the jobs based on page countEach job has 1 page - # partitions = partition_array_greedy(pdf_info, k) - # - # # Process each partition in parallel - # all_images_h = {} - # - # with concurrent.futures.ProcessPoolExecutor(max_workers=k) as executor: - # # Submit one task per partition - # futures = [] - # for sn, partition in enumerate(partitions): - # # Get the jobs for this partition - # partition_jobs = [pdf_info[idx] for idx in partition] - # - # # Submit the task - # future = executor.submit( - # process_pdf_batch, - # partition_jobs, - # sn - # ) - # futures.append(future) - # # Process results as they complete - # for i, future in enumerate(concurrent.futures.as_completed(futures)): - # try: - # idx, images = future.result() - # all_images_h[idx] = images - # except Exception as e: - # print(f'Error processing partition: {e}') - # results = [None] * len(pdf_paths) - # for i in range(len(partitions)): - # partition = partitions[i] - # for j in range(len(partition)): - # with open(pdf_info[partition[j]][0], 'rb') as f: - # pdf_bytes = f.read() - # dataset = PymuDocDataset(pdf_bytes, lang=lang) - # dataset.set_images(all_images_h[i][j]) - # results[partition[j]] = dataset - # return results \ No newline at end of file diff --git a/magic_pdf/data/data_reader_writer/__init__.py b/magic_pdf/data/data_reader_writer/__init__.py deleted file mode 100644 index f8f8234739e4cc756b56dbd4cb502893481a7a09..0000000000000000000000000000000000000000 --- a/magic_pdf/data/data_reader_writer/__init__.py +++ /dev/null @@ -1,12 +0,0 @@ -from magic_pdf.data.data_reader_writer.filebase import \ - FileBasedDataReader # noqa: F401 -from magic_pdf.data.data_reader_writer.filebase import \ - FileBasedDataWriter # noqa: F401 -from magic_pdf.data.data_reader_writer.multi_bucket_s3 import \ - MultiBucketS3DataReader # noqa: F401 -from magic_pdf.data.data_reader_writer.multi_bucket_s3 import \ - MultiBucketS3DataWriter # noqa: F401 -from magic_pdf.data.data_reader_writer.s3 import S3DataReader # noqa: F401 -from magic_pdf.data.data_reader_writer.s3 import S3DataWriter # noqa: F401 -from magic_pdf.data.data_reader_writer.base import DataReader # noqa: F401 -from magic_pdf.data.data_reader_writer.base import DataWriter # noqa: F401 \ No newline at end of file diff --git a/magic_pdf/data/data_reader_writer/base.py b/magic_pdf/data/data_reader_writer/base.py deleted file mode 100644 index d294b329559723303b1f42cb9f48c39f07ae3622..0000000000000000000000000000000000000000 --- a/magic_pdf/data/data_reader_writer/base.py +++ /dev/null @@ -1,63 +0,0 @@ - -from abc import ABC, abstractmethod - - -class 
DataReader(ABC): - - def read(self, path: str) -> bytes: - """Read the file. - - Args: - path (str): file path to read - - Returns: - bytes: the content of the file - """ - return self.read_at(path) - - @abstractmethod - def read_at(self, path: str, offset: int = 0, limit: int = -1) -> bytes: - """Read the file at offset and limit. - - Args: - path (str): the file path - offset (int, optional): the number of bytes skipped. Defaults to 0. - limit (int, optional): the length of bytes want to read. Defaults to -1. - - Returns: - bytes: the content of the file - """ - pass - - -class DataWriter(ABC): - @abstractmethod - def write(self, path: str, data: bytes) -> None: - """Write the data to the file. - - Args: - path (str): the target file where to write - data (bytes): the data want to write - """ - pass - - def write_string(self, path: str, data: str) -> None: - """Write the data to file, the data will be encoded to bytes. - - Args: - path (str): the target file where to write - data (str): the data want to write - """ - - def safe_encode(data: str, method: str): - try: - bit_data = data.encode(encoding=method, errors='replace') - return bit_data, True - except: # noqa - return None, False - - for method in ['utf-8', 'ascii']: - bit_data, flag = safe_encode(data, method) - if flag: - self.write(path, bit_data) - break diff --git a/magic_pdf/data/data_reader_writer/filebase.py b/magic_pdf/data/data_reader_writer/filebase.py deleted file mode 100644 index ff098ea0826e207663a6f51a21a4214d951f91b4..0000000000000000000000000000000000000000 --- a/magic_pdf/data/data_reader_writer/filebase.py +++ /dev/null @@ -1,62 +0,0 @@ -import os - -from magic_pdf.data.data_reader_writer.base import DataReader, DataWriter - - -class FileBasedDataReader(DataReader): - def __init__(self, parent_dir: str = ''): - """Initialized with parent_dir. - - Args: - parent_dir (str, optional): the parent directory that may be used within methods. Defaults to ''. - """ - self._parent_dir = parent_dir - - def read_at(self, path: str, offset: int = 0, limit: int = -1) -> bytes: - """Read at offset and limit. - - Args: - path (str): the path of file, if the path is relative path, it will be joined with parent_dir. - offset (int, optional): the number of bytes skipped. Defaults to 0. - limit (int, optional): the length of bytes want to read. Defaults to -1. - - Returns: - bytes: the content of file - """ - fn_path = path - if not os.path.isabs(fn_path) and len(self._parent_dir) > 0: - fn_path = os.path.join(self._parent_dir, path) - - with open(fn_path, 'rb') as f: - f.seek(offset) - if limit == -1: - return f.read() - else: - return f.read(limit) - - -class FileBasedDataWriter(DataWriter): - def __init__(self, parent_dir: str = '') -> None: - """Initialized with parent_dir. - - Args: - parent_dir (str, optional): the parent directory that may be used within methods. Defaults to ''. - """ - self._parent_dir = parent_dir - - def write(self, path: str, data: bytes) -> None: - """Write file with data. - - Args: - path (str): the path of file, if the path is relative path, it will be joined with parent_dir. 
- data (bytes): the data want to write - """ - fn_path = path - if not os.path.isabs(fn_path) and len(self._parent_dir) > 0: - fn_path = os.path.join(self._parent_dir, path) - - if not os.path.exists(os.path.dirname(fn_path)) and os.path.dirname(fn_path) != "": - os.makedirs(os.path.dirname(fn_path), exist_ok=True) - - with open(fn_path, 'wb') as f: - f.write(data) diff --git a/magic_pdf/data/data_reader_writer/multi_bucket_s3.py b/magic_pdf/data/data_reader_writer/multi_bucket_s3.py deleted file mode 100644 index 525209f07db93a4dd0b8b4e17bd2a5ba2453c605..0000000000000000000000000000000000000000 --- a/magic_pdf/data/data_reader_writer/multi_bucket_s3.py +++ /dev/null @@ -1,145 +0,0 @@ - -from magic_pdf.config.exceptions import InvalidConfig, InvalidParams -from magic_pdf.data.data_reader_writer.base import DataReader, DataWriter -from magic_pdf.data.io.s3 import S3Reader, S3Writer -from magic_pdf.data.schemas import S3Config -from magic_pdf.libs.path_utils import (parse_s3_range_params, parse_s3path, - remove_non_official_s3_args) - - -class MultiS3Mixin: - def __init__(self, default_prefix: str, s3_configs: list[S3Config]): - """Initialized with multiple s3 configs. - - Args: - default_prefix (str): the default prefix of the relative path. for example, {some_bucket}/{some_prefix} or {some_bucket} - s3_configs (list[S3Config]): list of s3 configs, the bucket_name must be unique in the list. - - Raises: - InvalidConfig: default bucket config not in s3_configs. - InvalidConfig: bucket name not unique in s3_configs. - InvalidConfig: default bucket must be provided. - """ - if len(default_prefix) == 0: - raise InvalidConfig('default_prefix must be provided') - - arr = default_prefix.strip('/').split('/') - self.default_bucket = arr[0] - self.default_prefix = '/'.join(arr[1:]) - - found_default_bucket_config = False - for conf in s3_configs: - if conf.bucket_name == self.default_bucket: - found_default_bucket_config = True - break - - if not found_default_bucket_config: - raise InvalidConfig( - f'default_bucket: {self.default_bucket} config must be provided in s3_configs: {s3_configs}' - ) - - uniq_bucket = set([conf.bucket_name for conf in s3_configs]) - if len(uniq_bucket) != len(s3_configs): - raise InvalidConfig( - f'the bucket_name in s3_configs: {s3_configs} must be unique' - ) - - self.s3_configs = s3_configs - self._s3_clients_h: dict = {} - - -class MultiBucketS3DataReader(DataReader, MultiS3Mixin): - def read(self, path: str) -> bytes: - """Read the path from s3, select diffect bucket client for each request - based on the bucket, also support range read. - - Args: - path (str): the s3 path of file, the path must be in the format of s3://bucket_name/path?offset,limit. - for example: s3://bucket_name/path?0,100. - - Returns: - bytes: the content of s3 file. 
- """ - may_range_params = parse_s3_range_params(path) - if may_range_params is None or 2 != len(may_range_params): - byte_start, byte_len = 0, -1 - else: - byte_start, byte_len = int(may_range_params[0]), int(may_range_params[1]) - path = remove_non_official_s3_args(path) - return self.read_at(path, byte_start, byte_len) - - def __get_s3_client(self, bucket_name: str): - if bucket_name not in set([conf.bucket_name for conf in self.s3_configs]): - raise InvalidParams( - f'bucket name: {bucket_name} not found in s3_configs: {self.s3_configs}' - ) - if bucket_name not in self._s3_clients_h: - conf = next( - filter(lambda conf: conf.bucket_name == bucket_name, self.s3_configs) - ) - self._s3_clients_h[bucket_name] = S3Reader( - bucket_name, - conf.access_key, - conf.secret_key, - conf.endpoint_url, - conf.addressing_style, - ) - return self._s3_clients_h[bucket_name] - - def read_at(self, path: str, offset: int = 0, limit: int = -1) -> bytes: - """Read the file with offset and limit, select diffect bucket client - for each request based on the bucket. - - Args: - path (str): the file path. - offset (int, optional): the number of bytes skipped. Defaults to 0. - limit (int, optional): the number of bytes want to read. Defaults to -1 which means infinite. - - Returns: - bytes: the file content. - """ - if path.startswith('s3://'): - bucket_name, path = parse_s3path(path) - s3_reader = self.__get_s3_client(bucket_name) - else: - s3_reader = self.__get_s3_client(self.default_bucket) - if self.default_prefix: - path = self.default_prefix + '/' + path - return s3_reader.read_at(path, offset, limit) - - -class MultiBucketS3DataWriter(DataWriter, MultiS3Mixin): - def __get_s3_client(self, bucket_name: str): - if bucket_name not in set([conf.bucket_name for conf in self.s3_configs]): - raise InvalidParams( - f'bucket name: {bucket_name} not found in s3_configs: {self.s3_configs}' - ) - if bucket_name not in self._s3_clients_h: - conf = next( - filter(lambda conf: conf.bucket_name == bucket_name, self.s3_configs) - ) - self._s3_clients_h[bucket_name] = S3Writer( - bucket_name, - conf.access_key, - conf.secret_key, - conf.endpoint_url, - conf.addressing_style, - ) - return self._s3_clients_h[bucket_name] - - def write(self, path: str, data: bytes) -> None: - """Write file with data, also select diffect bucket client for each - request based on the bucket. - - Args: - path (str): the path of file, if the path is relative path, it will be joined with parent_dir. - data (bytes): the data want to write. - """ - if path.startswith('s3://'): - bucket_name, path = parse_s3path(path) - s3_writer = self.__get_s3_client(bucket_name) - else: - s3_writer = self.__get_s3_client(self.default_bucket) - if self.default_prefix: - path = self.default_prefix + '/' + path - return s3_writer.write(path, data) diff --git a/magic_pdf/data/data_reader_writer/s3.py b/magic_pdf/data/data_reader_writer/s3.py deleted file mode 100644 index 34ec43b7c1b52ef931a8b06febba12c11ac7ab82..0000000000000000000000000000000000000000 --- a/magic_pdf/data/data_reader_writer/s3.py +++ /dev/null @@ -1,73 +0,0 @@ -from magic_pdf.data.data_reader_writer.multi_bucket_s3 import ( - MultiBucketS3DataReader, MultiBucketS3DataWriter) -from magic_pdf.data.schemas import S3Config - - -class S3DataReader(MultiBucketS3DataReader): - def __init__( - self, - default_prefix_without_bucket: str, - bucket: str, - ak: str, - sk: str, - endpoint_url: str, - addressing_style: str = 'auto', - ): - """s3 reader client. 
- - Args: - default_prefix_without_bucket: prefix that not contains bucket - bucket (str): bucket name - ak (str): access key - sk (str): secret key - endpoint_url (str): endpoint url of s3 - addressing_style (str, optional): Defaults to 'auto'. Other valid options here are 'path' and 'virtual' - refer to https://boto3.amazonaws.com/v1/documentation/api/1.9.42/guide/s3.html - """ - super().__init__( - f'{bucket}/{default_prefix_without_bucket}', - [ - S3Config( - bucket_name=bucket, - access_key=ak, - secret_key=sk, - endpoint_url=endpoint_url, - addressing_style=addressing_style, - ) - ], - ) - - -class S3DataWriter(MultiBucketS3DataWriter): - def __init__( - self, - default_prefix_without_bucket: str, - bucket: str, - ak: str, - sk: str, - endpoint_url: str, - addressing_style: str = 'auto', - ): - """s3 writer client. - - Args: - default_prefix_without_bucket: prefix that not contains bucket - bucket (str): bucket name - ak (str): access key - sk (str): secret key - endpoint_url (str): endpoint url of s3 - addressing_style (str, optional): Defaults to 'auto'. Other valid options here are 'path' and 'virtual' - refer to https://boto3.amazonaws.com/v1/documentation/api/1.9.42/guide/s3.html - """ - super().__init__( - f'{bucket}/{default_prefix_without_bucket}', - [ - S3Config( - bucket_name=bucket, - access_key=ak, - secret_key=sk, - endpoint_url=endpoint_url, - addressing_style=addressing_style, - ) - ], - ) diff --git a/magic_pdf/data/dataset.py b/magic_pdf/data/dataset.py deleted file mode 100644 index fb626e12cbfb7845fff1fed30dbbfdf650d507ea..0000000000000000000000000000000000000000 --- a/magic_pdf/data/dataset.py +++ /dev/null @@ -1,408 +0,0 @@ -import os -from abc import ABC, abstractmethod -from typing import Callable, Iterator - -import fitz -from loguru import logger - -from magic_pdf.config.enums import SupportedPdfParseMethod -from magic_pdf.data.schemas import PageInfo -from magic_pdf.data.utils import fitz_doc_to_image -from magic_pdf.filter import classify - - -class PageableData(ABC): - @abstractmethod - def get_image(self) -> dict: - """Transform data to image.""" - pass - - @abstractmethod - def get_doc(self) -> fitz.Page: - """Get the pymudoc page.""" - pass - - @abstractmethod - def get_page_info(self) -> PageInfo: - """Get the page info of the page. - - Returns: - PageInfo: the page info of this page - """ - pass - - @abstractmethod - def draw_rect(self, rect_coords, color, fill, fill_opacity, width, overlay): - """draw rectangle. - - Args: - rect_coords (list[float]): four elements array contain the top-left and bottom-right coordinates, [x0, y0, x1, y1] - color (list[float] | None): three element tuple which describe the RGB of the board line, None means no board line - fill (list[float] | None): fill the board with RGB, None means will not fill with color - fill_opacity (float): opacity of the fill, range from [0, 1] - width (float): the width of board - overlay (bool): fill the color in foreground or background. True means fill in background. - """ - pass - - @abstractmethod - def insert_text(self, coord, content, fontsize, color): - """insert text. - - Args: - coord (list[float]): four elements array contain the top-left and bottom-right coordinates, [x0, y0, x1, y1] - content (str): the text content - fontsize (int): font size of the text - color (list[float] | None): three element tuple which describe the RGB of the board line, None will use the default font color! 
- """ - pass - - -class Dataset(ABC): - @abstractmethod - def __len__(self) -> int: - """The length of the dataset.""" - pass - - @abstractmethod - def __iter__(self) -> Iterator[PageableData]: - """Yield the page data.""" - pass - - @abstractmethod - def supported_methods(self) -> list[SupportedPdfParseMethod]: - """The methods that this dataset support. - - Returns: - list[SupportedPdfParseMethod]: The supported methods, Valid methods are: OCR, TXT - """ - pass - - @abstractmethod - def data_bits(self) -> bytes: - """The bits used to create this dataset.""" - pass - - @abstractmethod - def get_page(self, page_id: int) -> PageableData: - """Get the page indexed by page_id. - - Args: - page_id (int): the index of the page - - Returns: - PageableData: the page doc object - """ - pass - - @abstractmethod - def dump_to_file(self, file_path: str): - """Dump the file. - - Args: - file_path (str): the file path - """ - pass - - @abstractmethod - def apply(self, proc: Callable, *args, **kwargs): - """Apply callable method which. - - Args: - proc (Callable): invoke proc as follows: - proc(self, *args, **kwargs) - - Returns: - Any: return the result generated by proc - """ - pass - - @abstractmethod - def classify(self) -> SupportedPdfParseMethod: - """classify the dataset. - - Returns: - SupportedPdfParseMethod: _description_ - """ - pass - - @abstractmethod - def clone(self): - """clone this dataset.""" - pass - - -class PymuDocDataset(Dataset): - def __init__(self, bits: bytes, lang=None): - """Initialize the dataset, which wraps the pymudoc documents. - - Args: - bits (bytes): the bytes of the pdf - """ - self._raw_fitz = fitz.open('pdf', bits) - self._records = [Doc(v) for v in self._raw_fitz] - self._data_bits = bits - self._raw_data = bits - self._classify_result = None - - if lang == '': - self._lang = None - elif lang == 'auto': - from magic_pdf.model.sub_modules.language_detection.utils import \ - auto_detect_lang - self._lang = auto_detect_lang(self._data_bits) - logger.info(f'lang: {lang}, detect_lang: {self._lang}') - else: - self._lang = lang - logger.info(f'lang: {lang}') - - def __len__(self) -> int: - """The page number of the pdf.""" - return len(self._records) - - def __iter__(self) -> Iterator[PageableData]: - """Yield the page doc object.""" - return iter(self._records) - - def supported_methods(self) -> list[SupportedPdfParseMethod]: - """The method supported by this dataset. - - Returns: - list[SupportedPdfParseMethod]: the supported methods - """ - return [SupportedPdfParseMethod.OCR, SupportedPdfParseMethod.TXT] - - def data_bits(self) -> bytes: - """The pdf bits used to create this dataset.""" - return self._data_bits - - def get_page(self, page_id: int) -> PageableData: - """The page doc object. - - Args: - page_id (int): the page doc index - - Returns: - PageableData: the page doc object - """ - return self._records[page_id] - - def dump_to_file(self, file_path: str): - """Dump the file. - - Args: - file_path (str): the file path - """ - - dir_name = os.path.dirname(file_path) - if dir_name not in ('', '.', '..'): - os.makedirs(dir_name, exist_ok=True) - self._raw_fitz.save(file_path) - - def apply(self, proc: Callable, *args, **kwargs): - """Apply callable method which. 
- - Args: - proc (Callable): invoke proc as follows: - proc(dataset, *args, **kwargs) - - Returns: - Any: return the result generated by proc - """ - if 'lang' in kwargs and self._lang is not None: - kwargs['lang'] = self._lang - return proc(self, *args, **kwargs) - - def classify(self) -> SupportedPdfParseMethod: - """classify the dataset. - - Returns: - SupportedPdfParseMethod: _description_ - """ - if self._classify_result is None: - self._classify_result = classify(self._data_bits) - return self._classify_result - - def clone(self): - """clone this dataset.""" - return PymuDocDataset(self._raw_data) - - def set_images(self, images): - for i in range(len(self._records)): - self._records[i].set_image(images[i]) - -class ImageDataset(Dataset): - def __init__(self, bits: bytes, lang=None): - """Initialize the dataset, which wraps the pymudoc documents. - - Args: - bits (bytes): the bytes of the photo which will be converted to pdf first. then converted to pymudoc. - """ - pdf_bytes = fitz.open(stream=bits).convert_to_pdf() - self._raw_fitz = fitz.open('pdf', pdf_bytes) - self._records = [Doc(v) for v in self._raw_fitz] - self._raw_data = bits - self._data_bits = pdf_bytes - - if lang == '': - self._lang = None - elif lang == 'auto': - from magic_pdf.model.sub_modules.language_detection.utils import \ - auto_detect_lang - self._lang = auto_detect_lang(self._data_bits) - logger.info(f'lang: {lang}, detect_lang: {self._lang}') - else: - self._lang = lang - logger.info(f'lang: {lang}') - - def __len__(self) -> int: - """The length of the dataset.""" - return len(self._records) - - def __iter__(self) -> Iterator[PageableData]: - """Yield the page object.""" - return iter(self._records) - - def supported_methods(self): - """The method supported by this dataset. - - Returns: - list[SupportedPdfParseMethod]: the supported methods - """ - return [SupportedPdfParseMethod.OCR] - - def data_bits(self) -> bytes: - """The pdf bits used to create this dataset.""" - return self._data_bits - - def get_page(self, page_id: int) -> PageableData: - """The page doc object. - - Args: - page_id (int): the page doc index - - Returns: - PageableData: the page doc object - """ - return self._records[page_id] - - def dump_to_file(self, file_path: str): - """Dump the file. - - Args: - file_path (str): the file path - """ - dir_name = os.path.dirname(file_path) - if dir_name not in ('', '.', '..'): - os.makedirs(dir_name, exist_ok=True) - self._raw_fitz.save(file_path) - - def apply(self, proc: Callable, *args, **kwargs): - """Apply callable method which. - - Args: - proc (Callable): invoke proc as follows: - proc(dataset, *args, **kwargs) - - Returns: - Any: return the result generated by proc - """ - return proc(self, *args, **kwargs) - - def classify(self) -> SupportedPdfParseMethod: - """classify the dataset. - - Returns: - SupportedPdfParseMethod: _description_ - """ - return SupportedPdfParseMethod.OCR - - def clone(self): - """clone this dataset.""" - return ImageDataset(self._raw_data) - - def set_images(self, images): - for i in range(len(self._records)): - self._records[i].set_image(images[i]) - -class Doc(PageableData): - """Initialized with pymudoc object.""" - - def __init__(self, doc: fitz.Page): - self._doc = doc - self._img = None - - def get_image(self): - """Return the image info. 
- - Returns: - dict: { - img: np.ndarray, - width: int, - height: int - } - """ - if self._img is None: - self._img = fitz_doc_to_image(self._doc) - return self._img - - def set_image(self, img): - """ - Args: - img (np.ndarray): the image - """ - if self._img is None: - self._img = img - - def get_doc(self) -> fitz.Page: - """Get the pymudoc object. - - Returns: - fitz.Page: the pymudoc object - """ - return self._doc - - def get_page_info(self) -> PageInfo: - """Get the page info of the page. - - Returns: - PageInfo: the page info of this page - """ - page_w = self._doc.rect.width - page_h = self._doc.rect.height - return PageInfo(w=page_w, h=page_h) - - def __getattr__(self, name): - if hasattr(self._doc, name): - return getattr(self._doc, name) - - def draw_rect(self, rect_coords, color, fill, fill_opacity, width, overlay): - """draw rectangle. - - Args: - rect_coords (list[float]): four elements array contain the top-left and bottom-right coordinates, [x0, y0, x1, y1] - color (list[float] | None): three element tuple which describe the RGB of the board line, None means no board line - fill (list[float] | None): fill the board with RGB, None means will not fill with color - fill_opacity (float): opacity of the fill, range from [0, 1] - width (float): the width of board - overlay (bool): fill the color in foreground or background. True means fill in background. - """ - self._doc.draw_rect( - rect_coords, - color=color, - fill=fill, - fill_opacity=fill_opacity, - width=width, - overlay=overlay, - ) - - def insert_text(self, coord, content, fontsize, color): - """insert text. - - Args: - coord (list[float]): four elements array contain the top-left and bottom-right coordinates, [x0, y0, x1, y1] - content (str): the text content - fontsize (int): font size of the text - color (list[float] | None): three element tuple which describe the RGB of the board line, None will use the default font color! - """ - self._doc.insert_text(coord, content, fontsize=fontsize, color=color) \ No newline at end of file diff --git a/magic_pdf/data/io/__init__.py b/magic_pdf/data/io/__init__.py deleted file mode 100644 index badf1df07551df611dc955710743f26bf5f60595..0000000000000000000000000000000000000000 --- a/magic_pdf/data/io/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ - -from magic_pdf.data.io.base import IOReader, IOWriter # noqa: F401 -from magic_pdf.data.io.http import HttpReader, HttpWriter # noqa: F401 -from magic_pdf.data.io.s3 import S3Reader, S3Writer # noqa: F401 - -__all__ = ['IOReader', 'IOWriter', 'HttpReader', 'HttpWriter', 'S3Reader', 'S3Writer'] \ No newline at end of file diff --git a/magic_pdf/data/io/base.py b/magic_pdf/data/io/base.py deleted file mode 100644 index 3c163d1fe97f9f40820fbd710f85a67bcccd4b34..0000000000000000000000000000000000000000 --- a/magic_pdf/data/io/base.py +++ /dev/null @@ -1,42 +0,0 @@ -from abc import ABC, abstractmethod - - -class IOReader(ABC): - @abstractmethod - def read(self, path: str) -> bytes: - """Read the file. - - Args: - path (str): file path to read - - Returns: - bytes: the content of the file - """ - pass - - @abstractmethod - def read_at(self, path: str, offset: int = 0, limit: int = -1) -> bytes: - """Read at offset and limit. - - Args: - path (str): the path of file, if the path is relative path, it will be joined with parent_dir. - offset (int, optional): the number of bytes skipped. Defaults to 0. - limit (int, optional): the length of bytes want to read. Defaults to -1. 
- - Returns: - bytes: the content of file - """ - pass - - -class IOWriter(ABC): - - @abstractmethod - def write(self, path: str, data: bytes) -> None: - """Write file with data. - - Args: - path (str): the path of file, if the path is relative path, it will be joined with parent_dir. - data (bytes): the data want to write - """ - pass diff --git a/magic_pdf/data/io/http.py b/magic_pdf/data/io/http.py deleted file mode 100644 index 3b08271f05a8ad6e2163f1e357fe66ab4a713b48..0000000000000000000000000000000000000000 --- a/magic_pdf/data/io/http.py +++ /dev/null @@ -1,37 +0,0 @@ - -import io - -import requests - -from magic_pdf.data.io.base import IOReader, IOWriter - - -class HttpReader(IOReader): - - def read(self, url: str) -> bytes: - """Read the file. - - Args: - path (str): file path to read - - Returns: - bytes: the content of the file - """ - return requests.get(url).content - - def read_at(self, path: str, offset: int = 0, limit: int = -1) -> bytes: - """Not Implemented.""" - raise NotImplementedError - - -class HttpWriter(IOWriter): - def write(self, url: str, data: bytes) -> None: - """Write file with data. - - Args: - path (str): the path of file, if the path is relative path, it will be joined with parent_dir. - data (bytes): the data want to write - """ - files = {'file': io.BytesIO(data)} - response = requests.post(url, files=files) - assert 300 > response.status_code and response.status_code > 199 diff --git a/magic_pdf/data/io/s3.py b/magic_pdf/data/io/s3.py deleted file mode 100644 index 4222c73fecdeb99283fa2d0ef419d2f3cde06cb5..0000000000000000000000000000000000000000 --- a/magic_pdf/data/io/s3.py +++ /dev/null @@ -1,114 +0,0 @@ -import boto3 -from botocore.config import Config - -from magic_pdf.data.io.base import IOReader, IOWriter - - -class S3Reader(IOReader): - def __init__( - self, - bucket: str, - ak: str, - sk: str, - endpoint_url: str, - addressing_style: str = 'auto', - ): - """s3 reader client. - - Args: - bucket (str): bucket name - ak (str): access key - sk (str): secret key - endpoint_url (str): endpoint url of s3 - addressing_style (str, optional): Defaults to 'auto'. Other valid options here are 'path' and 'virtual' - refer to https://boto3.amazonaws.com/v1/documentation/api/1.9.42/guide/s3.html - """ - self._bucket = bucket - self._ak = ak - self._sk = sk - self._s3_client = boto3.client( - service_name='s3', - aws_access_key_id=ak, - aws_secret_access_key=sk, - endpoint_url=endpoint_url, - config=Config( - s3={'addressing_style': addressing_style}, - retries={'max_attempts': 5, 'mode': 'standard'}, - ), - ) - - def read(self, key: str) -> bytes: - """Read the file. - - Args: - path (str): file path to read - - Returns: - bytes: the content of the file - """ - return self.read_at(key) - - def read_at(self, key: str, offset: int = 0, limit: int = -1) -> bytes: - """Read at offset and limit. - - Args: - path (str): the path of file, if the path is relative path, it will be joined with parent_dir. - offset (int, optional): the number of bytes skipped. Defaults to 0. - limit (int, optional): the length of bytes want to read. Defaults to -1. 
- - Returns: - bytes: the content of file - """ - if limit > -1: - range_header = f'bytes={offset}-{offset+limit-1}' - res = self._s3_client.get_object( - Bucket=self._bucket, Key=key, Range=range_header - ) - else: - res = self._s3_client.get_object( - Bucket=self._bucket, Key=key, Range=f'bytes={offset}-' - ) - return res['Body'].read() - - -class S3Writer(IOWriter): - def __init__( - self, - bucket: str, - ak: str, - sk: str, - endpoint_url: str, - addressing_style: str = 'auto', - ): - """s3 reader client. - - Args: - bucket (str): bucket name - ak (str): access key - sk (str): secret key - endpoint_url (str): endpoint url of s3 - addressing_style (str, optional): Defaults to 'auto'. Other valid options here are 'path' and 'virtual' - refer to https://boto3.amazonaws.com/v1/documentation/api/1.9.42/guide/s3.html - """ - self._bucket = bucket - self._ak = ak - self._sk = sk - self._s3_client = boto3.client( - service_name='s3', - aws_access_key_id=ak, - aws_secret_access_key=sk, - endpoint_url=endpoint_url, - config=Config( - s3={'addressing_style': addressing_style}, - retries={'max_attempts': 5, 'mode': 'standard'}, - ), - ) - - def write(self, key: str, data: bytes): - """Write file with data. - - Args: - path (str): the path of file, if the path is relative path, it will be joined with parent_dir. - data (bytes): the data want to write - """ - self._s3_client.put_object(Bucket=self._bucket, Key=key, Body=data) diff --git a/magic_pdf/data/read_api.py b/magic_pdf/data/read_api.py deleted file mode 100644 index 9e52af6d8976910975b987529871b0acbae239bb..0000000000000000000000000000000000000000 --- a/magic_pdf/data/read_api.py +++ /dev/null @@ -1,142 +0,0 @@ -import json -import os -import tempfile -import shutil -from pathlib import Path - -from magic_pdf.config.exceptions import EmptyData, InvalidParams -from magic_pdf.data.data_reader_writer import (FileBasedDataReader, - MultiBucketS3DataReader) -from magic_pdf.data.dataset import ImageDataset, PymuDocDataset -from magic_pdf.utils.office_to_pdf import convert_file_to_pdf, ConvertToPdfError - -def read_jsonl( - s3_path_or_local: str, s3_client: MultiBucketS3DataReader | None = None -) -> list[PymuDocDataset]: - """Read the jsonl file and return the list of PymuDocDataset. - - Args: - s3_path_or_local (str): local file or s3 path - s3_client (MultiBucketS3DataReader | None, optional): s3 client that support multiple bucket. Defaults to None. - - Raises: - InvalidParams: if s3_path_or_local is s3 path but s3_client is not provided. - EmptyData: if no pdf file location is provided in some line of jsonl file. 
- InvalidParams: if the file location is s3 path but s3_client is not provided - - Returns: - list[PymuDocDataset]: each line in the jsonl file will be converted to a PymuDocDataset - """ - bits_arr = [] - if s3_path_or_local.startswith('s3://'): - if s3_client is None: - raise InvalidParams('s3_client is required when s3_path is provided') - jsonl_bits = s3_client.read(s3_path_or_local) - else: - jsonl_bits = FileBasedDataReader('').read(s3_path_or_local) - jsonl_d = [ - json.loads(line) for line in jsonl_bits.decode().split('\n') if line.strip() - ] - for d in jsonl_d: - pdf_path = d.get('file_location', '') or d.get('path', '') - if len(pdf_path) == 0: - raise EmptyData('pdf file location is empty') - if pdf_path.startswith('s3://'): - if s3_client is None: - raise InvalidParams('s3_client is required when s3_path is provided') - bits_arr.append(s3_client.read(pdf_path)) - else: - bits_arr.append(FileBasedDataReader('').read(pdf_path)) - return [PymuDocDataset(bits) for bits in bits_arr] - - -def read_local_pdfs(path: str) -> list[PymuDocDataset]: - """Read pdf from path or directory. - - Args: - path (str): pdf file path or directory that contains pdf files - - Returns: - list[PymuDocDataset]: each pdf file will converted to a PymuDocDataset - """ - if os.path.isdir(path): - reader = FileBasedDataReader() - ret = [] - for root, _, files in os.walk(path): - for file in files: - suffix = file.split('.') - if suffix[-1] == 'pdf': - ret.append( PymuDocDataset(reader.read(os.path.join(root, file)))) - return ret - else: - reader = FileBasedDataReader() - bits = reader.read(path) - return [PymuDocDataset(bits)] - -def read_local_office(path: str) -> list[PymuDocDataset]: - """Read ms-office file (ppt, pptx, doc, docx) from path or directory. - - Args: - path (str): ms-office file or directory that contains ms-office files - - Returns: - list[PymuDocDataset]: each ms-office file will converted to a PymuDocDataset - - Raises: - ConvertToPdfError: Failed to convert ms-office file to pdf via libreoffice - FileNotFoundError: File not Found - Exception: Unknown Exception raised - """ - suffixes = ['.ppt', '.pptx', '.doc', '.docx'] - fns = [] - ret = [] - if os.path.isdir(path): - for root, _, files in os.walk(path): - for file in files: - suffix = Path(file).suffix - if suffix in suffixes: - fns.append((os.path.join(root, file))) - else: - fns.append(path) - - reader = FileBasedDataReader() - temp_dir = tempfile.mkdtemp() - for fn in fns: - try: - convert_file_to_pdf(fn, temp_dir) - except ConvertToPdfError as e: - raise e - except FileNotFoundError as e: - raise e - except Exception as e: - raise e - fn_path = Path(fn) - pdf_fn = f"{temp_dir}/{fn_path.stem}.pdf" - ret.append(PymuDocDataset(reader.read(pdf_fn))) - shutil.rmtree(temp_dir) - return ret - -def read_local_images(path: str, suffixes: list[str]=['.png', '.jpg', '.jpeg']) -> list[ImageDataset]: - """Read images from path or directory. - - Args: - path (str): image file path or directory that contains image files - suffixes (list[str]): the suffixes of the image files used to filter the files. 
Example: ['.jpg', '.png'] - - Returns: - list[ImageDataset]: each image file will converted to a ImageDataset - """ - if os.path.isdir(path): - imgs_bits = [] - s_suffixes = set(suffixes) - reader = FileBasedDataReader() - for root, _, files in os.walk(path): - for file in files: - suffix = Path(file).suffix - if suffix in s_suffixes: - imgs_bits.append(reader.read(os.path.join(root, file))) - return [ImageDataset(bits) for bits in imgs_bits] - else: - reader = FileBasedDataReader() - bits = reader.read(path) - return [ImageDataset(bits)] diff --git a/magic_pdf/data/schemas.py b/magic_pdf/data/schemas.py deleted file mode 100644 index c2efb46aac565a434bbbd50568c295ca2776db2e..0000000000000000000000000000000000000000 --- a/magic_pdf/data/schemas.py +++ /dev/null @@ -1,19 +0,0 @@ - -from pydantic import BaseModel, Field - - -class S3Config(BaseModel): - """S3 config - """ - bucket_name: str = Field(description='s3 bucket name', min_length=1) - access_key: str = Field(description='s3 access key', min_length=1) - secret_key: str = Field(description='s3 secret key', min_length=1) - endpoint_url: str = Field(description='s3 endpoint url', min_length=1) - addressing_style: str = Field(description='s3 addressing style', default='auto', min_length=1) - - -class PageInfo(BaseModel): - """The width and height of page - """ - w: float = Field(description='the width of page') - h: float = Field(description='the height of page') diff --git a/magic_pdf/data/utils.py b/magic_pdf/data/utils.py deleted file mode 100644 index 849fa780939ddba531029500b158280658af8ea3..0000000000000000000000000000000000000000 --- a/magic_pdf/data/utils.py +++ /dev/null @@ -1,166 +0,0 @@ - -import multiprocessing as mp -import threading -from concurrent.futures import (ProcessPoolExecutor, ThreadPoolExecutor, - as_completed) - -import fitz -import numpy as np -from loguru import logger - - - -def fitz_doc_to_image(page, dpi=200) -> dict: - """Convert fitz.Document to image, Then convert the image to numpy array. - - Args: - page (_type_): pymudoc page - dpi (int, optional): reset the dpi of dpi. Defaults to 200. - - Returns: - dict: {'img': numpy array, 'width': width, 'height': height } - """ - mat = fitz.Matrix(dpi / 72, dpi / 72) - pm = page.get_pixmap(matrix=mat, alpha=False) - - # If the width or height exceeds 4500 after scaling, do not scale further. - if pm.width > 4500 or pm.height > 4500: - pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False) - - # Convert pixmap samples directly to numpy array - img = np.frombuffer(pm.samples, dtype=np.uint8).reshape(pm.height, pm.width, 3) - - img_dict = {'img': img, 'width': pm.width, 'height': pm.height} - - return img_dict - -def load_images_from_pdf(pdf_bytes: bytes, dpi=200, start_page_id=0, end_page_id=None) -> list: - images = [] - with fitz.open('pdf', pdf_bytes) as doc: - pdf_page_num = doc.page_count - end_page_id = ( - end_page_id - if end_page_id is not None and end_page_id >= 0 - else pdf_page_num - 1 - ) - if end_page_id > pdf_page_num - 1: - logger.warning('end_page_id is out of range, use images length') - end_page_id = pdf_page_num - 1 - - for index in range(0, doc.page_count): - if start_page_id <= index <= end_page_id: - page = doc[index] - mat = fitz.Matrix(dpi / 72, dpi / 72) - pm = page.get_pixmap(matrix=mat, alpha=False) - - # If the width or height exceeds 4500 after scaling, do not scale further. 
- if pm.width > 4500 or pm.height > 4500: - pm = page.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False) - - # Convert pixmap samples directly to numpy array - img = np.frombuffer(pm.samples, dtype=np.uint8).reshape(pm.height, pm.width, 3) - - img_dict = {'img': img, 'width': pm.width, 'height': pm.height} - else: - img_dict = {'img': [], 'width': 0, 'height': 0} - - images.append(img_dict) - return images - - -def convert_page(bytes_page): - pdfs = fitz.open('pdf', bytes_page) - page = pdfs[0] - return fitz_doc_to_image(page) - -def parallel_process_pdf_safe(pages, num_workers=None, **kwargs): - """Process PDF pages in parallel with serialization-safe approach.""" - if num_workers is None: - num_workers = mp.cpu_count() - - - # Process the extracted page data in parallel - with ProcessPoolExecutor(max_workers=num_workers) as executor: - # Process the page data - results = list( - executor.map(convert_page, pages) - ) - - return results - - -def threaded_process_pdf(pdf_path, num_threads=4, **kwargs): - """Process all pages of a PDF using multiple threads. - - Parameters: - ----------- - pdf_path : str - Path to the PDF file - num_threads : int - Number of threads to use - **kwargs : - Additional arguments for fitz_doc_to_image - - Returns: - -------- - images : list - List of processed images, in page order - """ - # Open the PDF - doc = fitz.open(pdf_path) - num_pages = len(doc) - - # Create a list to store results in the correct order - results = [None] * num_pages - - # Create a thread pool - with ThreadPoolExecutor(max_workers=num_threads) as executor: - # Submit all tasks - futures = {} - for page_num in range(num_pages): - page = doc[page_num] - future = executor.submit(fitz_doc_to_image, page, **kwargs) - futures[future] = page_num - # Process results as they complete with progress bar - for future in as_completed(futures): - page_num = futures[future] - try: - results[page_num] = future.result() - except Exception as e: - print(f'Error processing page {page_num}: {e}') - results[page_num] = None - - # Close the document - doc.close() - -if __name__ == '__main__': - pdf = fitz.open('/tmp/[MS-DOC].pdf') - - - pdf_page = [fitz.open() for i in range(pdf.page_count)] - [pdf_page[i].insert_pdf(pdf, from_page=i, to_page=i) for i in range(pdf.page_count)] - - pdf_page = [v.tobytes() for v in pdf_page] - results = parallel_process_pdf_safe(pdf_page, num_workers=16) - - # threaded_process_pdf('/tmp/[MS-DOC].pdf', num_threads=16) - - """ benchmark results of multi-threaded processing (fitz page to image) - total page nums: 578 - thread nums, time cost - 1 7.351 sec - 2 6.334 sec - 4 5.968 sec - 8 6.728 sec - 16 8.085 sec - """ - - """ benchmark results of multi-processor processing (fitz page to image) - total page nums: 578 - processor nums, time cost - 1 17.170 sec - 2 10.170 sec - 4 7.841 sec - 8 7.900 sec - 16 7.984 sec - """ diff --git a/magic_pdf/dict2md/__init__.py b/magic_pdf/dict2md/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/dict2md/ocr_mkcontent.py b/magic_pdf/dict2md/ocr_mkcontent.py deleted file mode 100644 index 997b1832a96be934f93c1c6c680b3de35d79bcc6..0000000000000000000000000000000000000000 --- a/magic_pdf/dict2md/ocr_mkcontent.py +++ /dev/null @@ -1,352 +0,0 @@ -import re - -from loguru import logger - -from magic_pdf.config.make_content_config import DropMode, MakeMode -from magic_pdf.config.ocr_content_type import BlockType, ContentType -from magic_pdf.libs.commons 
import join_path -from magic_pdf.libs.config_reader import get_latex_delimiter_config -from magic_pdf.libs.language import detect_lang -from magic_pdf.libs.markdown_utils import ocr_escape_special_markdown_char -from magic_pdf.post_proc.para_split_v3 import ListLineTag - - -def __is_hyphen_at_line_end(line): - """Check if a line ends with one or more letters followed by a hyphen. - - Args: - line (str): The line of text to check. - - Returns: - bool: True if the line ends with one or more letters followed by a hyphen, False otherwise. - """ - # Use regex to check if the line ends with one or more letters followed by a hyphen - return bool(re.search(r'[A-Za-z]+-\s*$', line)) - - -def ocr_mk_mm_markdown_with_para_and_pagination(pdf_info_dict: list, - img_buket_path): - markdown_with_para_and_pagination = [] - page_no = 0 - for page_info in pdf_info_dict: - paras_of_layout = page_info.get('para_blocks') - if not paras_of_layout: - markdown_with_para_and_pagination.append({ - 'page_no': - page_no, - 'md_content': - '', - }) - page_no += 1 - continue - page_markdown = ocr_mk_markdown_with_para_core_v2( - paras_of_layout, 'mm', img_buket_path) - markdown_with_para_and_pagination.append({ - 'page_no': - page_no, - 'md_content': - '\n\n'.join(page_markdown) - }) - page_no += 1 - return markdown_with_para_and_pagination - - -def ocr_mk_markdown_with_para_core_v2(paras_of_layout, - mode, - img_buket_path='', - ): - page_markdown = [] - for para_block in paras_of_layout: - para_text = '' - para_type = para_block['type'] - if para_type in [BlockType.Text, BlockType.List, BlockType.Index]: - para_text = merge_para_with_text(para_block) - elif para_type == BlockType.Title: - title_level = get_title_level(para_block) - para_text = f'{"#" * title_level} {merge_para_with_text(para_block)}' - elif para_type == BlockType.InterlineEquation: - para_text = merge_para_with_text(para_block) - elif para_type == BlockType.Image: - if mode == 'nlp': - continue - elif mode == 'mm': - # 检测是否存在图片脚注 - has_image_footnote = any(block['type'] == BlockType.ImageFootnote for block in para_block['blocks']) - # 如果存在图片脚注,则将图片脚注拼接到图片正文后面 - if has_image_footnote: - for block in para_block['blocks']: # 1st.拼image_caption - if block['type'] == BlockType.ImageCaption: - para_text += merge_para_with_text(block) + ' \n' - for block in para_block['blocks']: # 2nd.拼image_body - if block['type'] == BlockType.ImageBody: - for line in block['lines']: - for span in line['spans']: - if span['type'] == ContentType.Image: - if span.get('image_path', ''): - para_text += f"![]({img_buket_path}/{span['image_path']})" - for block in para_block['blocks']: # 3rd.拼image_footnote - if block['type'] == BlockType.ImageFootnote: - para_text += ' \n' + merge_para_with_text(block) - else: - for block in para_block['blocks']: # 1st.拼image_body - if block['type'] == BlockType.ImageBody: - for line in block['lines']: - for span in line['spans']: - if span['type'] == ContentType.Image: - if span.get('image_path', ''): - para_text += f"![]({img_buket_path}/{span['image_path']})" - for block in para_block['blocks']: # 2nd.拼image_caption - if block['type'] == BlockType.ImageCaption: - para_text += ' \n' + merge_para_with_text(block) - elif para_type == BlockType.Table: - if mode == 'nlp': - continue - elif mode == 'mm': - for block in para_block['blocks']: # 1st.拼table_caption - if block['type'] == BlockType.TableCaption: - para_text += merge_para_with_text(block) + ' \n' - for block in para_block['blocks']: # 2nd.拼table_body - if block['type'] == 
BlockType.TableBody: - for line in block['lines']: - for span in line['spans']: - if span['type'] == ContentType.Table: - # if processed by table model - if span.get('html', ''): - para_text += f"\n{span['html']}\n" - elif span.get('image_path', ''): - para_text += f"![]({img_buket_path}/{span['image_path']})" - for block in para_block['blocks']: # 3rd.拼table_footnote - if block['type'] == BlockType.TableFootnote: - para_text += '\n' + merge_para_with_text(block) + ' ' - - if para_text.strip() == '': - continue - else: - # page_markdown.append(para_text.strip() + ' ') - page_markdown.append(para_text.strip()) - - return page_markdown - - -def detect_language(text): - en_pattern = r'[a-zA-Z]+' - en_matches = re.findall(en_pattern, text) - en_length = sum(len(match) for match in en_matches) - if len(text) > 0: - if en_length / len(text) >= 0.5: - return 'en' - else: - return 'unknown' - else: - return 'empty' - - -def full_to_half(text: str) -> str: - """Convert full-width characters to half-width characters using code point manipulation. - - Args: - text: String containing full-width characters - - Returns: - String with full-width characters converted to half-width - """ - result = [] - for char in text: - code = ord(char) - # Full-width letters and numbers (FF21-FF3A for A-Z, FF41-FF5A for a-z, FF10-FF19 for 0-9) - if (0xFF21 <= code <= 0xFF3A) or (0xFF41 <= code <= 0xFF5A) or (0xFF10 <= code <= 0xFF19): - result.append(chr(code - 0xFEE0)) # Shift to ASCII range - else: - result.append(char) - return ''.join(result) - -latex_delimiters_config = get_latex_delimiter_config() - -default_delimiters = { - 'display': {'left': '$$', 'right': '$$'}, - 'inline': {'left': '$', 'right': '$'} -} - -delimiters = latex_delimiters_config if latex_delimiters_config else default_delimiters - -display_left_delimiter = delimiters['display']['left'] -display_right_delimiter = delimiters['display']['right'] -inline_left_delimiter = delimiters['inline']['left'] -inline_right_delimiter = delimiters['inline']['right'] - -def merge_para_with_text(para_block): - block_text = '' - for line in para_block['lines']: - for span in line['spans']: - if span['type'] in [ContentType.Text]: - span['content'] = full_to_half(span['content']) - block_text += span['content'] - block_lang = detect_lang(block_text) - - para_text = '' - for i, line in enumerate(para_block['lines']): - - if i >= 1 and line.get(ListLineTag.IS_LIST_START_LINE, False): - para_text += ' \n' - - for j, span in enumerate(line['spans']): - - span_type = span['type'] - content = '' - if span_type == ContentType.Text: - content = ocr_escape_special_markdown_char(span['content']) - elif span_type == ContentType.InlineEquation: - content = f"{inline_left_delimiter}{span['content']}{inline_right_delimiter}" - elif span_type == ContentType.InterlineEquation: - content = f"\n{display_left_delimiter}\n{span['content']}\n{display_right_delimiter}\n" - - content = content.strip() - - if content: - langs = ['zh', 'ja', 'ko'] - # logger.info(f'block_lang: {block_lang}, content: {content}') - if block_lang in langs: # 中文/日语/韩文语境下,换行不需要空格分隔,但是如果是行内公式结尾,还是要加空格 - if j == len(line['spans']) - 1 and span_type not in [ContentType.InlineEquation]: - para_text += content - else: - para_text += f'{content} ' - else: - if span_type in [ContentType.Text, ContentType.InlineEquation]: - # 如果span是line的最后一个且末尾带有-连字符,那么末尾不应该加空格,同时应该把-删除 - if j == len(line['spans'])-1 and span_type == ContentType.Text and __is_hyphen_at_line_end(content): - para_text += content[:-1] - else: # 西方文本语境下 
content间需要空格分隔 - para_text += f'{content} ' - elif span_type == ContentType.InterlineEquation: - para_text += content - else: - continue - # 连写字符拆分 - # para_text = __replace_ligatures(para_text) - - return para_text - - -def para_to_standard_format_v2(para_block, img_buket_path, page_idx, drop_reason=None): - para_type = para_block['type'] - para_content = {} - if para_type in [BlockType.Text, BlockType.List, BlockType.Index]: - para_content = { - 'type': 'text', - 'text': merge_para_with_text(para_block), - } - elif para_type == BlockType.Title: - para_content = { - 'type': 'text', - 'text': merge_para_with_text(para_block), - } - title_level = get_title_level(para_block) - if title_level != 0: - para_content['text_level'] = title_level - elif para_type == BlockType.InterlineEquation: - para_content = { - 'type': 'equation', - 'text': merge_para_with_text(para_block), - 'text_format': 'latex', - } - elif para_type == BlockType.Image: - para_content = {'type': 'image', 'img_path': '', 'img_caption': [], 'img_footnote': []} - for block in para_block['blocks']: - if block['type'] == BlockType.ImageBody: - for line in block['lines']: - for span in line['spans']: - if span['type'] == ContentType.Image: - if span.get('image_path', ''): - para_content['img_path'] = join_path(img_buket_path, span['image_path']) - if block['type'] == BlockType.ImageCaption: - para_content['img_caption'].append(merge_para_with_text(block)) - if block['type'] == BlockType.ImageFootnote: - para_content['img_footnote'].append(merge_para_with_text(block)) - elif para_type == BlockType.Table: - para_content = {'type': 'table', 'img_path': '', 'table_caption': [], 'table_footnote': []} - for block in para_block['blocks']: - if block['type'] == BlockType.TableBody: - for line in block['lines']: - for span in line['spans']: - if span['type'] == ContentType.Table: - - if span.get('latex', ''): - para_content['table_body'] = f"{span['latex']}" - elif span.get('html', ''): - para_content['table_body'] = f"{span['html']}" - - if span.get('image_path', ''): - para_content['img_path'] = join_path(img_buket_path, span['image_path']) - - if block['type'] == BlockType.TableCaption: - para_content['table_caption'].append(merge_para_with_text(block)) - if block['type'] == BlockType.TableFootnote: - para_content['table_footnote'].append(merge_para_with_text(block)) - - para_content['page_idx'] = page_idx - - if drop_reason is not None: - para_content['drop_reason'] = drop_reason - - return para_content - - -def union_make(pdf_info_dict: list, - make_mode: str, - drop_mode: str, - img_buket_path: str = '', - ): - output_content = [] - for page_info in pdf_info_dict: - drop_reason_flag = False - drop_reason = None - if page_info.get('need_drop', False): - drop_reason = page_info.get('drop_reason') - if drop_mode == DropMode.NONE: - pass - elif drop_mode == DropMode.NONE_WITH_REASON: - drop_reason_flag = True - elif drop_mode == DropMode.WHOLE_PDF: - raise Exception((f'drop_mode is {DropMode.WHOLE_PDF} ,' - f'drop_reason is {drop_reason}')) - elif drop_mode == DropMode.SINGLE_PAGE: - logger.warning((f'drop_mode is {DropMode.SINGLE_PAGE} ,' - f'drop_reason is {drop_reason}')) - continue - else: - raise Exception('drop_mode can not be null') - - paras_of_layout = page_info.get('para_blocks') - page_idx = page_info.get('page_idx') - if not paras_of_layout: - continue - if make_mode == MakeMode.MM_MD: - page_markdown = ocr_mk_markdown_with_para_core_v2( - paras_of_layout, 'mm', img_buket_path) - output_content.extend(page_markdown) - elif 
make_mode == MakeMode.NLP_MD: - page_markdown = ocr_mk_markdown_with_para_core_v2( - paras_of_layout, 'nlp') - output_content.extend(page_markdown) - elif make_mode == MakeMode.STANDARD_FORMAT: - for para_block in paras_of_layout: - if drop_reason_flag: - para_content = para_to_standard_format_v2( - para_block, img_buket_path, page_idx) - else: - para_content = para_to_standard_format_v2( - para_block, img_buket_path, page_idx) - output_content.append(para_content) - if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]: - return '\n\n'.join(output_content) - elif make_mode == MakeMode.STANDARD_FORMAT: - return output_content - - -def get_title_level(block): - title_level = block.get('level', 1) - if title_level > 4: - title_level = 4 - elif title_level < 1: - title_level = 0 - return title_level \ No newline at end of file diff --git a/magic_pdf/filter/__init__.py b/magic_pdf/filter/__init__.py deleted file mode 100644 index 280156358b1417c1526ade41302a7f21b09863e0..0000000000000000000000000000000000000000 --- a/magic_pdf/filter/__init__.py +++ /dev/null @@ -1,32 +0,0 @@ - -from magic_pdf.config.drop_reason import DropReason -from magic_pdf.config.enums import SupportedPdfParseMethod -from magic_pdf.filter.pdf_classify_by_type import classify as do_classify -from magic_pdf.filter.pdf_meta_scan import pdf_meta_scan - - -def classify(pdf_bytes: bytes) -> SupportedPdfParseMethod: - """根据pdf的元数据,判断是文本pdf,还是ocr pdf.""" - pdf_meta = pdf_meta_scan(pdf_bytes) - if pdf_meta.get('_need_drop', False): # 如果返回了需要丢弃的标志,则抛出异常 - raise Exception(f"pdf meta_scan need_drop,reason is {pdf_meta['_drop_reason']}") - else: - is_encrypted = pdf_meta['is_encrypted'] - is_needs_password = pdf_meta['is_needs_password'] - if is_encrypted or is_needs_password: # 加密的,需要密码的,没有页面的,都不处理 - raise Exception(f'pdf meta_scan need_drop,reason is {DropReason.ENCRYPTED}') - else: - is_text_pdf, results = do_classify( - pdf_meta['total_page'], - pdf_meta['page_width_pts'], - pdf_meta['page_height_pts'], - pdf_meta['image_info_per_page'], - pdf_meta['text_len_per_page'], - pdf_meta['imgs_per_page'], - # pdf_meta['text_layout_per_page'], - pdf_meta['invalid_chars'], - ) - if is_text_pdf: - return SupportedPdfParseMethod.TXT - else: - return SupportedPdfParseMethod.OCR diff --git a/magic_pdf/filter/pdf_classify_by_type.py b/magic_pdf/filter/pdf_classify_by_type.py deleted file mode 100644 index 50665737287c2d1798924c3aa30980ce280a3c7d..0000000000000000000000000000000000000000 --- a/magic_pdf/filter/pdf_classify_by_type.py +++ /dev/null @@ -1,395 +0,0 @@ -""" -根据利用meta_scan得到的结果,对pdf是否为文字版进行分类。 -定义标准: -一、什么pdf会是文字pdf,只要满足以下任意一条 - 1. 随机抽取N页,如果有任何一页文字数目大于100 - 2. 只要存在一个页面,图片的数量为0 -二、什么是扫描版pdf,只要满足以下任意一条 - 1. ~~80%页面上的最大图大小一样并且面积超过页面面积0.6~~ - 2. 
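
As a usage sketch of the filter-level classify() entry point shown in magic_pdf/filter/__init__.py above: it raises for encrypted or otherwise droppable PDFs and otherwise returns a SupportedPdfParseMethod value. The file path and the surrounding error handling here are placeholders, not part of the package.

    # Illustrative usage of the classify() entry point shown above.
    from magic_pdf.config.enums import SupportedPdfParseMethod
    from magic_pdf.filter import classify

    with open('sample.pdf', 'rb') as f:    # placeholder path
        pdf_bytes = f.read()

    try:
        method = classify(pdf_bytes)
    except Exception as e:                 # encrypted / droppable PDFs raise here
        print(f'cannot classify: {e}')
    else:
        if method == SupportedPdfParseMethod.TXT:
            print('text-layer PDF: parse directly')
        else:                              # SupportedPdfParseMethod.OCR
            print('scanned PDF: route to OCR')
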
大部分页面上文字的长度都是相等的。 - -""" -import json -import sys -from collections import Counter - -import click -import numpy as np -from loguru import logger - -from magic_pdf.libs.commons import mymax, get_top_percent_list -from magic_pdf.filter.pdf_meta_scan import scan_max_page, junk_limit_min - -TEXT_LEN_THRESHOLD = 100 -AVG_TEXT_LEN_THRESHOLD = 100 -TEXT_LEN_SAMPLE_RATIO = 0.1 # 抽取0.1的页面进行文字长度统计 - - -# 一个拼接图片的方案,将某些特殊扫描版本的拆图拼成一张整图 -def merge_images(image_list, page_width, page_height, max_offset=5, max_gap=2): - # 先通过set去除所有bbox重叠的图片数据 - image_list_result = [] - for page_images in image_list: - page_result = [] - dedup = set() - for img in page_images: - x0, y0, x1, y1, img_bojid = img - if (x0, y0, x1, y1) in dedup: # 这里面会出现一些重复的bbox,无需重复出现,需要去掉 - continue - else: - dedup.add((x0, y0, x1, y1)) - page_result.append([x0, y0, x1, y1, img_bojid]) - image_list_result.append(page_result) - - # 接下来,将同一页可拼接的图片进行合并 - merged_images = [] - for page_images in image_list_result: - if not page_images: - continue - - # 先将同一页的图片从上到下,从左到右进行排序 - page_images.sort(key=lambda img: (img[1], img[0])) - - merged = [page_images[0]] - - for img in page_images[1:]: - x0, y0, x1, y1, imgid = img - - last_img = merged[-1] - last_x0, last_y0, last_x1, last_y1, last_imgid = last_img - - # 单张图片宽或者高覆盖页面宽高的9成以上是拼图的一个前置条件 - full_width = abs(x1 - x0) >= page_width * 0.9 - full_height = abs(y1 - y0) >= page_height * 0.9 - - # 如果宽达标,检测是否能竖着拼 - if full_width: - # 竖着拼需要满足两个前提,左右边界各偏移不能超过 max_offset,第一张图的下边界和第二张图的上边界偏移不能超过 max_gap - close1 = (last_x0 - max_offset) <= x0 <= (last_x0 + max_offset) and (last_x1 - max_offset) <= x1 <= ( - last_x1 + max_offset) and (last_y1 - max_gap) <= y0 <= (last_y1 + max_gap) - - # 如果高达标,检测是否可以横着拼 - if full_height: - # 横着拼需要满足两个前提,上下边界各偏移不能超过 max_offset,第一张图的右边界和第二张图的左边界偏移不能超过 max_gap - close2 = (last_y0 - max_offset) <= y0 <= (last_y0 + max_offset) and (last_y1 - max_offset) <= y1 <= ( - last_y1 + max_offset) and (last_x1 - max_gap) <= x0 <= (last_x1 + max_gap) - - # Check if the image can be merged with the last image - if (full_width and close1) or (full_height and close2): - # Merge the image with the last image - merged[-1] = [min(x0, last_x0), min(y0, last_y0), - max(x1, last_x1), max(y1, last_y1), imgid] - else: - # Add the image as a new image - merged.append(img) - - merged_images.append(merged) - - return merged_images - - -def classify_by_area(total_page: int, page_width, page_height, img_sz_list, text_len_list: list): - """ - 80%页面上的最大图大小一样并且面积超过页面面积0.6则返回False,否则返回True - :param pdf_path: - :param total_page: - :param page_width: - :param page_height: - :param img_sz_list: - :return: - """ - # # 只要有一页没有图片,那么就是文字pdf。但是同时还需要满足一个条件就是这个页面上同时不能有文字。发现过一些扫描版pdf,上面有一些空白页面,既没有图片也没有文字。 - # if any([len(img_sz) == 0 for img_sz in img_sz_list]): # 含有不含图片的页面 - # # 现在找到这些页面的index - # empty_page_index = [i for i, img_sz in enumerate(img_sz_list) if len(img_sz) == 0] - # # 然后检查这些页面上是否有文字 - # text_len_at_page_idx = [text_len for i, text_len in enumerate(text_len_list) if i in empty_page_index and text_len > 0] - # if len(text_len_at_page_idx) > TEXT_LEN_THRESHOLD: # 没有图片,但是有文字,说明可能是个文字版,如果没有文字则无法判断,留给下一步,现在要求这页文字量超过一定阈值 - # return True - - # 通过objid去掉重复出现10次以上的图片,这些图片是隐藏的透明图层,其特点是id都一样 - # 先对每个id出现的次数做个统计 - objid_cnt = Counter([objid for page_img_sz in img_sz_list for _, _, _, _, objid in page_img_sz]) - # 再去掉出现次数大于10的 - if total_page >= scan_max_page: # 新的meta_scan只扫描前 scan_max_page 页,页数大于 scan_max_page 当total_page为 scan_max_page - total_page = scan_max_page - - repeat_threshold = 2 # 把bad_image的阈值设为2 - # 
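
The stitching test inside merge_images above is easy to misread inline, so here is a minimal standalone sketch of the same idea (vertical case only). It is not the module's code; bboxes are assumed to be (x0, y0, x1, y1) tuples in points and the sample values are made up.

    # Sketch of the vertical stitching rule: two strips merge when both nearly
    # span the page width, their left/right edges line up within max_offset,
    # and the vertical gap between them is at most max_gap.
    def can_stack_vertically(a, b, page_width, max_offset=5, max_gap=2):
        ax0, ay0, ax1, ay1 = a
        bx0, by0, bx1, by1 = b
        full_width = (ax1 - ax0) >= page_width * 0.9 and (bx1 - bx0) >= page_width * 0.9
        edges_aligned = abs(bx0 - ax0) <= max_offset and abs(bx1 - ax1) <= max_offset
        small_gap = abs(by0 - ay1) <= max_gap
        return full_width and edges_aligned and small_gap

    def merge_boxes(a, b):
        return (min(a[0], b[0]), min(a[1], b[1]), max(a[2], b[2]), max(a[3], b[3]))

    page_w = 595  # example page width in pts
    top = (10, 0, 585, 400)
    bottom = (10, 401, 585, 800)
    if can_stack_vertically(top, bottom, page_w):
        print(merge_boxes(top, bottom))    # -> (10, 0, 585, 800)
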
repeat_threshold = min(2, total_page) # 当total_page为1时,repeat_threshold为1,会产生误判导致所有img变成bad_img - bad_image_objid = set([objid for objid, cnt in objid_cnt.items() if cnt >= repeat_threshold]) - # bad_image_page_idx = [i for i, page_img_sz in enumerate(img_sz_list) if any([objid in bad_image_objid for _, _, _, _, objid in page_img_sz])] - # text_len_at_bad_image_page_idx = [text_len for i, text_len in enumerate(text_len_list) if i in bad_image_page_idx and text_len > 0] - - # 特殊情况,一个文字版pdf,每页覆盖一个超大的透明图片,超大的定义是图片占整页面积的90%以上 - # fake_image_ids = [objid for objid in bad_image_objid if - # any([abs((x1 - x0) * (y1 - y0) / page_width * page_height) > 0.9 for images in img_sz_list for - # x0, y0, x1, y1, _ in images])] # 原来的代码,any里面恒为true了,原因??? - # fake_image_ids = [objid for objid in bad_image_objid for images in img_sz_list for x0, y0, x1, y1, img_id in images - # if img_id == objid and abs((x1 - x0) * (y1 - y0)) / (page_width * page_height) > 0.9] - - # if len(fake_image_ids) > 0 and any([l > TEXT_LEN_THRESHOLD for l in text_len_at_bad_image_page_idx]): # 这些透明图片所在的页面上有文字大于阈值 - # return True - - img_sz_list = [[img_sz for img_sz in page_img_sz if img_sz[-1] not in bad_image_objid] for page_img_sz in - img_sz_list] # 过滤掉重复出现的图片 - - # 有的扫描版会把一页图片拆成很多张,需要先把图拼起来再计算 - img_sz_list = merge_images(img_sz_list, page_width, page_height) - - # 计算每个页面上最大的图的面积,然后计算这个面积占页面面积的比例 - max_image_area_per_page = [mymax([(x1 - x0) * (y1 - y0) for x0, y0, x1, y1, _ in page_img_sz]) for page_img_sz in - img_sz_list] - page_area = page_width * page_height - max_image_area_per_page = [area / page_area for area in max_image_area_per_page] - max_image_area_per_page = [area for area in max_image_area_per_page if area > 0.5] - - if len(max_image_area_per_page) >= 0.5 * total_page: # 阈值从0.8改到0.5,适配3页里面有两页和两页里面有一页的情况 - # 这里条件成立的前提是把反复出现的图片去掉了。这些图片是隐藏的透明图层,其特点是id都一样 - return False - else: - return True - - -def classify_by_text_len(text_len_list: list, total_page: int): - """ - 随机抽取10%的页面,如果少于5个页面,那么就取全部页面。 - 查看页面上的文字长度,如果有任何一个页面的文字长度大于TEXT_LEN_THRESHOLD,那么就是文字pdf - :param total_page: - :param text_len_list: - :return: - """ - select_page_cnt = int(total_page * TEXT_LEN_SAMPLE_RATIO) # 选取10%的页面 - if select_page_cnt < 5: - select_page_cnt = total_page - - # # 排除头尾各10页 - # if total_page > 20: # 如果总页数大于20 - # page_range = list(range(10, total_page - 10)) # 从第11页到倒数第11页 - # else: - # page_range = list(range(total_page)) # 否则选择所有页面 - # page_num = np.random.choice(page_range, min(select_page_cnt, len(page_range)), replace=False) - # 排除前后10页对只有21,22页的pdf很尴尬,如果选出来的中间那一两页恰好没字容易误判,有了avg_words规则,这个规则可以忽略 - page_num = np.random.choice(total_page, select_page_cnt, replace=False) - text_len_lst = [text_len_list[i] for i in page_num] - is_text_pdf = any([text_len > TEXT_LEN_THRESHOLD for text_len in text_len_lst]) - return is_text_pdf - - -def classify_by_avg_words(text_len_list: list): - """ - 补充规则,如果平均每页字数少于 AVG_TEXT_LEN_THRESHOLD,就不是文字pdf - 主要是各种图集 - :param text_len_list: - :return: - """ - sum_words = sum(text_len_list) - count_of_numbers = len(text_len_list) - if count_of_numbers == 0: - is_text_pdf = False - else: - avg_words = round(sum_words / count_of_numbers) - if avg_words > AVG_TEXT_LEN_THRESHOLD: - is_text_pdf = True - else: - is_text_pdf = False - - return is_text_pdf - - -def classify_by_img_num(img_sz_list: list, img_num_list: list): - """ - 补充规则,有一种扫描版本的PDF,每一页都会放所有的扫描页进去,在 metascan 时会被去重, - 这种pdf的 metasca 扫描结果的特点是 img_sz_list 内全是空元素,img_num_list中每一页的数量都很大且相同 - :param img_sz_list: - :param img_num_list: - :return: - """ - # 
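
A compact standalone restatement of the sampling rule implemented in classify_by_text_len above, for orientation only: sample roughly 10% of pages (or every page when that would be fewer than 5) and treat the PDF as text-based if any sampled page exceeds the 100-character threshold. The text_len_per_page list below is invented for illustration.

    import numpy as np

    # Illustrative restatement of the sampling rule above, not the module code.
    def looks_like_text_pdf(text_len_per_page, sample_ratio=0.1, threshold=100):
        total = len(text_len_per_page)
        n = int(total * sample_ratio)
        if n < 5:
            n = total
        picked = np.random.choice(total, n, replace=False)
        return any(text_len_per_page[i] > threshold for i in picked)

    # 30 mostly-empty pages plus a few text-heavy ones
    lengths = [0] * 27 + [450, 800, 1200]
    print(looks_like_text_pdf(lengths))    # -> True (10% of 30 is below 5, so all pages are sampled)
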
计算img_sz_list中非空元素的个数 - count_img_sz_list_not_none = sum(1 for item in img_sz_list if item) - # 获取前80%的元素 - top_eighty_percent = get_top_percent_list(img_num_list, 0.8) - # img_sz_list中非空元素的个数小于1,前80%的元素都相等,且最大值大于等于junk_limit_min - if count_img_sz_list_not_none <= 1 and len(set(top_eighty_percent)) == 1 and max(img_num_list) >= junk_limit_min: - - #拿max和min的值,用来判断list内的值是否全都相等 - # min_imgs = min(img_num_list) - # max_imgs = max(img_num_list) - # - # if count_img_sz_list_not_none == 0 and max_imgs == min_imgs and max_imgs >= junk_limit_min: - return False # 如果满足这个条件,一定不是文字版pdf - else: - return True # 不满足这三个条件,可能是文字版pdf,通过其他规则判断 - - -def classify_by_text_layout(text_layout_per_page: list): - """ - 判断文本布局是否以竖排为主。 - - Args: - text_layout_per_page (list): 文本布局列表,列表中的每个元素表示一页的文本布局, - 值为'vertical'表示竖排,值为'horizontal'表示横排。 - - Returns: - bool: 若文本布局以竖排为主,则返回False;否则返回True。 - """ - # 统计text_layout_per_page中竖排的个数 - count_vertical = sum(1 for item in text_layout_per_page if item == 'vertical') - # 统计text_layout_per_page中横排的个数 - count_horizontal = sum(1 for item in text_layout_per_page if item == 'horizontal') - # 计算text_layout_per_page中竖排的占比 - known_layout_cnt = count_vertical + count_horizontal - if known_layout_cnt != 0: - ratio = count_vertical / known_layout_cnt - if ratio >= 0.5: # 阈值设为0.5,适配3页里面有2页和两页里有一页的情况 - return False # 文本布局以竖排为主,认为不是文字版pdf - else: - return True # 文本布局以横排为主,认为是文字版pdf - else: - return False # 文本布局未知,默认认为不是文字版pdf - - -def classify_by_img_narrow_strips(page_width, page_height, img_sz_list): - """ - 判断一页是否由细长条组成,有两个条件: - 1. 图片的宽或高达到页面宽或高的90%,且长边需要是窄边长度的数倍以上 - 2. 整个页面所有的图片有80%以上满足条件1 - - Args: - page_width (float): 页面宽度 - page_height (float): 页面高度 - img_sz_list (list): 图片尺寸列表,每个元素为一个元组,表示图片的矩形区域和尺寸,形如(x0, y0, x1, y1, size),其中(x0, y0)为矩形区域的左上角坐标,(x1, y1)为矩形区域的右下角坐标,size为图片的尺寸 - - Returns: - bool: 如果满足条件的页面的比例小于0.5,返回True,否则返回False - """ - - def is_narrow_strip(img): - x0, y0, x1, y1, _ = img - width, height = x1 - x0, y1 - y0 - return any([ - # 图片宽度大于等于页面宽度的90%,且宽度大于等于高度4倍 - width >= page_width * 0.9 and width >= height * 4, - # 图片高度大于等于页面高度的90%,且高度大于等于宽度4倍 - height >= page_height * 0.9 and height >= width * 4, - ]) - - # 初始化满足条件的页面数量 - narrow_strip_pages_count = 0 - - # 遍历所有页面 - for page_img_list in img_sz_list: - # 忽略空页面 - if not page_img_list: - continue - - # 计算页面中的图片总数 - total_images = len(page_img_list) - - # 计算页面中细长条图片的数量 - narrow_strip_images_count = 0 - for img in page_img_list: - if is_narrow_strip(img): - narrow_strip_images_count += 1 - # 如果细长条图片的数量少于5,跳过 - if narrow_strip_images_count < 5: - continue - else: - # 如果细长条图片的比例大于或等于0.8,增加满足条件的页面数量 - if narrow_strip_images_count / total_images >= 0.8: - narrow_strip_pages_count += 1 - - # 计算满足条件的页面的比例 - narrow_strip_pages_ratio = narrow_strip_pages_count / len(img_sz_list) - - return narrow_strip_pages_ratio < 0.5 - - -def classify(total_page: int, page_width, page_height, img_sz_list: list, text_len_list: list, img_num_list: list, - # text_layout_list: list, - invalid_chars: bool): - """ - 这里的图片和页面长度单位是pts - :param total_page: - :param text_len_list: - :param page_width: - :param page_height: - :param img_sz_list: - :param pdf_path: - :return: - """ - results = { - 'by_image_area': classify_by_area(total_page, page_width, page_height, img_sz_list, text_len_list), - 'by_text_len': classify_by_text_len(text_len_list, total_page), - 'by_avg_words': classify_by_avg_words(text_len_list), - 'by_img_num': classify_by_img_num(img_sz_list, img_num_list), - # 'by_text_layout': classify_by_text_layout(text_layout_list), - 
'by_img_narrow_strips': classify_by_img_narrow_strips(page_width, page_height, img_sz_list), - 'by_invalid_chars': invalid_chars, - } - - if all(results.values()): - return True, results - elif not any(results.values()): - return False, results - else: - logger.warning( - f"OCR needed based on classification result, by_image_area: {results['by_image_area']}," - f" by_text: {results['by_text_len']}, by_avg_words: {results['by_avg_words']}, by_img_num: {results['by_img_num']}," - # f" by_text_layout: {results['by_text_layout']}," - f" by_img_narrow_strips: {results['by_img_narrow_strips']}," - f" by_invalid_chars: {results['by_invalid_chars']}", - file=sys.stderr) # 利用这种情况可以快速找出来哪些pdf比较特殊,针对性修正分类算法 - return False, results - - -@click.command() -@click.option("--json-file", type=str, help="pdf信息") -def main(json_file): - if json_file is None: - print("json_file is None", file=sys.stderr) - exit(0) - try: - with open(json_file, "r") as f: - for l in f: - if l.strip() == "": - continue - o = json.loads(l) - total_page = o["total_page"] - page_width = o["page_width_pts"] - page_height = o["page_height_pts"] - img_sz_list = o["image_info_per_page"] - text_len_list = o['text_len_per_page'] - text_layout_list = o['text_layout_per_page'] - pdf_path = o['pdf_path'] - is_encrypted = o['is_encrypted'] - is_needs_password = o['is_needs_password'] - if is_encrypted or total_page == 0 or is_needs_password: # 加密的,需要密码的,没有页面的,都不处理 - continue - tag = classify(total_page, page_width, page_height, img_sz_list, text_len_list, text_layout_list) - o['is_text_pdf'] = tag - print(json.dumps(o, ensure_ascii=False)) - except Exception as e: - print("ERROR: ", e, file=sys.stderr) - - -if __name__ == "__main__": - main() - # false = False - # true = True - # null = None - # o = 
{"pdf_path":"s3://llm-raw-snew/llm-raw-the-eye/raw/World%20Tracker%20Library/worldtracker.org/media/library/Science/Computer%20Science/Shreiner%20-%20OpenGL%20Programming%20Guide%206e%20%5BThe%20Redbook%5D%20%28AW%2C%202008%29.pdf","is_needs_password":false,"is_encrypted":false,"total_page":978,"page_width_pts":368,"page_height_pts":513,"image_info_per_page":[[[0,0,368,513,10037]],[[0,0,368,513,4]],[[0,0,368,513,7]],[[0,0,368,513,10]],[[0,0,368,513,13]],[[0,0,368,513,16]],[[0,0,368,513,19]],[[0,0,368,513,22]],[[0,0,368,513,25]],[[0,0,368,513,28]],[[0,0,368,513,31]],[[0,0,368,513,34]],[[0,0,368,513,37]],[[0,0,368,513,40]],[[0,0,368,513,43]],[[0,0,368,513,46]],[[0,0,368,513,49]],[[0,0,368,513,52]],[[0,0,368,513,55]],[[0,0,368,513,58]],[[0,0,368,513,61]],[[0,0,368,513,64]],[[0,0,368,513,67]],[[0,0,368,513,70]],[[0,0,368,513,73]],[[0,0,368,516,76]],[[0,0,368,516,79]],[[0,0,368,513,82]],[[0,0,368,513,85]],[[0,0,368,513,88]],[[0,0,368,513,91]],[[0,0,368,513,94]],[[0,0,368,513,97]],[[0,0,368,513,100]],[[0,0,368,513,103]],[[0,0,368,513,106]],[[0,0,368,513,109]],[[0,0,368,513,112]],[[0,0,368,513,115]],[[0,0,368,513,118]],[[0,0,368,513,121]],[[0,0,368,513,124]],[[0,0,368,513,127]],[[0,0,368,513,130]],[[0,0,368,513,133]],[[0,0,368,513,136]],[[0,0,368,513,139]],[[0,0,368,513,142]],[[0,0,368,513,145]],[[0,0,368,513,148]],[[0,0,368,513,151]],[[0,0,368,513,154]],[[0,0,368,513,157]],[[0,0,368,513,160]],[[0,0,368,513,163]],[[0,0,368,513,166]],[[0,0,368,513,169]],[[0,0,368,513,172]],[[0,0,368,513,175]],[[0,0,368,513,178]],[[0,0,368,513,181]],[[0,0,368,513,184]],[[0,0,368,513,187]],[[0,0,368,513,190]],[[0,0,368,513,193]],[[0,0,368,513,196]],[[0,0,368,513,199]],[[0,0,368,513,202]],[[0,0,368,513,205]],[[0,0,368,513,208]],[[0,0,368,513,211]],[[0,0,368,513,214]],[[0,0,368,513,217]],[[0,0,368,513,220]],[[0,0,368,513,223]],[[0,0,368,513,226]],[[0,0,368,513,229]],[[0,0,368,513,232]],[[0,0,368,513,235]],[[0,0,368,513,238]],[[0,0,368,513,241]],[[0,0,368,513,244]],[[0,0,368,513,247]],[[0,0,368,513,250]],[[0,0,368,513,253]],[[0,0,368,513,256]],[[0,0,368,513,259]],[[0,0,368,513,262]],[[0,0,368,513,265]],[[0,0,368,513,268]],[[0,0,368,513,271]],[[0,0,368,513,274]],[[0,0,368,513,277]],[[0,0,368,513,280]],[[0,0,368,513,283]],[[0,0,368,513,286]],[[0,0,368,513,289]],[[0,0,368,513,292]],[[0,0,368,513,295]],[[0,0,368,513,298]],[[0,0,368,513,301]],[[0,0,368,513,304]],[[0,0,368,513,307]],[[0,0,368,513,310]],[[0,0,368,513,313]],[[0,0,368,513,316]],[[0,0,368,513,319]],[[0,0,368,513,322]],[[0,0,368,513,325]],[[0,0,368,513,328]],[[0,0,368,513,331]],[[0,0,368,513,334]],[[0,0,368,513,337]],[[0,0,368,513,340]],[[0,0,368,513,343]],[[0,0,368,513,346]],[[0,0,368,513,349]],[[0,0,368,513,352]],[[0,0,368,513,355]],[[0,0,368,513,358]],[[0,0,368,513,361]],[[0,0,368,513,364]],[[0,0,368,513,367]],[[0,0,368,513,370]],[[0,0,368,513,373]],[[0,0,368,513,376]],[[0,0,368,513,379]],[[0,0,368,513,382]],[[0,0,368,513,385]],[[0,0,368,513,388]],[[0,0,368,513,391]],[[0,0,368,513,394]],[[0,0,368,513,397]],[[0,0,368,513,400]],[[0,0,368,513,403]],[[0,0,368,513,406]],[[0,0,368,513,409]],[[0,0,368,513,412]],[[0,0,368,513,415]],[[0,0,368,513,418]],[[0,0,368,513,421]],[[0,0,368,513,424]],[[0,0,368,513,427]],[[0,0,368,513,430]],[[0,0,368,513,433]],[[0,0,368,513,436]],[[0,0,368,513,439]],[[0,0,368,513,442]],[[0,0,368,513,445]],[[0,0,368,513,448]],[[0,0,368,513,451]],[[0,0,368,513,454]],[[0,0,368,513,457]],[[0,0,368,513,460]],[[0,0,368,513,463]],[[0,0,368,513,466]],[[0,0,368,513,469]],[[0,0,368,513,472]],[[0,0,368,513,475]],[[0,0,368,513,478]],[[0,0,368,513,481]],[[0,0
,368,513,484]],[[0,0,368,513,487]],[[0,0,368,513,490]],[[0,0,368,513,493]],[[0,0,368,513,496]],[[0,0,368,513,499]],[[0,0,368,513,502]],[[0,0,368,513,505]],[[0,0,368,513,508]],[[0,0,368,513,511]],[[0,0,368,513,514]],[[0,0,368,513,517]],[[0,0,368,513,520]],[[0,0,368,513,523]],[[0,0,368,513,526]],[[0,0,368,513,529]],[[0,0,368,513,532]],[[0,0,368,513,535]],[[0,0,368,513,538]],[[0,0,368,513,541]],[[0,0,368,513,544]],[[0,0,368,513,547]],[[0,0,368,513,550]],[[0,0,368,513,553]],[[0,0,368,513,556]],[[0,0,368,513,559]],[[0,0,368,513,562]],[[0,0,368,513,565]],[[0,0,368,513,568]],[[0,0,368,513,571]],[[0,0,368,513,574]],[[0,0,368,513,577]],[[0,0,368,513,580]],[[0,0,368,513,583]],[[0,0,368,513,586]],[[0,0,368,513,589]],[[0,0,368,513,592]],[[0,0,368,513,595]],[[0,0,368,513,598]],[[0,0,368,513,601]],[[0,0,368,513,604]],[[0,0,368,513,607]],[[0,0,368,513,610]],[[0,0,368,513,613]],[[0,0,368,513,616]],[[0,0,368,513,619]],[[0,0,368,513,622]],[[0,0,368,513,625]],[[0,0,368,513,628]],[[0,0,368,513,631]],[[0,0,368,513,634]],[[0,0,368,513,637]],[[0,0,368,513,640]],[[0,0,368,513,643]],[[0,0,368,513,646]],[[0,0,368,513,649]],[[0,0,368,513,652]],[[0,0,368,513,655]],[[0,0,368,513,658]],[[0,0,368,513,661]],[[0,0,368,513,664]],[[0,0,368,513,667]],[[0,0,368,513,670]],[[0,0,368,513,673]],[[0,0,368,513,676]],[[0,0,368,513,679]],[[0,0,368,513,682]],[[0,0,368,513,685]],[[0,0,368,513,688]],[[0,0,368,513,691]],[[0,0,368,513,694]],[[0,0,368,513,697]],[[0,0,368,513,700]],[[0,0,368,513,703]],[[0,0,368,513,706]],[[0,0,368,513,709]],[[0,0,368,513,712]],[[0,0,368,513,715]],[[0,0,368,513,718]],[[0,0,368,513,721]],[[0,0,368,513,724]],[[0,0,368,513,727]],[[0,0,368,513,730]],[[0,0,368,513,733]],[[0,0,368,513,736]],[[0,0,368,513,739]],[[0,0,368,513,742]],[[0,0,368,513,745]],[[0,0,368,513,748]],[[0,0,368,513,751]],[[0,0,368,513,754]],[[0,0,368,513,757]],[[0,0,368,513,760]],[[0,0,368,513,763]],[[0,0,368,513,766]],[[0,0,368,513,769]],[[0,0,368,513,772]],[[0,0,368,513,775]],[[0,0,368,513,778]],[[0,0,368,513,781]],[[0,0,368,513,784]],[[0,0,368,513,787]],[[0,0,368,513,790]],[[0,0,368,513,793]],[[0,0,368,513,796]],[[0,0,368,513,799]],[[0,0,368,513,802]],[[0,0,368,513,805]],[[0,0,368,513,808]],[[0,0,368,513,811]],[[0,0,368,513,814]],[[0,0,368,513,817]],[[0,0,368,513,820]],[[0,0,368,513,823]],[[0,0,368,513,826]],[[0,0,368,513,829]],[[0,0,368,513,832]],[[0,0,368,513,835]],[[0,0,368,513,838]],[[0,0,368,513,841]],[[0,0,368,513,844]],[[0,0,368,513,847]],[[0,0,368,513,850]],[[0,0,368,513,853]],[[0,0,368,513,856]],[[0,0,368,513,859]],[[0,0,368,513,862]],[[0,0,368,513,865]],[[0,0,368,513,868]],[[0,0,368,513,871]],[[0,0,368,513,874]],[[0,0,368,513,877]],[[0,0,368,513,880]],[[0,0,368,513,883]],[[0,0,368,513,886]],[[0,0,368,513,889]],[[0,0,368,513,892]],[[0,0,368,513,895]],[[0,0,368,513,898]],[[0,0,368,513,901]],[[0,0,368,513,904]],[[0,0,368,513,907]],[[0,0,368,513,910]],[[0,0,368,513,913]],[[0,0,368,513,916]],[[0,0,368,513,919]],[[0,0,368,513,922]],[[0,0,368,513,925]],[[0,0,368,513,928]],[[0,0,368,513,931]],[[0,0,368,513,934]],[[0,0,368,513,937]],[[0,0,368,513,940]],[[0,0,368,513,943]],[[0,0,368,513,946]],[[0,0,368,513,949]],[[0,0,368,513,952]],[[0,0,368,513,955]],[[0,0,368,513,958]],[[0,0,368,513,961]],[[0,0,368,513,964]],[[0,0,368,513,967]],[[0,0,368,513,970]],[[0,0,368,513,973]],[[0,0,368,513,976]],[[0,0,368,513,979]],[[0,0,368,513,982]],[[0,0,368,513,985]],[[0,0,368,513,988]],[[0,0,368,513,991]],[[0,0,368,513,994]],[[0,0,368,513,997]],[[0,0,368,513,1000]],[[0,0,368,513,1003]],[[0,0,368,513,1006]],[[0,0,368,513,1009]],[[0,0,368,513,1012]],[[0,0,368,513,1
015]],[[0,0,368,513,1018]],[[0,0,368,513,2797]],[[0,0,368,513,2798]],[[0,0,368,513,2799]],[[0,0,368,513,2800]],[[0,0,368,513,2801]],[[0,0,368,513,2802]],[[0,0,368,513,2803]],[[0,0,368,513,2804]],[[0,0,368,513,2805]],[[0,0,368,513,2806]],[[0,0,368,513,2807]],[[0,0,368,513,2808]],[[0,0,368,513,2809]],[[0,0,368,513,2810]],[[0,0,368,513,2811]],[[0,0,368,513,2812]],[[0,0,368,513,2813]],[[0,0,368,513,2814]],[[0,0,368,513,2815]],[[0,0,368,513,2816]],[[0,0,368,513,2817]],[[0,0,368,513,2818]],[[0,0,368,513,2819]],[[0,0,368,513,2820]],[[0,0,368,513,2821]],[[0,0,368,513,2822]],[[0,0,368,513,2823]],[[0,0,368,513,2824]],[[0,0,368,513,2825]],[[0,0,368,513,2826]],[[0,0,368,513,2827]],[[0,0,368,513,2828]],[[0,0,368,513,2829]],[[0,0,368,513,2830]],[[0,0,368,513,2831]],[[0,0,368,513,2832]],[[0,0,368,513,2833]],[[0,0,368,513,2834]],[[0,0,368,513,2835]],[[0,0,368,513,2836]],[[0,0,368,513,2837]],[[0,0,368,513,2838]],[[0,0,368,513,2839]],[[0,0,368,513,2840]],[[0,0,368,513,2841]],[[0,0,368,513,2842]],[[0,0,368,513,2843]],[[0,0,368,513,2844]],[[0,0,368,513,2845]],[[0,0,368,513,2846]],[[0,0,368,513,2847]],[[0,0,368,513,2848]],[[0,0,368,513,2849]],[[0,0,368,513,2850]],[[0,0,368,513,2851]],[[0,0,368,513,2852]],[[0,0,368,513,2853]],[[0,0,368,513,2854]],[[0,0,368,513,2855]],[[0,0,368,513,2856]],[[0,0,368,513,2857]],[[0,0,368,513,2858]],[[0,0,368,513,2859]],[[0,0,368,513,2860]],[[0,0,368,513,2861]],[[0,0,368,513,2862]],[[0,0,368,513,2863]],[[0,0,368,513,2864]],[[0,0,368,513,2797]],[[0,0,368,513,2798]],[[0,0,368,513,2799]],[[0,0,368,513,2800]],[[0,0,368,513,2801]],[[0,0,368,513,2802]],[[0,0,368,513,2803]],[[0,0,368,513,2804]],[[0,0,368,513,2805]],[[0,0,368,513,2806]],[[0,0,368,513,2807]],[[0,0,368,513,2808]],[[0,0,368,513,2809]],[[0,0,368,513,2810]],[[0,0,368,513,2811]],[[0,0,368,513,2812]],[[0,0,368,513,2813]],[[0,0,368,513,2814]],[[0,0,368,513,2815]],[[0,0,368,513,2816]],[[0,0,368,513,2817]],[[0,0,368,513,2818]],[[0,0,368,513,2819]],[[0,0,368,513,2820]],[[0,0,368,513,2821]],[[0,0,368,513,2822]],[[0,0,368,513,2823]],[[0,0,368,513,2824]],[[0,0,368,513,2825]],[[0,0,368,513,2826]],[[0,0,368,513,2827]],[[0,0,368,513,2828]],[[0,0,368,513,2829]],[[0,0,368,513,2830]],[[0,0,368,513,2831]],[[0,0,368,513,2832]],[[0,0,368,513,2833]],[[0,0,368,513,2834]],[[0,0,368,513,2835]],[[0,0,368,513,2836]],[[0,0,368,513,2837]],[[0,0,368,513,2838]],[[0,0,368,513,2839]],[[0,0,368,513,2840]],[[0,0,368,513,2841]],[[0,0,368,513,2842]],[[0,0,368,513,2843]],[[0,0,368,513,2844]],[[0,0,368,513,2845]],[[0,0,368,513,2846]],[[0,0,368,513,2847]],[[0,0,368,513,2848]],[[0,0,368,513,2849]],[[0,0,368,513,2850]],[[0,0,368,513,2851]],[[0,0,368,513,2852]],[[0,0,368,513,2853]],[[0,0,368,513,2854]],[[0,0,368,513,2855]],[[0,0,368,513,2856]],[[0,0,368,513,2857]],[[0,0,368,513,2858]],[[0,0,368,513,2859]],[[0,0,368,513,2860]],[[0,0,368,513,2861]],[[0,0,368,513,2862]],[[0,0,368,513,2863]],[[0,0,368,513,2864]],[[0,0,368,513,1293]],[[0,0,368,513,1296]],[[0,0,368,513,1299]],[[0,0,368,513,1302]],[[0,0,368,513,1305]],[[0,0,368,513,1308]],[[0,0,368,513,1311]],[[0,0,368,513,1314]],[[0,0,368,513,1317]],[[0,0,368,513,1320]],[[0,0,368,513,1323]],[[0,0,368,513,1326]],[[0,0,368,513,1329]],[[0,0,368,513,1332]],[[0,0,368,513,1335]],[[0,0,368,513,1338]],[[0,0,368,513,1341]],[[0,0,368,513,1344]],[[0,0,368,513,1347]],[[0,0,368,513,1350]],[[0,0,368,513,1353]],[[0,0,368,513,1356]],[[0,0,368,513,1359]],[[0,0,368,513,1362]],[[0,0,368,513,1365]],[[0,0,368,513,1368]],[[0,0,368,513,1371]],[[0,0,368,513,1374]],[[0,0,368,513,1377]],[[0,0,368,513,1380]],[[0,0,368,513,1383]],[[0,0,368,513,1386]],
[[0,0,368,513,1389]],[[0,0,368,513,1392]],[[0,0,368,513,1395]],[[0,0,368,513,1398]],[[0,0,368,513,1401]],[[0,0,368,513,1404]],[[0,0,368,513,1407]],[[0,0,368,513,1410]],[[0,0,368,513,1413]],[[0,0,368,513,1416]],[[0,0,368,513,1419]],[[0,0,368,513,1422]],[[0,0,368,513,1425]],[[0,0,368,513,1428]],[[0,0,368,513,1431]],[[0,0,368,513,1434]],[[0,0,368,513,1437]],[[0,0,368,513,1440]],[[0,0,368,513,1443]],[[0,0,368,513,1446]],[[0,0,368,513,1449]],[[0,0,368,513,1452]],[[0,0,368,513,1455]],[[0,0,368,513,1458]],[[0,0,368,513,1461]],[[0,0,368,513,1464]],[[0,0,368,513,1467]],[[0,0,368,513,1470]],[[0,0,368,513,1473]],[[0,0,368,513,1476]],[[0,0,368,513,1479]],[[0,0,368,513,1482]],[[0,0,368,513,1485]],[[0,0,368,513,1488]],[[0,0,368,513,1491]],[[0,0,368,513,1494]],[[0,0,368,513,1497]],[[0,0,368,513,1500]],[[0,0,368,513,1503]],[[0,0,368,513,1506]],[[0,0,368,513,1509]],[[0,0,368,513,1512]],[[0,0,368,513,1515]],[[0,0,368,513,1518]],[[0,0,368,513,1521]],[[0,0,368,513,1524]],[[0,0,368,513,1527]],[[0,0,368,513,1530]],[[0,0,368,513,1533]],[[0,0,368,513,1536]],[[0,0,368,513,1539]],[[0,0,368,513,1542]],[[0,0,368,513,1545]],[[0,0,368,513,1548]],[[0,0,368,513,1551]],[[0,0,368,513,1554]],[[0,0,368,513,1557]],[[0,0,368,513,1560]],[[0,0,368,513,1563]],[[0,0,368,513,1566]],[[0,0,368,513,1569]],[[0,0,368,513,1572]],[[0,0,368,513,1575]],[[0,0,368,513,1578]],[[0,0,368,513,1581]],[[0,0,368,513,1584]],[[0,0,368,513,1587]],[[0,0,368,513,1590]],[[0,0,368,513,1593]],[[0,0,368,513,1596]],[[0,0,368,513,1599]],[[0,0,368,513,1602]],[[0,0,368,513,1605]],[[0,0,368,513,1608]],[[0,0,368,513,1611]],[[0,0,368,513,1614]],[[0,0,368,513,1617]],[[0,0,368,513,1620]],[[0,0,368,513,1623]],[[0,0,368,513,1626]],[[0,0,368,513,1629]],[[0,0,368,513,1632]],[[0,0,368,513,1635]],[[0,0,368,513,1638]],[[0,0,368,513,1641]],[[0,0,368,513,1644]],[[0,0,368,513,1647]],[[0,0,368,513,1650]],[[0,0,368,513,1653]],[[0,0,368,513,1656]],[[0,0,368,513,1659]],[[0,0,368,513,1662]],[[0,0,368,513,1665]],[[0,0,368,513,1668]],[[0,0,368,513,1671]],[[0,0,368,513,1674]],[[0,0,368,513,1677]],[[0,0,368,513,1680]],[[0,0,368,513,1683]],[[0,0,368,513,1686]],[[0,0,368,513,1689]],[[0,0,368,513,1692]],[[0,0,368,513,1695]],[[0,0,368,513,1698]],[[0,0,368,513,1701]],[[0,0,368,513,1704]],[[0,0,368,513,1707]],[[0,0,368,513,1710]],[[0,0,368,513,1713]],[[0,0,368,513,1716]],[[0,0,368,513,1719]],[[0,0,368,513,1722]],[[0,0,368,513,1725]],[[0,0,368,513,1728]],[[0,0,368,513,1731]],[[0,0,368,513,1734]],[[0,0,368,513,1737]],[[0,0,368,513,1740]],[[0,0,368,513,1743]],[[0,0,368,513,1746]],[[0,0,368,513,1749]],[[0,0,368,513,1752]],[[0,0,368,513,1755]],[[0,0,368,513,1758]],[[0,0,368,513,1761]],[[0,0,368,513,1764]],[[0,0,368,513,1767]],[[0,0,368,513,1770]],[[0,0,368,513,1773]],[[0,0,368,513,1776]],[[0,0,368,513,1779]],[[0,0,368,513,1782]],[[0,0,368,513,1785]],[[0,0,368,513,1788]],[[0,0,368,513,1791]],[[0,0,368,513,1794]],[[0,0,368,513,1797]],[[0,0,368,513,1800]],[[0,0,368,513,1803]],[[0,0,368,513,1806]],[[0,0,368,513,1809]],[[0,0,368,513,1812]],[[0,0,368,513,1815]],[[0,0,368,513,1818]],[[0,0,368,513,1821]],[[0,0,368,513,1824]],[[0,0,368,513,1827]],[[0,0,368,513,1830]],[[0,0,368,513,1833]],[[0,0,368,513,1836]],[[0,0,368,513,1839]],[[0,0,368,513,1842]],[[0,0,368,513,1845]],[[0,0,368,513,1848]],[[0,0,368,513,1851]],[[0,0,368,513,1854]],[[0,0,368,513,1857]],[[0,0,368,513,1860]],[[0,0,368,513,1863]],[[0,0,368,513,1866]],[[0,0,368,513,1869]],[[0,0,368,513,1872]],[[0,0,368,513,1875]],[[0,0,368,513,1878]],[[0,0,368,513,1881]],[[0,0,368,513,1884]],[[0,0,368,513,1887]],[[0,0,368,513,1890]],[[0,0,368,513,1893]],[[0,0,
368,513,1896]],[[0,0,368,513,1899]],[[0,0,368,513,1902]],[[0,0,368,513,1905]],[[0,0,368,513,1908]],[[0,0,368,513,1911]],[[0,0,368,513,1914]],[[0,0,368,513,1917]],[[0,0,368,513,1920]],[[0,0,368,513,1923]],[[0,0,368,513,1926]],[[0,0,368,513,1929]],[[0,0,368,513,1932]],[[0,0,368,513,1935]],[[0,0,368,513,1938]],[[0,0,368,513,1941]],[[0,0,368,513,1944]],[[0,0,368,513,1947]],[[0,0,368,513,1950]],[[0,0,368,513,1953]],[[0,0,368,513,1956]],[[0,0,368,513,1959]],[[0,0,368,513,1962]],[[0,0,368,513,1965]],[[0,0,368,513,1968]],[[0,0,368,513,1971]],[[0,0,368,513,1974]],[[0,0,368,513,1977]],[[0,0,368,513,1980]],[[0,0,368,513,1983]],[[0,0,368,513,1986]],[[0,0,368,513,1989]],[[0,0,368,513,1992]],[[0,0,368,513,1995]],[[0,0,368,513,1998]],[[0,0,368,513,2001]],[[0,0,368,513,2004]],[[0,0,368,513,2007]],[[0,0,368,513,2010]],[[0,0,368,513,2013]],[[0,0,368,513,2016]],[[0,0,368,513,2019]],[[0,0,368,513,2022]],[[0,0,368,513,2025]],[[0,0,368,513,2028]],[[0,0,368,513,2031]],[[0,0,368,513,2034]],[[0,0,368,513,2037]],[[0,0,368,513,2040]],[[0,0,368,513,2043]],[[0,0,368,513,2046]],[[0,0,368,513,2049]],[[0,0,368,513,2052]],[[0,0,368,513,2055]],[[0,0,368,513,2058]],[[0,0,368,513,2061]],[[0,0,368,513,2064]],[[0,0,368,513,2067]],[[0,0,368,513,2070]],[[0,0,368,513,2073]],[[0,0,368,513,2076]],[[0,0,368,513,2079]],[[0,0,368,513,2082]],[[0,0,368,513,2085]],[[0,0,368,513,2088]],[[0,0,368,513,2091]],[[0,0,368,513,2094]],[[0,0,368,513,2097]],[[0,0,368,513,2100]],[[0,0,368,513,2103]],[[0,0,368,513,2106]],[[0,0,368,513,2109]],[[0,0,368,513,2112]],[[0,0,368,513,2115]],[[0,0,368,513,2118]],[[0,0,368,513,2121]],[[0,0,368,513,2124]],[[0,0,368,513,2127]],[[0,0,368,513,2130]],[[0,0,368,513,2133]],[[0,0,368,513,2136]],[[0,0,368,513,2139]],[[0,0,368,513,2142]],[[0,0,368,513,2145]],[[0,0,368,513,2148]],[[0,0,368,513,2151]],[[0,0,368,513,2154]],[[0,0,368,513,2157]],[[0,0,368,513,2160]],[[0,0,368,513,2163]],[[0,0,368,513,2166]],[[0,0,368,513,2169]],[[0,0,368,513,2172]],[[0,0,368,513,2175]],[[0,0,368,513,2178]],[[0,0,368,513,2181]],[[0,0,368,513,2184]],[[0,0,368,513,2187]],[[0,0,368,513,2190]],[[0,0,368,513,2193]],[[0,0,368,513,2196]],[[0,0,368,513,2199]],[[0,0,368,513,2202]],[[0,0,368,513,2205]],[[0,0,368,513,2208]],[[0,0,368,513,2211]],[[0,0,368,513,2214]],[[0,0,368,513,2217]],[[0,0,368,513,2220]],[[0,0,368,513,2223]],[[0,0,368,513,2226]],[[0,0,368,513,2229]],[[0,0,368,513,2232]],[[0,0,368,513,2235]],[[0,0,368,513,2238]],[[0,0,368,513,2241]],[[0,0,368,513,2244]],[[0,0,368,513,2247]],[[0,0,368,513,2250]],[[0,0,368,513,2253]],[[0,0,368,513,2256]],[[0,0,368,513,2259]],[[0,0,368,513,2262]],[[0,0,368,513,2265]],[[0,0,368,513,2268]],[[0,0,368,513,2271]],[[0,0,368,513,2274]],[[0,0,368,513,2277]],[[0,0,368,513,2280]],[[0,0,368,513,2283]],[[0,0,368,513,2286]],[[0,0,368,513,2289]],[[0,0,368,513,2292]],[[0,0,368,513,2295]],[[0,0,368,513,2298]],[[0,0,368,513,2301]],[[0,0,368,513,2304]],[[0,0,368,513,2307]],[[0,0,368,513,2310]],[[0,0,368,513,2313]],[[0,0,368,513,2316]],[[0,0,368,513,2319]],[[0,0,368,513,2322]],[[0,0,368,513,2325]],[[0,0,368,513,2328]],[[0,0,368,513,2331]],[[0,0,368,513,2334]],[[0,0,368,513,2337]],[[0,0,368,513,2340]],[[0,0,368,513,2343]],[[0,0,368,513,2346]],[[0,0,368,513,2349]],[[0,0,368,513,2352]],[[0,0,368,513,2355]],[[0,0,368,513,2358]],[[0,0,368,513,2361]],[[0,0,368,513,2364]],[[0,0,368,513,2367]],[[0,0,368,513,2370]],[[0,0,368,513,2373]],[[0,0,368,513,2376]],[[0,0,368,513,2379]],[[0,0,368,513,2382]],[[0,0,368,513,2385]],[[0,0,368,513,2388]],[[0,0,368,513,2391]],[[0,0,368,513,2394]],[[0,0,368,513,2397]],[[0,0,368,513,2400]],[[0,0,368,51
3,2403]],[[0,0,368,513,2406]],[[0,0,368,513,2409]],[[0,0,368,513,2412]],[[0,0,368,513,2415]],[[0,0,368,513,2418]],[[0,0,368,513,2421]],[[0,0,368,513,2424]],[[0,0,368,513,2427]],[[0,0,368,513,2430]],[[0,0,368,513,2433]],[[0,0,368,513,2436]],[[0,0,368,513,2439]],[[0,0,368,513,2442]],[[0,0,368,513,2445]],[[0,0,368,513,2448]],[[0,0,368,513,2451]],[[0,0,368,513,2454]],[[0,0,368,513,2457]],[[0,0,368,513,2460]],[[0,0,368,513,2463]],[[0,0,368,513,2466]],[[0,0,368,513,2469]],[[0,0,368,513,2472]],[[0,0,368,513,2475]],[[0,0,368,513,2478]],[[0,0,368,513,2481]],[[0,0,368,513,2484]],[[0,0,368,513,2487]],[[0,0,368,513,2490]],[[0,0,368,513,2493]],[[0,0,368,513,2496]],[[0,0,368,513,2499]],[[0,0,368,513,2502]],[[0,0,368,513,2505]],[[0,0,368,513,2508]],[[0,0,368,513,2511]],[[0,0,368,513,2514]],[[0,0,368,513,2517]],[[0,0,368,513,2520]],[[0,0,368,513,2523]],[[0,0,368,513,2526]],[[0,0,368,513,2529]],[[0,0,368,513,2532]],[[0,0,368,513,2535]],[[0,0,368,513,2538]],[[0,0,368,513,2541]],[[0,0,368,513,2544]],[[0,0,368,513,2547]],[[0,0,368,513,2550]],[[0,0,368,513,2553]],[[0,0,368,513,2556]],[[0,0,368,513,2559]],[[0,0,368,513,2562]],[[0,0,368,513,2565]],[[0,0,368,513,2568]],[[0,0,368,513,2571]],[[0,0,368,513,2574]],[[0,0,368,513,2577]],[[0,0,368,513,2580]],[[0,0,368,513,2583]],[[0,0,368,513,2586]],[[0,0,368,513,2589]],[[0,0,368,513,2592]],[[0,0,368,513,2595]],[[0,0,368,513,2598]],[[0,0,368,513,2601]],[[0,0,368,513,2604]],[[0,0,368,513,2607]],[[0,0,368,513,2610]],[[0,0,368,513,2613]],[[0,0,368,513,2616]],[[0,0,368,513,2619]],[[0,0,368,513,2622]],[[0,0,368,513,2625]],[[0,0,368,513,2628]],[[0,0,368,513,2631]],[[0,0,368,513,2634]],[[0,0,368,513,2637]],[[0,0,368,513,2640]],[[0,0,368,513,2643]],[[0,0,368,513,2646]],[[0,0,368,513,2649]],[[0,0,368,513,2652]],[[0,0,368,513,2655]],[[0,0,368,513,2658]],[[0,0,368,513,2661]],[[0,0,368,513,2664]],[[0,0,368,513,2667]],[[0,0,368,513,2670]],[[0,0,368,513,2673]],[[0,0,368,513,2676]],[[0,0,368,513,2679]],[[0,0,368,513,2682]],[[0,0,368,513,2685]],[[0,0,368,513,2688]],[[0,0,368,513,2691]],[[0,0,368,513,2694]],[[0,0,368,513,2697]],[[0,0,368,513,2700]],[[0,0,368,513,2703]],[[0,0,368,513,2706]],[[0,0,368,513,2709]],[[0,0,368,513,2712]],[[0,0,368,513,2715]],[[0,0,368,513,2718]],[[0,0,368,513,2721]],[[0,0,368,513,2724]],[[0,0,368,513,2727]],[[0,0,368,513,2730]],[[0,0,368,513,2733]],[[0,0,368,513,2736]],[[0,0,368,513,2739]],[[0,0,368,513,2742]],[[0,0,368,513,2745]],[[0,0,368,513,2748]],[[0,0,368,513,2751]],[[0,0,368,513,2754]],[[0,0,368,513,2757]],[[0,0,368,513,2760]],[[0,0,368,513,2763]],[[0,0,368,513,2766]],[[0,0,368,513,2769]],[[0,0,368,513,2772]],[[0,0,368,513,2775]],[[0,0,368,513,2778]],[[0,0,368,513,2781]],[[0,0,368,513,2784]],[[0,0,368,513,2787]],[[0,0,368,513,2790]],[[0,0,368,513,2793]],[[0,0,368,513,2796]]],"text_len_per_page":[53,53,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54
,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54,54],"metadata":{"format":"PDF 1.6","title":"","author":"","subject":"","keywords":"","creator":"Adobe Acrobat 7.0","producer":"Adobe Acrobat 7.0 Image Conversion Plug-in","creationDate":"D:20080404141457+01'00'","modDate":"D:20080404144821+01'00'","trapped":"","encryption":null}} - # o = json.loads(json.dumps(o)) - # total_page = o["total_page"] - # page_width = o["page_width_pts"] - # page_height = o["page_height_pts"] - # img_sz_list = o["image_info_per_page"] - # text_len_list = o['text_len_per_page'] - # pdf_path = o['pdf_path'] - # is_encrypted = o['is_encrypted'] - # is_needs_password = o['is_needs_password'] - # if is_encrypted or total_page == 0 or is_needs_password: # 加密的,需要密码的,没有页面的,都不处理 - # print("加密的") - # exit(0) - # tag = classify(pdf_path, total_page, page_width, page_height, img_sz_list, text_len_list) - # o['is_text_pdf'] = tag - # print(json.dumps(o, ensure_ascii=False)) diff --git a/magic_pdf/filter/pdf_meta_scan.py b/magic_pdf/filter/pdf_meta_scan.py deleted file mode 100644 index 67e56315057299b4888bc4058f057d857c0b3dc8..0000000000000000000000000000000000000000 --- a/magic_pdf/filter/pdf_meta_scan.py +++ /dev/null @@ -1,397 +0,0 @@ -"""输入: s3路径,每行一个 输出: pdf文件元信息,包括每一页上的所有图片的长宽高,bbox位置.""" - -from collections import Counter - -import fitz -from loguru import logger - -from magic_pdf.config.drop_reason import DropReason -from magic_pdf.libs.commons import 
get_top_percent_list, mymax -from magic_pdf.libs.language import detect_lang -from magic_pdf.libs.pdf_check import detect_invalid_chars_by_pymupdf, detect_invalid_chars - -scan_max_page = 50 -junk_limit_min = 10 - - -def calculate_max_image_area_per_page(result: list, page_width_pts, page_height_pts): - max_image_area_per_page = [ - mymax([(x1 - x0) * (y1 - y0) for x0, y0, x1, y1, _ in page_img_sz]) - for page_img_sz in result - ] - page_area = int(page_width_pts) * int(page_height_pts) - max_image_area_per_page = [area / page_area for area in max_image_area_per_page] - max_image_area_per_page = [area for area in max_image_area_per_page if area > 0.6] - return max_image_area_per_page - - -def process_image(page, junk_img_bojids=[]): - page_result = [] # 存每个页面里的多张图四元组信息 - items = page.get_images() - dedup = set() - for img in items: - # 这里返回的是图片在page上的实际展示的大小。返回一个数组,每个元素第一部分是 - img_bojid = img[ - 0 - ] # 在pdf文件中是全局唯一的,如果这个图反复出现在pdf里那么就可能是垃圾信息,例如水印、页眉页脚等 - if img_bojid in junk_img_bojids: # 如果是垃圾图像,就跳过 - continue - recs = page.get_image_rects(img, transform=True) - if recs: - rec = recs[0][0] - x0, y0, x1, y1 = map(int, rec) - width = x1 - x0 - height = y1 - y0 - if ( - x0, - y0, - x1, - y1, - img_bojid, - ) in dedup: # 这里面会出现一些重复的bbox,无需重复出现,需要去掉 - continue - if not all( - [width, height] - ): # 长和宽任何一个都不能是0,否则这个图片不可见,没有实际意义 - continue - dedup.add((x0, y0, x1, y1, img_bojid)) - page_result.append([x0, y0, x1, y1, img_bojid]) - return page_result - - -def get_image_info(doc: fitz.Document, page_width_pts, page_height_pts) -> list: - """返回每个页面里的图片的四元组,每个页面多个图片。 - - :param doc: - :return: - """ - # 使用 Counter 计数 img_bojid 的出现次数 - img_bojid_counter = Counter(img[0] for page in doc for img in page.get_images()) - # 找出出现次数超过 len(doc) 半数的 img_bojid - - junk_limit = max(len(doc) * 0.5, junk_limit_min) # 对一些页数比较少的进行豁免 - - junk_img_bojids = [ - img_bojid - for img_bojid, count in img_bojid_counter.items() - if count >= junk_limit - ] - - # todo 加个判断,用前十页就行,这些垃圾图片需要满足两个条件,不止出现的次数要足够多,而且图片占书页面积的比例要足够大,且图与图大小都差不多 - # 有两种扫描版,一种文字版,这里可能会有误判 - # 扫描版1:每页都有所有扫描页图片,特点是图占比大,每页展示1张 - # 扫描版2,每页存储的扫描页图片数量递增,特点是图占比大,每页展示1张,需要清空junklist跑前50页图片信息用于分类判断 - # 文 字版1.每页存储所有图片,特点是图片占页面比例不大,每页展示可能为0也可能不止1张 这种pdf需要拿前10页抽样检测img大小和个数,如果符合需要清空junklist - imgs_len_list = [len(page.get_images()) for page in doc] - - special_limit_pages = 10 - - # 统一用前十页结果做判断 - result = [] - break_loop = False - for i, page in enumerate(doc): - if break_loop: - break - if i >= special_limit_pages: - break - page_result = process_image( - page - ) # 这里不传junk_img_bojids,拿前十页所有图片信息用于后续分析 - result.append(page_result) - for item in result: - if not any( - item - ): # 如果任何一页没有图片,说明是个文字版,需要判断是否为特殊文字版 - if ( - max(imgs_len_list) == min(imgs_len_list) - and max(imgs_len_list) >= junk_limit_min - ): # 如果是特殊文字版,就把junklist置空并break - junk_img_bojids = [] - else: # 不是特殊文字版,是个普通文字版,但是存在垃圾图片,不置空junklist - pass - break_loop = True - break - if not break_loop: - # 获取前80%的元素 - top_eighty_percent = get_top_percent_list(imgs_len_list, 0.8) - # 检查前80%的元素是否都相等 - if len(set(top_eighty_percent)) == 1 and max(imgs_len_list) >= junk_limit_min: - # # 如果前10页跑完都有图,根据每页图片数量是否相等判断是否需要清除junklist - # if max(imgs_len_list) == min(imgs_len_list) and max(imgs_len_list) >= junk_limit_min: - - # 前10页都有图,且每页数量一致,需要检测图片大小占页面的比例判断是否需要清除junklist - max_image_area_per_page = calculate_max_image_area_per_page( - result, page_width_pts, page_height_pts - ) - if ( - len(max_image_area_per_page) < 0.8 * special_limit_pages - ): # 前10页不全是大图,说明可能是个文字版pdf,把垃圾图片list置空 - junk_img_bojids = [] - else: 
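
For orientation, a minimal standalone sketch of the junk-image heuristic used in get_image_info above: an image xref that repeats on at least half of the pages (and at least junk_limit_min times) is likely a watermark, header or footer rather than real content. The PyMuPDF calls mirror the ones already used in this module; the input path and function name are placeholders, and the sketch skips the special-case handling the module applies afterwards.

    from collections import Counter
    import fitz  # PyMuPDF

    # Illustrative sketch of the repeated-xref heuristic above, not the module code.
    def find_junk_image_xrefs(pdf_path, junk_limit_min=10):
        doc = fitz.open(pdf_path)
        # img[0] is the image xref; the same xref appearing on many pages
        # usually means a watermark / header / footer image.
        counts = Counter(img[0] for page in doc for img in page.get_images())
        junk_limit = max(len(doc) * 0.5, junk_limit_min)
        return [xref for xref, n in counts.items() if n >= junk_limit]

    # print(find_junk_image_xrefs('sample.pdf'))   # placeholder path
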
# 前10页都有图,而且80%都是大图,且每页图片数量一致并都很多,说明是扫描版1,不需要清空junklist - pass - else: # 每页图片数量不一致,需要清掉junklist全量跑前50页图片 - junk_img_bojids = [] - - # 正式进入取前50页图片的信息流程 - result = [] - for i, page in enumerate(doc): - if i >= scan_max_page: - break - page_result = process_image(page, junk_img_bojids) - # logger.info(f"page {i} img_len: {len(page_result)}") - result.append(page_result) - - return result, junk_img_bojids - - -def get_pdf_page_size_pts(doc: fitz.Document): - page_cnt = len(doc) - l: int = min(page_cnt, 50) - # 把所有宽度和高度塞到两个list 分别取中位数(中间遇到了个在纵页里塞横页的pdf,导致宽高互换了) - page_width_list = [] - page_height_list = [] - for i in range(l): - page = doc[i] - page_rect = page.rect - page_width_list.append(page_rect.width) - page_height_list.append(page_rect.height) - - page_width_list.sort() - page_height_list.sort() - - median_width = page_width_list[len(page_width_list) // 2] - median_height = page_height_list[len(page_height_list) // 2] - - return median_width, median_height - - -def get_pdf_textlen_per_page(doc: fitz.Document): - text_len_lst = [] - for page in doc: - # 拿包含img和text的所有blocks - # text_block = page.get_text("blocks") - # 拿所有text的blocks - # text_block = page.get_text("words") - # text_block_len = sum([len(t[4]) for t in text_block]) - # 拿所有text的str - text_block = page.get_text('text') - text_block_len = len(text_block) - # logger.info(f"page {page.number} text_block_len: {text_block_len}") - text_len_lst.append(text_block_len) - - return text_len_lst - - -def get_pdf_text_layout_per_page(doc: fitz.Document): - """根据PDF文档的每一页文本布局,判断该页的文本布局是横向、纵向还是未知。 - - Args: - doc (fitz.Document): PDF文档对象。 - - Returns: - List[str]: 每一页的文本布局(横向、纵向、未知)。 - """ - text_layout_list = [] - - for page_id, page in enumerate(doc): - if page_id >= scan_max_page: - break - # 创建每一页的纵向和横向的文本行数计数器 - vertical_count = 0 - horizontal_count = 0 - text_dict = page.get_text('dict') - if 'blocks' in text_dict: - for block in text_dict['blocks']: - if 'lines' in block: - for line in block['lines']: - # 获取line的bbox顶点坐标 - x0, y0, x1, y1 = line['bbox'] - # 计算bbox的宽高 - width = x1 - x0 - height = y1 - y0 - # 计算bbox的面积 - area = width * height - font_sizes = [] - for span in line['spans']: - if 'size' in span: - font_sizes.append(span['size']) - if len(font_sizes) > 0: - average_font_size = sum(font_sizes) / len(font_sizes) - else: - average_font_size = ( - 10 # 有的line拿不到font_size,先定一个阈值100 - ) - if ( - area <= average_font_size**2 - ): # 判断bbox的面积是否小于平均字体大小的平方,单字无法计算是横向还是纵向 - continue - else: - if 'wmode' in line: # 通过wmode判断文本方向 - if line['wmode'] == 1: # 判断是否为竖向文本 - vertical_count += 1 - elif line['wmode'] == 0: # 判断是否为横向文本 - horizontal_count += 1 - # if 'dir' in line: # 通过旋转角度计算判断文本方向 - # # 获取行的 "dir" 值 - # dir_value = line['dir'] - # cosine, sine = dir_value - # # 计算角度 - # angle = math.degrees(math.acos(cosine)) - # - # # 判断是否为横向文本 - # if abs(angle - 0) < 0.01 or abs(angle - 180) < 0.01: - # # line_text = ' '.join(span['text'] for span in line['spans']) - # # print('This line is horizontal:', line_text) - # horizontal_count += 1 - # # 判断是否为纵向文本 - # elif abs(angle - 90) < 0.01 or abs(angle - 270) < 0.01: - # # line_text = ' '.join(span['text'] for span in line['spans']) - # # print('This line is vertical:', line_text) - # vertical_count += 1 - # print(f"page_id: {page_id}, vertical_count: {vertical_count}, horizontal_count: {horizontal_count}") - # 判断每一页的文本布局 - if vertical_count == 0 and horizontal_count == 0: # 该页没有文本,无法判断 - text_layout_list.append('unknow') - continue - else: - if vertical_count > horizontal_count: # 
该页的文本纵向行数大于横向的 - text_layout_list.append('vertical') - else: # 该页的文本横向行数大于纵向的 - text_layout_list.append('horizontal') - # logger.info(f"page_id: {page_id}, vertical_count: {vertical_count}, horizontal_count: {horizontal_count}") - return text_layout_list - - -"""定义一个自定义异常用来抛出单页svg太多的pdf""" - - -class PageSvgsTooManyError(Exception): - def __init__(self, message='Page SVGs are too many'): - self.message = message - super().__init__(self.message) - - -def get_svgs_per_page(doc: fitz.Document): - svgs_len_list = [] - for page_id, page in enumerate(doc): - # svgs = page.get_drawings() - svgs = page.get_cdrawings() # 切换成get_cdrawings,效率更高 - len_svgs = len(svgs) - if len_svgs >= 3000: - raise PageSvgsTooManyError() - else: - svgs_len_list.append(len_svgs) - # logger.info(f"page_id: {page_id}, svgs_len: {len(svgs)}") - return svgs_len_list - - -def get_imgs_per_page(doc: fitz.Document): - imgs_len_list = [] - for page_id, page in enumerate(doc): - imgs = page.get_images() - imgs_len_list.append(len(imgs)) - # logger.info(f"page_id: {page}, imgs_len: {len(imgs)}") - - return imgs_len_list - - -def get_language(doc: fitz.Document): - """ - 获取PDF文档的语言。 - Args: - doc (fitz.Document): PDF文档对象。 - Returns: - str: 文档语言,如 "en-US"。 - """ - language_lst = [] - for page_id, page in enumerate(doc): - if page_id >= scan_max_page: - break - # 拿所有text的str - text_block = page.get_text('text') - page_language = detect_lang(text_block) - language_lst.append(page_language) - - # logger.info(f"page_id: {page_id}, page_language: {page_language}") - - # 统计text_language_list中每种语言的个数 - count_dict = Counter(language_lst) - # 输出text_language_list中出现的次数最多的语言 - language = max(count_dict, key=count_dict.get) - return language - - -def check_invalid_chars(pdf_bytes): - """乱码检测.""" - # return detect_invalid_chars_by_pymupdf(pdf_bytes) - return detect_invalid_chars(pdf_bytes) - - -def pdf_meta_scan(pdf_bytes: bytes): - """ - :param s3_pdf_path: - :param pdf_bytes: pdf文件的二进制数据 - 几个维度来评价:是否加密,是否需要密码,纸张大小,总页数,是否文字可提取 - """ - doc = fitz.open('pdf', pdf_bytes) - is_needs_password = doc.needs_pass - is_encrypted = doc.is_encrypted - total_page = len(doc) - if total_page == 0: - logger.warning(f'drop this pdf, drop_reason: {DropReason.EMPTY_PDF}') - result = {'_need_drop': True, '_drop_reason': DropReason.EMPTY_PDF} - return result - else: - page_width_pts, page_height_pts = get_pdf_page_size_pts(doc) - # logger.info(f"page_width_pts: {page_width_pts}, page_height_pts: {page_height_pts}") - - # svgs_per_page = get_svgs_per_page(doc) - # logger.info(f"svgs_per_page: {svgs_per_page}") - imgs_per_page = get_imgs_per_page(doc) - # logger.info(f"imgs_per_page: {imgs_per_page}") - - image_info_per_page, junk_img_bojids = get_image_info( - doc, page_width_pts, page_height_pts - ) - # logger.info(f"image_info_per_page: {image_info_per_page}, junk_img_bojids: {junk_img_bojids}") - text_len_per_page = get_pdf_textlen_per_page(doc) - # logger.info(f"text_len_per_page: {text_len_per_page}") - # text_layout_per_page = get_pdf_text_layout_per_page(doc) - # logger.info(f"text_layout_per_page: {text_layout_per_page}") - # text_language = get_language(doc) - # logger.info(f"text_language: {text_language}") - invalid_chars = check_invalid_chars(pdf_bytes) - # logger.info(f"invalid_chars: {invalid_chars}") - - # 最后输出一条json - res = { - 'is_needs_password': is_needs_password, - 'is_encrypted': is_encrypted, - 'total_page': total_page, - 'page_width_pts': int(page_width_pts), - 'page_height_pts': int(page_height_pts), - 'image_info_per_page': 
image_info_per_page, - 'text_len_per_page': text_len_per_page, - # 'text_layout_per_page': text_layout_per_page, - # 'text_language': text_language, - # "svgs_per_page": svgs_per_page, - 'imgs_per_page': imgs_per_page, # 增加每页img数量list - 'junk_img_bojids': junk_img_bojids, # 增加垃圾图片的bojid list - 'invalid_chars': invalid_chars, - 'metadata': doc.metadata, - } - # logger.info(json.dumps(res, ensure_ascii=False)) - return res - - -if __name__ == '__main__': - pass - # "D:\project/20231108code-clean\pdf_cost_time\竖排例子\净空法师-大乘无量寿.pdf" - # "D:\project/20231108code-clean\pdf_cost_time\竖排例子\三国演义_繁体竖排版.pdf" - # "D:\project/20231108code-clean\pdf_cost_time\scihub\scihub_86800000\libgen.scimag86880000-86880999.zip_10.1021/acsami.1c03109.s002.pdf" - # "D:/project/20231108code-clean/pdf_cost_time/scihub/scihub_18600000/libgen.scimag18645000-18645999.zip_10.1021/om3006239.pdf" - # file_content = read_file("D:/project/20231108code-clean/pdf_cost_time/scihub/scihub_31000000/libgen.scimag31098000-31098999.zip_10.1109/isit.2006.261791.pdf","") # noqa: E501 - # file_content = read_file("D:\project/20231108code-clean\pdf_cost_time\竖排例子\净空法师_大乘无量寿.pdf","") - # doc = fitz.open("pdf", file_content) - # text_layout_lst = get_pdf_text_layout_per_page(doc) - # print(text_layout_lst) diff --git a/magic_pdf/integrations/__init__.py b/magic_pdf/integrations/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/integrations/rag/__init__.py b/magic_pdf/integrations/rag/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/integrations/rag/api.py b/magic_pdf/integrations/rag/api.py deleted file mode 100644 index 5c05f91169dad911b147a4f9c518af26a419b449..0000000000000000000000000000000000000000 --- a/magic_pdf/integrations/rag/api.py +++ /dev/null @@ -1,82 +0,0 @@ -import os -from pathlib import Path - -from loguru import logger - -from magic_pdf.integrations.rag.type import (ElementRelation, LayoutElements, - Node) -from magic_pdf.integrations.rag.utils import inference - - -class RagPageReader: - - def __init__(self, pagedata: LayoutElements): - self.o = [ - Node( - category_type=v.category_type, - text=v.text, - image_path=v.image_path, - anno_id=v.anno_id, - latex=v.latex, - html=v.html, - ) for v in pagedata.layout_dets - ] - - self.pagedata = pagedata - - def __iter__(self): - return iter(self.o) - - def get_rel_map(self) -> list[ElementRelation]: - return self.pagedata.extra.element_relation - - -class RagDocumentReader: - - def __init__(self, ragdata: list[LayoutElements]): - self.o = [RagPageReader(v) for v in ragdata] - - def __iter__(self): - return iter(self.o) - - -class DataReader: - - def __init__(self, path_or_directory: str, method: str, output_dir: str): - self.path_or_directory = path_or_directory - self.method = method - self.output_dir = output_dir - self.pdfs = [] - if os.path.isdir(path_or_directory): - for doc_path in Path(path_or_directory).glob('*.pdf'): - self.pdfs.append(doc_path) - else: - assert path_or_directory.endswith('.pdf') - self.pdfs.append(Path(path_or_directory)) - - def get_documents_count(self) -> int: - """Returns the number of documents in the directory.""" - return len(self.pdfs) - - def get_document_result(self, idx: int) -> RagDocumentReader | None: - """ - Args: - idx (int): the index of documents under the - directory path_or_directory - - Returns: - RagDocumentReader | None: 
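
A usage sketch of the DataReader / RagDocumentReader API defined in magic_pdf/integrations/rag/api.py above. The directory, method string and output path are placeholders; the iteration pattern follows the reader classes as shown.

    # Illustrative usage of the RAG integration readers shown above.
    from magic_pdf.integrations.rag.api import DataReader

    reader = DataReader('./pdfs', 'auto', './output')   # placeholder arguments
    print(reader.get_documents_count())

    doc = reader.get_document_result(0)    # RagDocumentReader or None
    if doc is not None:
        for page in doc:                   # each page is a RagPageReader
            for node in page:              # each node is a Node (text / image / table ...)
                print(node.category_type, (node.text or '')[:40])
            print(page.get_rel_map())      # sibling relations between elements on the page
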
RagDocumentReader is an iterable object, - more details @RagDocumentReader - """ - if idx >= self.get_documents_count() or idx < 0: - logger.error(f'invalid idx: {idx}') - return None - res = inference(str(self.pdfs[idx]), self.output_dir, self.method) - if res is None: - logger.warning(f'failed to inference pdf {self.pdfs[idx]}') - return None - return RagDocumentReader(res) - - def get_document_filename(self, idx: int) -> Path: - """get the filename of the document.""" - return self.pdfs[idx] diff --git a/magic_pdf/integrations/rag/type.py b/magic_pdf/integrations/rag/type.py deleted file mode 100644 index 11258af39487f3084a900d44c5bc4eb364ef2230..0000000000000000000000000000000000000000 --- a/magic_pdf/integrations/rag/type.py +++ /dev/null @@ -1,82 +0,0 @@ -from enum import Enum - -from pydantic import BaseModel, Field - - -# rag -class CategoryType(Enum): # py310 not support StrEnum - text = 'text' - title = 'title' - interline_equation = 'interline_equation' - image = 'image' - image_body = 'image_body' - image_caption = 'image_caption' - table = 'table' - table_body = 'table_body' - table_caption = 'table_caption' - table_footnote = 'table_footnote' - - -class ElementRelType(Enum): - sibling = 'sibling' - - -class PageInfo(BaseModel): - page_no: int = Field(description='the index of page, start from zero', - ge=0) - height: int = Field(description='the height of page', gt=0) - width: int = Field(description='the width of page', ge=0) - image_path: str | None = Field(description='the image of this page', - default=None) - - -class ContentObject(BaseModel): - category_type: CategoryType = Field(description='类别') - poly: list[float] = Field( - description=('Coordinates, need to convert back to PDF coordinates,' - ' order is top-left, top-right, bottom-right, bottom-left' - ' x,y coordinates')) - ignore: bool = Field(description='whether ignore this object', - default=False) - text: str | None = Field(description='text content of the object', - default=None) - image_path: str | None = Field(description='path of embedded image', - default=None) - order: int = Field(description='the order of this object within a page', - default=-1) - anno_id: int = Field(description='unique id', default=-1) - latex: str | None = Field(description='latex result', default=None) - html: str | None = Field(description='html result', default=None) - - -class ElementRelation(BaseModel): - source_anno_id: int = Field(description='unique id of the source object', - default=-1) - target_anno_id: int = Field(description='unique id of the target object', - default=-1) - relation: ElementRelType = Field( - description='the relation between source and target element') - - -class LayoutElementsExtra(BaseModel): - element_relation: list[ElementRelation] = Field( - description='the relation between source and target element') - - -class LayoutElements(BaseModel): - layout_dets: list[ContentObject] = Field( - description='layout element details') - page_info: PageInfo = Field(description='page info') - extra: LayoutElementsExtra = Field(description='extra information') - - -# iter data format -class Node(BaseModel): - category_type: CategoryType = Field(description='类别') - text: str | None = Field(description='text content of the object', - default=None) - image_path: str | None = Field(description='path of embedded image', - default=None) - anno_id: int = Field(description='unique id', default=-1) - latex: str | None = Field(description='latex result', default=None) - html: str | None = Field(description='html result', 
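# Usage sketch for the RAG reader API above. Paths are placeholders; the class and
# method names come directly from api.py, and 'ocr' is one of the parse methods the
# surrounding code itself uses.
from magic_pdf.integrations.rag.api import DataReader

reader = DataReader('/path/to/pdfs', 'ocr', '/path/to/output')
for idx in range(reader.get_documents_count()):
    doc = reader.get_document_result(idx)    # RagDocumentReader, or None on failure
    if doc is None:
        continue
    for page in doc:                         # one RagPageReader per page
        for node in page:                    # Node: category_type, text, image_path, ...
            print(node.category_type, (node.text or '')[:40])
        print(page.get_rel_map())            # sibling relations between anno_ids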
default=None) diff --git a/magic_pdf/integrations/rag/utils.py b/magic_pdf/integrations/rag/utils.py deleted file mode 100644 index 49e9dc0ee2a6955219012dd05aa58d6e56b1f25d..0000000000000000000000000000000000000000 --- a/magic_pdf/integrations/rag/utils.py +++ /dev/null @@ -1,284 +0,0 @@ -import json -import os -from pathlib import Path - -from loguru import logger - -import magic_pdf.model as model_config -from magic_pdf.config.ocr_content_type import BlockType, ContentType -from magic_pdf.data.data_reader_writer import FileBasedDataReader -from magic_pdf.dict2md.ocr_mkcontent import merge_para_with_text -from magic_pdf.integrations.rag.type import (CategoryType, ContentObject, - ElementRelation, ElementRelType, - LayoutElements, - LayoutElementsExtra, PageInfo) -from magic_pdf.tools.common import do_parse, prepare_env - - -def convert_middle_json_to_layout_elements( - json_data: dict, - output_dir: str, -) -> list[LayoutElements]: - uniq_anno_id = 0 - - res: list[LayoutElements] = [] - for page_no, page_data in enumerate(json_data['pdf_info']): - order_id = 0 - page_info = PageInfo( - height=int(page_data['page_size'][1]), - width=int(page_data['page_size'][0]), - page_no=page_no, - ) - layout_dets: list[ContentObject] = [] - extra_element_relation: list[ElementRelation] = [] - - for para_block in page_data['para_blocks']: - para_text = '' - para_type = para_block['type'] - - if para_type == BlockType.Text: - para_text = merge_para_with_text(para_block) - x0, y0, x1, y1 = para_block['bbox'] - content = ContentObject( - anno_id=uniq_anno_id, - category_type=CategoryType.text, - text=para_text, - order=order_id, - poly=[x0, y0, x1, y0, x1, y1, x0, y1], - ) - uniq_anno_id += 1 - order_id += 1 - layout_dets.append(content) - - elif para_type == BlockType.Title: - para_text = merge_para_with_text(para_block) - x0, y0, x1, y1 = para_block['bbox'] - content = ContentObject( - anno_id=uniq_anno_id, - category_type=CategoryType.title, - text=para_text, - order=order_id, - poly=[x0, y0, x1, y0, x1, y1, x0, y1], - ) - uniq_anno_id += 1 - order_id += 1 - layout_dets.append(content) - - elif para_type == BlockType.InterlineEquation: - para_text = merge_para_with_text(para_block) - x0, y0, x1, y1 = para_block['bbox'] - content = ContentObject( - anno_id=uniq_anno_id, - category_type=CategoryType.interline_equation, - text=para_text, - order=order_id, - poly=[x0, y0, x1, y0, x1, y1, x0, y1], - ) - uniq_anno_id += 1 - order_id += 1 - layout_dets.append(content) - - elif para_type == BlockType.Image: - body_anno_id = -1 - caption_anno_id = -1 - - for block in para_block['blocks']: - if block['type'] == BlockType.ImageBody: - for line in block['lines']: - for span in line['spans']: - if span['type'] == ContentType.Image: - x0, y0, x1, y1 = block['bbox'] - content = ContentObject( - anno_id=uniq_anno_id, - category_type=CategoryType.image_body, - image_path=os.path.join( - output_dir, span['image_path']), - order=order_id, - poly=[x0, y0, x1, y0, x1, y1, x0, y1], - ) - body_anno_id = uniq_anno_id - uniq_anno_id += 1 - order_id += 1 - layout_dets.append(content) - - for block in para_block['blocks']: - if block['type'] == BlockType.ImageCaption: - para_text += merge_para_with_text(block) - x0, y0, x1, y1 = block['bbox'] - content = ContentObject( - anno_id=uniq_anno_id, - category_type=CategoryType.image_caption, - text=para_text, - order=order_id, - poly=[x0, y0, x1, y0, x1, y1, x0, y1], - ) - caption_anno_id = uniq_anno_id - uniq_anno_id += 1 - order_id += 1 - layout_dets.append(content) - - if 
body_anno_id > 0 and caption_anno_id > 0: - element_relation = ElementRelation( - relation=ElementRelType.sibling, - source_anno_id=body_anno_id, - target_anno_id=caption_anno_id, - ) - extra_element_relation.append(element_relation) - - elif para_type == BlockType.Table: - body_anno_id, caption_anno_id, footnote_anno_id = -1, -1, -1 - - for block in para_block['blocks']: - if block['type'] == BlockType.TableCaption: - para_text += merge_para_with_text(block) - x0, y0, x1, y1 = block['bbox'] - content = ContentObject( - anno_id=uniq_anno_id, - category_type=CategoryType.table_caption, - text=para_text, - order=order_id, - poly=[x0, y0, x1, y0, x1, y1, x0, y1], - ) - caption_anno_id = uniq_anno_id - uniq_anno_id += 1 - order_id += 1 - layout_dets.append(content) - - for block in para_block['blocks']: - if block['type'] == BlockType.TableBody: - for line in block['lines']: - for span in line['spans']: - if span['type'] == ContentType.Table: - x0, y0, x1, y1 = para_block['bbox'] - content = ContentObject( - anno_id=uniq_anno_id, - category_type=CategoryType.table_body, - order=order_id, - poly=[x0, y0, x1, y0, x1, y1, x0, y1], - ) - body_anno_id = uniq_anno_id - uniq_anno_id += 1 - order_id += 1 - # if processed by table model - if span.get('latex', ''): - content.latex = span['latex'] - else: - content.image_path = os.path.join( - output_dir, span['image_path']) - layout_dets.append(content) - - for block in para_block['blocks']: - if block['type'] == BlockType.TableFootnote: - para_text += merge_para_with_text(block) - x0, y0, x1, y1 = block['bbox'] - content = ContentObject( - anno_id=uniq_anno_id, - category_type=CategoryType.table_footnote, - text=para_text, - order=order_id, - poly=[x0, y0, x1, y0, x1, y1, x0, y1], - ) - footnote_anno_id = uniq_anno_id - uniq_anno_id += 1 - order_id += 1 - layout_dets.append(content) - - if caption_anno_id != -1 and body_anno_id != -1: - element_relation = ElementRelation( - relation=ElementRelType.sibling, - source_anno_id=body_anno_id, - target_anno_id=caption_anno_id, - ) - extra_element_relation.append(element_relation) - - if footnote_anno_id != -1 and body_anno_id != -1: - element_relation = ElementRelation( - relation=ElementRelType.sibling, - source_anno_id=body_anno_id, - target_anno_id=footnote_anno_id, - ) - extra_element_relation.append(element_relation) - - res.append( - LayoutElements( - page_info=page_info, - layout_dets=layout_dets, - extra=LayoutElementsExtra( - element_relation=extra_element_relation), - )) - - return res - - -def inference(path, output_dir, method): - model_config.__use_inside_model__ = True - model_config.__model_mode__ = 'full' - if output_dir == '': - if os.path.isdir(path): - output_dir = os.path.join(path, 'output') - else: - output_dir = os.path.join(os.path.dirname(path), 'output') - - local_image_dir, local_md_dir = prepare_env(output_dir, - str(Path(path).stem), method) - - def read_fn(path): - disk_rw = FileBasedDataReader(os.path.dirname(path)) - return disk_rw.read(os.path.basename(path)) - - def parse_doc(doc_path: str): - try: - file_name = str(Path(doc_path).stem) - pdf_data = read_fn(doc_path) - do_parse( - output_dir, - file_name, - pdf_data, - [], - method, - False, - f_draw_span_bbox=False, - f_draw_layout_bbox=False, - f_dump_md=False, - f_dump_middle_json=True, - f_dump_model_json=False, - f_dump_orig_pdf=False, - f_dump_content_list=False, - f_draw_model_bbox=False, - ) - - middle_json_fn = os.path.join(local_md_dir, - f'{file_name}_middle.json') - with open(middle_json_fn) as fd: - jso = 
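# Note on the bbox -> poly conversion used repeatedly above: an axis-aligned
# bbox (x0, y0, x1, y1) is expanded to the four corners in the order documented
# for ContentObject.poly (top-left, top-right, bottom-right, bottom-left).
x0, y0, x1, y1 = 100.0, 50.0, 300.0, 80.0          # illustrative coordinates only
poly = [x0, y0, x1, y0, x1, y1, x0, y1]
# -> [100.0, 50.0, 300.0, 50.0, 300.0, 80.0, 100.0, 80.0]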
json.load(fd) - os.remove(middle_json_fn) - return convert_middle_json_to_layout_elements(jso, local_image_dir) - - except Exception as e: - logger.exception(e) - - return parse_doc(path) - - -if __name__ == '__main__': - import pprint - - base_dir = '/opt/data/pdf/resources/samples/' - if 0: - with open(base_dir + 'json_outputs/middle.json') as f: - d = json.load(f) - result = convert_middle_json_to_layout_elements(d, '/tmp') - pprint.pp(result) - if 0: - with open(base_dir + 'json_outputs/middle.3.json') as f: - d = json.load(f) - result = convert_middle_json_to_layout_elements(d, '/tmp') - pprint.pp(result) - - if 1: - res = inference( - base_dir + 'samples/pdf/one_page_with_table_image.pdf', - '/tmp/output', - 'ocr', - ) - pprint.pp(res) diff --git a/magic_pdf/libs/__init__.py b/magic_pdf/libs/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/libs/boxbase.py b/magic_pdf/libs/boxbase.py deleted file mode 100644 index 2813121bb3fcde988d510b89646478c97461da74..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/boxbase.py +++ /dev/null @@ -1,485 +0,0 @@ -import math - - -def _is_in_or_part_overlap(box1, box2) -> bool: - """两个bbox是否有部分重叠或者包含.""" - if box1 is None or box2 is None: - return False - - x0_1, y0_1, x1_1, y1_1 = box1 - x0_2, y0_2, x1_2, y1_2 = box2 - - return not (x1_1 < x0_2 or # box1在box2的左边 - x0_1 > x1_2 or # box1在box2的右边 - y1_1 < y0_2 or # box1在box2的上边 - y0_1 > y1_2) # box1在box2的下边 - - -def _is_in_or_part_overlap_with_area_ratio(box1, - box2, - area_ratio_threshold=0.6): - """判断box1是否在box2里面,或者box1和box2有部分重叠,且重叠面积占box1的比例超过area_ratio_threshold.""" - if box1 is None or box2 is None: - return False - - x0_1, y0_1, x1_1, y1_1 = box1 - x0_2, y0_2, x1_2, y1_2 = box2 - - if not _is_in_or_part_overlap(box1, box2): - return False - - # 计算重叠面积 - x_left = max(x0_1, x0_2) - y_top = max(y0_1, y0_2) - x_right = min(x1_1, x1_2) - y_bottom = min(y1_1, y1_2) - overlap_area = (x_right - x_left) * (y_bottom - y_top) - - # 计算box1的面积 - box1_area = (x1_1 - x0_1) * (y1_1 - y0_1) - - return overlap_area / box1_area > area_ratio_threshold - - -def _is_in(box1, box2) -> bool: - """box1是否完全在box2里面.""" - x0_1, y0_1, x1_1, y1_1 = box1 - x0_2, y0_2, x1_2, y1_2 = box2 - - return (x0_1 >= x0_2 and # box1的左边界不在box2的左边外 - y0_1 >= y0_2 and # box1的上边界不在box2的上边外 - x1_1 <= x1_2 and # box1的右边界不在box2的右边外 - y1_1 <= y1_2) # box1的下边界不在box2的下边外 - - -def _is_part_overlap(box1, box2) -> bool: - """两个bbox是否有部分重叠,但不完全包含.""" - if box1 is None or box2 is None: - return False - - return _is_in_or_part_overlap(box1, box2) and not _is_in(box1, box2) - - -def _left_intersect(left_box, right_box): - """检查两个box的左边界是否有交集,也就是left_box的右边界是否在right_box的左边界内.""" - if left_box is None or right_box is None: - return False - - x0_1, y0_1, x1_1, y1_1 = left_box - x0_2, y0_2, x1_2, y1_2 = right_box - - return x1_1 > x0_2 and x0_1 < x0_2 and (y0_1 <= y0_2 <= y1_1 - or y0_1 <= y1_2 <= y1_1) - - -def _right_intersect(left_box, right_box): - """检查box是否在右侧边界有交集,也就是left_box的左边界是否在right_box的右边界内.""" - if left_box is None or right_box is None: - return False - - x0_1, y0_1, x1_1, y1_1 = left_box - x0_2, y0_2, x1_2, y1_2 = right_box - - return x0_1 < x1_2 and x1_1 > x1_2 and (y0_1 <= y0_2 <= y1_1 - or y0_1 <= y1_2 <= y1_1) - - -def _is_vertical_full_overlap(box1, box2, x_torlence=2): - """x方向上:要么box1包含box2, 要么box2包含box1。不能部分包含 y方向上:box1和box2有重叠.""" - # 解析box的坐标 - x11, y11, x12, y12 = box1 # 左上角和右下角的坐标 (x1, y1, x2, y2) - x21, y21, x22, y22 = 
box2 - - # 在x轴方向上,box1是否包含box2 或 box2包含box1 - contains_in_x = (x11 - x_torlence <= x21 and x12 + x_torlence >= x22) or ( - x21 - x_torlence <= x11 and x22 + x_torlence >= x12) - - # 在y轴方向上,box1和box2是否有重叠 - overlap_in_y = not (y12 < y21 or y11 > y22) - - return contains_in_x and overlap_in_y - - -def _is_bottom_full_overlap(box1, box2, y_tolerance=2): - """检查box1下方和box2的上方有轻微的重叠,轻微程度收到y_tolerance的限制 这个函数和_is_vertical- - full_overlap的区别是,这个函数允许box1和box2在x方向上有轻微的重叠,允许一定的模糊度.""" - if box1 is None or box2 is None: - return False - - x0_1, y0_1, x1_1, y1_1 = box1 - x0_2, y0_2, x1_2, y1_2 = box2 - tolerance_margin = 2 - is_xdir_full_overlap = ( - (x0_1 - tolerance_margin <= x0_2 <= x1_1 + tolerance_margin - and x0_1 - tolerance_margin <= x1_2 <= x1_1 + tolerance_margin) - or (x0_2 - tolerance_margin <= x0_1 <= x1_2 + tolerance_margin - and x0_2 - tolerance_margin <= x1_1 <= x1_2 + tolerance_margin)) - - return y0_2 < y1_1 and 0 < (y1_1 - - y0_2) < y_tolerance and is_xdir_full_overlap - - -def _is_left_overlap( - box1, - box2, -): - """检查box1的左侧是否和box2有重叠 在Y方向上可以是部分重叠或者是完全重叠。不分box1和box2的上下关系,也就是无论box1在box2下 - 方还是box2在box1下方,都可以检测到重叠。 X方向上.""" - - def __overlap_y(Ay1, Ay2, By1, By2): - return max(0, min(Ay2, By2) - max(Ay1, By1)) - - if box1 is None or box2 is None: - return False - - x0_1, y0_1, x1_1, y1_1 = box1 - x0_2, y0_2, x1_2, y1_2 = box2 - - y_overlap_len = __overlap_y(y0_1, y1_1, y0_2, y1_2) - ratio_1 = 1.0 * y_overlap_len / (y1_1 - y0_1) if y1_1 - y0_1 != 0 else 0 - ratio_2 = 1.0 * y_overlap_len / (y1_2 - y0_2) if y1_2 - y0_2 != 0 else 0 - vertical_overlap_cond = ratio_1 >= 0.5 or ratio_2 >= 0.5 - - # vertical_overlap_cond = y0_1<=y0_2<=y1_1 or y0_1<=y1_2<=y1_1 or y0_2<=y0_1<=y1_2 or y0_2<=y1_1<=y1_2 - return x0_1 <= x0_2 <= x1_1 and vertical_overlap_cond - - -def __is_overlaps_y_exceeds_threshold(bbox1, - bbox2, - overlap_ratio_threshold=0.8): - """检查两个bbox在y轴上是否有重叠,并且该重叠区域的高度占两个bbox高度更低的那个超过80%""" - _, y0_1, _, y1_1 = bbox1 - _, y0_2, _, y1_2 = bbox2 - - overlap = max(0, min(y1_1, y1_2) - max(y0_1, y0_2)) - height1, height2 = y1_1 - y0_1, y1_2 - y0_2 - # max_height = max(height1, height2) - min_height = min(height1, height2) - - return (overlap / min_height) > overlap_ratio_threshold - - -def calculate_iou(bbox1, bbox2): - """计算两个边界框的交并比(IOU)。 - - Args: - bbox1 (list[float]): 第一个边界框的坐标,格式为 [x1, y1, x2, y2],其中 (x1, y1) 为左上角坐标,(x2, y2) 为右下角坐标。 - bbox2 (list[float]): 第二个边界框的坐标,格式与 `bbox1` 相同。 - - Returns: - float: 两个边界框的交并比(IOU),取值范围为 [0, 1]。 - """ - # Determine the coordinates of the intersection rectangle - x_left = max(bbox1[0], bbox2[0]) - y_top = max(bbox1[1], bbox2[1]) - x_right = min(bbox1[2], bbox2[2]) - y_bottom = min(bbox1[3], bbox2[3]) - - if x_right < x_left or y_bottom < y_top: - return 0.0 - - # The area of overlap area - intersection_area = (x_right - x_left) * (y_bottom - y_top) - - # The area of both rectangles - bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1]) - bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1]) - - if any([bbox1_area == 0, bbox2_area == 0]): - return 0 - - # Compute the intersection over union by taking the intersection area - # and dividing it by the sum of both areas minus the intersection area - iou = intersection_area / float(bbox1_area + bbox2_area - intersection_area) - - return iou - - -def calculate_overlap_area_2_minbox_area_ratio(bbox1, bbox2): - """计算box1和box2的重叠面积占最小面积的box的比例.""" - # Determine the coordinates of the intersection rectangle - x_left = max(bbox1[0], bbox2[0]) - y_top = max(bbox1[1], bbox2[1]) - x_right = 
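# Worked example (illustrative boxes) for the overlap helpers and calculate_iou above:
bbox_a = [0, 0, 10, 10]
bbox_b = [5, 5, 15, 15]
# intersection = (10 - 5) * (10 - 5) = 25, union = 100 + 100 - 25 = 175
print(calculate_iou(bbox_a, bbox_b))                 # 25 / 175 ≈ 0.1429
print(_is_in_or_part_overlap(bbox_a, bbox_b))        # True: partial overlap
print(_is_in([2, 2, 8, 8], [0, 0, 10, 10]))          # True: fully contained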
min(bbox1[2], bbox2[2]) - y_bottom = min(bbox1[3], bbox2[3]) - - if x_right < x_left or y_bottom < y_top: - return 0.0 - - # The area of overlap area - intersection_area = (x_right - x_left) * (y_bottom - y_top) - min_box_area = min([(bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1]), - (bbox2[3] - bbox2[1]) * (bbox2[2] - bbox2[0])]) - if min_box_area == 0: - return 0 - else: - return intersection_area / min_box_area - - -def calculate_overlap_area_in_bbox1_area_ratio(bbox1, bbox2): - """计算box1和box2的重叠面积占bbox1的比例.""" - # Determine the coordinates of the intersection rectangle - x_left = max(bbox1[0], bbox2[0]) - y_top = max(bbox1[1], bbox2[1]) - x_right = min(bbox1[2], bbox2[2]) - y_bottom = min(bbox1[3], bbox2[3]) - - if x_right < x_left or y_bottom < y_top: - return 0.0 - - # The area of overlap area - intersection_area = (x_right - x_left) * (y_bottom - y_top) - bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1]) - if bbox1_area == 0: - return 0 - else: - return intersection_area / bbox1_area - - -def get_minbox_if_overlap_by_ratio(bbox1, bbox2, ratio): - """通过calculate_overlap_area_2_minbox_area_ratio计算两个bbox重叠的面积占最小面积的box的比例 - 如果比例大于ratio,则返回小的那个bbox, 否则返回None.""" - x1_min, y1_min, x1_max, y1_max = bbox1 - x2_min, y2_min, x2_max, y2_max = bbox2 - area1 = (x1_max - x1_min) * (y1_max - y1_min) - area2 = (x2_max - x2_min) * (y2_max - y2_min) - overlap_ratio = calculate_overlap_area_2_minbox_area_ratio(bbox1, bbox2) - if overlap_ratio > ratio: - if area1 <= area2: - return bbox1 - else: - return bbox2 - else: - return None - - -def get_bbox_in_boundary(bboxes: list, boundary: tuple) -> list: - x0, y0, x1, y1 = boundary - new_boxes = [ - box for box in bboxes - if box[0] >= x0 and box[1] >= y0 and box[2] <= x1 and box[3] <= y1 - ] - return new_boxes - - -def is_vbox_on_side(bbox, width, height, side_threshold=0.2): - """判断一个bbox是否在pdf页面的边缘.""" - x0, x1 = bbox[0], bbox[2] - if x1 <= width * side_threshold or x0 >= width * (1 - side_threshold): - return True - return False - - -def find_top_nearest_text_bbox(pymu_blocks, obj_bbox): - tolerance_margin = 4 - top_boxes = [ - box for box in pymu_blocks - if obj_bbox[1] - box['bbox'][3] >= -tolerance_margin - and not _is_in(box['bbox'], obj_bbox) - ] - # 然后找到X方向上有互相重叠的 - top_boxes = [ - box for box in top_boxes if any([ - obj_bbox[0] - tolerance_margin <= box['bbox'][0] <= obj_bbox[2] + - tolerance_margin, obj_bbox[0] - - tolerance_margin <= box['bbox'][2] <= obj_bbox[2] + - tolerance_margin, box['bbox'][0] - - tolerance_margin <= obj_bbox[0] <= box['bbox'][2] + - tolerance_margin, box['bbox'][0] - - tolerance_margin <= obj_bbox[2] <= box['bbox'][2] + - tolerance_margin - ]) - ] - - # 然后找到y1最大的那个 - if len(top_boxes) > 0: - top_boxes.sort(key=lambda x: x['bbox'][3], reverse=True) - return top_boxes[0] - else: - return None - - -def find_bottom_nearest_text_bbox(pymu_blocks, obj_bbox): - bottom_boxes = [ - box for box in pymu_blocks if box['bbox'][1] - - obj_bbox[3] >= -2 and not _is_in(box['bbox'], obj_bbox) - ] - # 然后找到X方向上有互相重叠的 - bottom_boxes = [ - box for box in bottom_boxes if any([ - obj_bbox[0] - 2 <= box['bbox'][0] <= obj_bbox[2] + 2, obj_bbox[0] - - 2 <= box['bbox'][2] <= obj_bbox[2] + 2, box['bbox'][0] - - 2 <= obj_bbox[0] <= box['bbox'][2] + 2, box['bbox'][0] - - 2 <= obj_bbox[2] <= box['bbox'][2] + 2 - ]) - ] - - # 然后找到y0最小的那个 - if len(bottom_boxes) > 0: - bottom_boxes.sort(key=lambda x: x['bbox'][1], reverse=False) - return bottom_boxes[0] - else: - return None - - -def find_left_nearest_text_bbox(pymu_blocks, obj_bbox): - 
"""寻找左侧最近的文本block.""" - left_boxes = [ - box for box in pymu_blocks if obj_bbox[0] - - box['bbox'][2] >= -2 and not _is_in(box['bbox'], obj_bbox) - ] - # 然后找到X方向上有互相重叠的 - left_boxes = [ - box for box in left_boxes if any([ - obj_bbox[1] - 2 <= box['bbox'][1] <= obj_bbox[3] + 2, obj_bbox[1] - - 2 <= box['bbox'][3] <= obj_bbox[3] + 2, box['bbox'][1] - - 2 <= obj_bbox[1] <= box['bbox'][3] + 2, box['bbox'][1] - - 2 <= obj_bbox[3] <= box['bbox'][3] + 2 - ]) - ] - - # 然后找到x1最大的那个 - if len(left_boxes) > 0: - left_boxes.sort(key=lambda x: x['bbox'][2], reverse=True) - return left_boxes[0] - else: - return None - - -def find_right_nearest_text_bbox(pymu_blocks, obj_bbox): - """寻找右侧最近的文本block.""" - right_boxes = [ - box for box in pymu_blocks if box['bbox'][0] - - obj_bbox[2] >= -2 and not _is_in(box['bbox'], obj_bbox) - ] - # 然后找到X方向上有互相重叠的 - right_boxes = [ - box for box in right_boxes if any([ - obj_bbox[1] - 2 <= box['bbox'][1] <= obj_bbox[3] + 2, obj_bbox[1] - - 2 <= box['bbox'][3] <= obj_bbox[3] + 2, box['bbox'][1] - - 2 <= obj_bbox[1] <= box['bbox'][3] + 2, box['bbox'][1] - - 2 <= obj_bbox[3] <= box['bbox'][3] + 2 - ]) - ] - - # 然后找到x0最小的那个 - if len(right_boxes) > 0: - right_boxes.sort(key=lambda x: x['bbox'][0], reverse=False) - return right_boxes[0] - else: - return None - - -def bbox_relative_pos(bbox1, bbox2): - """判断两个矩形框的相对位置关系. - - Args: - bbox1: 一个四元组,表示第一个矩形框的左上角和右下角的坐标,格式为(x1, y1, x1b, y1b) - bbox2: 一个四元组,表示第二个矩形框的左上角和右下角的坐标,格式为(x2, y2, x2b, y2b) - - Returns: - 一个四元组,表示矩形框1相对于矩形框2的位置关系,格式为(left, right, bottom, top) - 其中,left表示矩形框1是否在矩形框2的左侧,right表示矩形框1是否在矩形框2的右侧, - bottom表示矩形框1是否在矩形框2的下方,top表示矩形框1是否在矩形框2的上方 - """ - x1, y1, x1b, y1b = bbox1 - x2, y2, x2b, y2b = bbox2 - - left = x2b < x1 - right = x1b < x2 - bottom = y2b < y1 - top = y1b < y2 - return left, right, bottom, top - - -def bbox_distance(bbox1, bbox2): - """计算两个矩形框的距离。 - - Args: - bbox1 (tuple): 第一个矩形框的坐标,格式为 (x1, y1, x2, y2),其中 (x1, y1) 为左上角坐标,(x2, y2) 为右下角坐标。 - bbox2 (tuple): 第二个矩形框的坐标,格式为 (x1, y1, x2, y2),其中 (x1, y1) 为左上角坐标,(x2, y2) 为右下角坐标。 - - Returns: - float: 矩形框之间的距离。 - """ - - def dist(point1, point2): - return math.sqrt((point1[0] - point2[0])**2 + - (point1[1] - point2[1])**2) - - x1, y1, x1b, y1b = bbox1 - x2, y2, x2b, y2b = bbox2 - - left, right, bottom, top = bbox_relative_pos(bbox1, bbox2) - - if top and left: - return dist((x1, y1b), (x2b, y2)) - elif left and bottom: - return dist((x1, y1), (x2b, y2b)) - elif bottom and right: - return dist((x1b, y1), (x2, y2b)) - elif right and top: - return dist((x1b, y1b), (x2, y2)) - elif left: - return x1 - x2b - elif right: - return x2 - x1b - elif bottom: - return y1 - y2b - elif top: - return y2 - y1b - return 0.0 - - -def box_area(bbox): - return (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) - - -def get_overlap_area(bbox1, bbox2): - """计算box1和box2的重叠面积占bbox1的比例.""" - # Determine the coordinates of the intersection rectangle - x_left = max(bbox1[0], bbox2[0]) - y_top = max(bbox1[1], bbox2[1]) - x_right = min(bbox1[2], bbox2[2]) - y_bottom = min(bbox1[3], bbox2[3]) - - if x_right < x_left or y_bottom < y_top: - return 0.0 - - # The area of overlap area - return (x_right - x_left) * (y_bottom - y_top) - - -def calculate_vertical_projection_overlap_ratio(block1, block2): - """ - Calculate the proportion of the x-axis covered by the vertical projection of two blocks. - - Args: - block1 (tuple): Coordinates of the first block (x0, y0, x1, y1). - block2 (tuple): Coordinates of the second block (x0, y0, x1, y1). 
- - Returns: - float: The proportion of the x-axis covered by the vertical projection of the two blocks. - """ - x0_1, _, x1_1, _ = block1 - x0_2, _, x1_2, _ = block2 - - # Calculate the intersection of the x-coordinates - x_left = max(x0_1, x0_2) - x_right = min(x1_1, x1_2) - - if x_right < x_left: - return 0.0 - - # Length of the intersection - intersection_length = x_right - x_left - - # Length of the x-axis projection of the first block - block1_length = x1_1 - x0_1 - - if block1_length == 0: - return 0.0 - - # Proportion of the x-axis covered by the intersection - # logger.info(f"intersection_length: {intersection_length}, block1_length: {block1_length}") - return intersection_length / block1_length diff --git a/magic_pdf/libs/clean_memory.py b/magic_pdf/libs/clean_memory.py deleted file mode 100644 index 930b99eadb71463816d938936649d82905723bd0..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/clean_memory.py +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) Opendatalab. All rights reserved. -import torch -import gc - - -def clean_memory(device='cuda'): - if device == 'cuda': - if torch.cuda.is_available(): - torch.cuda.empty_cache() - torch.cuda.ipc_collect() - elif str(device).startswith("npu"): - import torch_npu - if torch_npu.npu.is_available(): - torch_npu.npu.empty_cache() - elif str(device).startswith("mps"): - torch.mps.empty_cache() - gc.collect() \ No newline at end of file diff --git a/magic_pdf/libs/commons.py b/magic_pdf/libs/commons.py deleted file mode 100644 index 20f29ffd309737cfd06f04fa0426eab1ceb4a4b9..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/commons.py +++ /dev/null @@ -1,43 +0,0 @@ - -def join_path(*args): - return '/'.join(str(s).rstrip('/') for s in args) - - -def get_top_percent_list(num_list, percent): - """ - 获取列表中前百分之多少的元素 - :param num_list: - :param percent: - :return: - """ - if len(num_list) == 0: - top_percent_list = [] - else: - # 对imgs_len_list排序 - sorted_imgs_len_list = sorted(num_list, reverse=True) - # 计算 percent 的索引 - top_percent_index = int(len(sorted_imgs_len_list) * percent) - # 取前80%的元素 - top_percent_list = sorted_imgs_len_list[:top_percent_index] - return top_percent_list - - -def mymax(alist: list): - if len(alist) == 0: - return 0 # 空是0, 0*0也是0大小q - else: - return max(alist) - - -def parse_bucket_key(s3_full_path: str): - """ - 输入 s3://bucket/path/to/my/file.txt - 输出 bucket, path/to/my/file.txt - """ - s3_full_path = s3_full_path.strip() - if s3_full_path.startswith("s3://"): - s3_full_path = s3_full_path[5:] - if s3_full_path.startswith("/"): - s3_full_path = s3_full_path[1:] - bucket, key = s3_full_path.split("/", 1) - return bucket, key diff --git a/magic_pdf/libs/config_reader.py b/magic_pdf/libs/config_reader.py deleted file mode 100644 index 2b7e949621a606e4f8a83865945c501056fbefb6..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/config_reader.py +++ /dev/null @@ -1,139 +0,0 @@ -"""根据bucket的名字返回对应的s3 AK, SK,endpoint三元组.""" - -import json -import os - -from loguru import logger - -from magic_pdf.config.constants import MODEL_NAME -from magic_pdf.libs.commons import parse_bucket_key - -# 定义配置文件名常量 -CONFIG_FILE_NAME = os.getenv('MINERU_TOOLS_CONFIG_JSON', 'magic-pdf.json') - - -def read_config(): - if os.path.isabs(CONFIG_FILE_NAME): - config_file = CONFIG_FILE_NAME - else: - home_dir = os.path.expanduser('~') - config_file = os.path.join(home_dir, CONFIG_FILE_NAME) - - if not os.path.exists(config_file): - raise FileNotFoundError(f'{config_file} not found') - - with open(config_file, 'r', 
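# Quick usage sketch for the small helpers in commons.py above (values are illustrative):
print(join_path('s3://bucket/prefix/', 'images', 'page_1.jpg'))
# -> 's3://bucket/prefix/images/page_1.jpg'
print(parse_bucket_key('s3://my-bucket/path/to/file.pdf'))
# -> ('my-bucket', 'path/to/file.pdf')
print(get_top_percent_list([5, 3, 9, 1], 0.5))
# -> [9, 5]: sorted descending, keeping the top int(len * percent) entries
print(mymax([]))
# -> 0: empty lists are treated as zero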
encoding='utf-8') as f: - config = json.load(f) - return config - - -def get_s3_config(bucket_name: str): - """~/magic-pdf.json 读出来.""" - config = read_config() - - bucket_info = config.get('bucket_info') - if bucket_name not in bucket_info: - access_key, secret_key, storage_endpoint = bucket_info['[default]'] - else: - access_key, secret_key, storage_endpoint = bucket_info[bucket_name] - - if access_key is None or secret_key is None or storage_endpoint is None: - raise Exception(f'ak, sk or endpoint not found in {CONFIG_FILE_NAME}') - - # logger.info(f"get_s3_config: ak={access_key}, sk={secret_key}, endpoint={storage_endpoint}") - - return access_key, secret_key, storage_endpoint - - -def get_s3_config_dict(path: str): - access_key, secret_key, storage_endpoint = get_s3_config(get_bucket_name(path)) - return {'ak': access_key, 'sk': secret_key, 'endpoint': storage_endpoint} - - -def get_bucket_name(path): - bucket, key = parse_bucket_key(path) - return bucket - - -def get_local_models_dir(): - config = read_config() - models_dir = config.get('models-dir') - if models_dir is None: - logger.warning(f"'models-dir' not found in {CONFIG_FILE_NAME}, use '/tmp/models' as default") - return '/tmp/models' - else: - return models_dir - - -def get_local_layoutreader_model_dir(): - config = read_config() - layoutreader_model_dir = config.get('layoutreader-model-dir') - if layoutreader_model_dir is None or not os.path.exists(layoutreader_model_dir): - home_dir = os.path.expanduser('~') - layoutreader_at_modelscope_dir_path = os.path.join(home_dir, '.cache/modelscope/hub/ppaanngggg/layoutreader') - logger.warning(f"'layoutreader-model-dir' not exists, use {layoutreader_at_modelscope_dir_path} as default") - return layoutreader_at_modelscope_dir_path - else: - return layoutreader_model_dir - - -def get_device(): - config = read_config() - device = config.get('device-mode') - if device is None: - logger.warning(f"'device-mode' not found in {CONFIG_FILE_NAME}, use 'cpu' as default") - return 'cpu' - else: - return device - - -def get_table_recog_config(): - config = read_config() - table_config = config.get('table-config') - if table_config is None: - logger.warning(f"'table-config' not found in {CONFIG_FILE_NAME}, use 'False' as default") - return json.loads(f'{{"model": "{MODEL_NAME.RAPID_TABLE}","enable": false, "max_time": 400}}') - else: - return table_config - - -def get_layout_config(): - config = read_config() - layout_config = config.get('layout-config') - if layout_config is None: - logger.warning(f"'layout-config' not found in {CONFIG_FILE_NAME}, use '{MODEL_NAME.LAYOUTLMv3}' as default") - return json.loads(f'{{"model": "{MODEL_NAME.LAYOUTLMv3}"}}') - else: - return layout_config - - -def get_formula_config(): - config = read_config() - formula_config = config.get('formula-config') - if formula_config is None: - logger.warning(f"'formula-config' not found in {CONFIG_FILE_NAME}, use 'True' as default") - return json.loads(f'{{"mfd_model": "{MODEL_NAME.YOLO_V8_MFD}","mfr_model": "{MODEL_NAME.UniMerNet_v2_Small}","enable": true}}') - else: - return formula_config - -def get_llm_aided_config(): - config = read_config() - llm_aided_config = config.get('llm-aided-config') - if llm_aided_config is None: - logger.warning(f"'llm-aided-config' not found in {CONFIG_FILE_NAME}, use 'None' as default") - return None - else: - return llm_aided_config - -def get_latex_delimiter_config(): - config = read_config() - latex_delimiter_config = config.get('latex-delimiter-config') - if latex_delimiter_config is 
None: - logger.warning(f"'latex-delimiter-config' not found in {CONFIG_FILE_NAME}, use 'None' as default") - return None - else: - return latex_delimiter_config - - -if __name__ == '__main__': - ak, sk, endpoint = get_s3_config('llm-raw') diff --git a/magic_pdf/libs/convert_utils.py b/magic_pdf/libs/convert_utils.py deleted file mode 100644 index 99a1879d46befa2de63aa1a379ab83dbf6fdb1f1..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/convert_utils.py +++ /dev/null @@ -1,5 +0,0 @@ -def dict_to_list(input_dict): - items_list = [] - for _, item in input_dict.items(): - items_list.append(item) - return items_list diff --git a/magic_pdf/libs/coordinate_transform.py b/magic_pdf/libs/coordinate_transform.py deleted file mode 100644 index 7cd7a0768596174d71ea8b3c8309c0ec998b3c81..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/coordinate_transform.py +++ /dev/null @@ -1,9 +0,0 @@ -def get_scale_ratio(model_page_info, page): - pix = page.get_pixmap(dpi=72) - pymu_width = int(pix.w) - pymu_height = int(pix.h) - width_from_json = model_page_info['page_info']['width'] - height_from_json = model_page_info['page_info']['height'] - horizontal_scale_ratio = width_from_json / pymu_width - vertical_scale_ratio = height_from_json / pymu_height - return horizontal_scale_ratio, vertical_scale_ratio diff --git a/magic_pdf/libs/draw_bbox.py b/magic_pdf/libs/draw_bbox.py deleted file mode 100644 index c2ad21d091cff9c2d3026f97da486129b6b34edf..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/draw_bbox.py +++ /dev/null @@ -1,418 +0,0 @@ -import fitz -from magic_pdf.config.constants import CROSS_PAGE -from magic_pdf.config.ocr_content_type import (BlockType, CategoryId, - ContentType) -from magic_pdf.data.dataset import Dataset -from magic_pdf.model.magic_model import MagicModel - - -def draw_bbox_without_number(i, bbox_list, page, rgb_config, fill_config): - new_rgb = [] - for item in rgb_config: - item = float(item) / 255 - new_rgb.append(item) - page_data = bbox_list[i] - for bbox in page_data: - x0, y0, x1, y1 = bbox - rect_coords = fitz.Rect(x0, y0, x1, y1) # Define the rectangle - if fill_config: - page.draw_rect( - rect_coords, - color=None, - fill=new_rgb, - fill_opacity=0.3, - width=0.5, - overlay=True, - ) # Draw the rectangle - else: - page.draw_rect( - rect_coords, - color=new_rgb, - fill=None, - fill_opacity=1, - width=0.5, - overlay=True, - ) # Draw the rectangle - - -def draw_bbox_with_number(i, bbox_list, page, rgb_config, fill_config, draw_bbox=True): - new_rgb = [] - for item in rgb_config: - item = float(item) / 255 - new_rgb.append(item) - page_data = bbox_list[i] - for j, bbox in enumerate(page_data): - x0, y0, x1, y1 = bbox - rect_coords = fitz.Rect(x0, y0, x1, y1) # Define the rectangle - if draw_bbox: - if fill_config: - page.draw_rect( - rect_coords, - color=None, - fill=new_rgb, - fill_opacity=0.3, - width=0.5, - overlay=True, - ) # Draw the rectangle - else: - page.draw_rect( - rect_coords, - color=new_rgb, - fill=None, - fill_opacity=1, - width=0.5, - overlay=True, - ) # Draw the rectangle - page.insert_text( - (x1 + 2, y0 + 10), str(j + 1), fontsize=10, color=new_rgb - ) # Insert the index in the top left corner of the rectangle - - -def draw_layout_bbox(pdf_info, pdf_bytes, out_path, filename): - dropped_bbox_list = [] - tables_list, tables_body_list = [], [] - tables_caption_list, tables_footnote_list = [], [] - imgs_list, imgs_body_list, imgs_caption_list = [], [], [] - imgs_footnote_list = [] - titles_list = [] - texts_list = [] - 
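# Illustrative sketch of a minimal config file as consumed by config_reader.py above.
# Only the key names and the documented fallbacks are taken from the code; every value
# below is a placeholder, not an official or recommended setting.
import json
import os

from magic_pdf.config.constants import MODEL_NAME

sample_config = {
    'bucket_info': {'[default]': ['<access_key>', '<secret_key>', '<endpoint>']},
    'models-dir': '/tmp/models',
    'device-mode': 'cpu',
    'table-config': {'model': MODEL_NAME.RAPID_TABLE, 'enable': False, 'max_time': 400},
    'layout-config': {'model': MODEL_NAME.LAYOUTLMv3},
    'formula-config': {'mfd_model': MODEL_NAME.YOLO_V8_MFD,
                       'mfr_model': MODEL_NAME.UniMerNet_v2_Small, 'enable': True},
}
with open(os.path.expanduser('~/magic-pdf.json'), 'w', encoding='utf-8') as f:
    json.dump(sample_config, f, ensure_ascii=False, indent=2)
# The file name/location can also be overridden via the MINERU_TOOLS_CONFIG_JSON env var.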
interequations_list = [] - lists_list = [] - indexs_list = [] - for page in pdf_info: - - page_dropped_list = [] - tables, tables_body, tables_caption, tables_footnote = [], [], [], [] - imgs, imgs_body, imgs_caption, imgs_footnote = [], [], [], [] - titles = [] - texts = [] - interequations = [] - lists = [] - indices = [] - - for dropped_bbox in page['discarded_blocks']: - page_dropped_list.append(dropped_bbox['bbox']) - dropped_bbox_list.append(page_dropped_list) - for block in page['para_blocks']: - bbox = block['bbox'] - if block['type'] == BlockType.Table: - tables.append(bbox) - for nested_block in block['blocks']: - bbox = nested_block['bbox'] - if nested_block['type'] == BlockType.TableBody: - tables_body.append(bbox) - elif nested_block['type'] == BlockType.TableCaption: - tables_caption.append(bbox) - elif nested_block['type'] == BlockType.TableFootnote: - tables_footnote.append(bbox) - elif block['type'] == BlockType.Image: - imgs.append(bbox) - for nested_block in block['blocks']: - bbox = nested_block['bbox'] - if nested_block['type'] == BlockType.ImageBody: - imgs_body.append(bbox) - elif nested_block['type'] == BlockType.ImageCaption: - imgs_caption.append(bbox) - elif nested_block['type'] == BlockType.ImageFootnote: - imgs_footnote.append(bbox) - elif block['type'] == BlockType.Title: - titles.append(bbox) - elif block['type'] == BlockType.Text: - texts.append(bbox) - elif block['type'] == BlockType.InterlineEquation: - interequations.append(bbox) - elif block['type'] == BlockType.List: - lists.append(bbox) - elif block['type'] == BlockType.Index: - indices.append(bbox) - - tables_list.append(tables) - tables_body_list.append(tables_body) - tables_caption_list.append(tables_caption) - tables_footnote_list.append(tables_footnote) - imgs_list.append(imgs) - imgs_body_list.append(imgs_body) - imgs_caption_list.append(imgs_caption) - imgs_footnote_list.append(imgs_footnote) - titles_list.append(titles) - texts_list.append(texts) - interequations_list.append(interequations) - lists_list.append(lists) - indexs_list.append(indices) - - layout_bbox_list = [] - - table_type_order = { - 'table_caption': 1, - 'table_body': 2, - 'table_footnote': 3 - } - for page in pdf_info: - page_block_list = [] - for block in page['para_blocks']: - if block['type'] in [ - BlockType.Text, - BlockType.Title, - BlockType.InterlineEquation, - BlockType.List, - BlockType.Index, - ]: - bbox = block['bbox'] - page_block_list.append(bbox) - elif block['type'] in [BlockType.Image]: - for sub_block in block['blocks']: - bbox = sub_block['bbox'] - page_block_list.append(bbox) - elif block['type'] in [BlockType.Table]: - sorted_blocks = sorted(block['blocks'], key=lambda x: table_type_order[x['type']]) - for sub_block in sorted_blocks: - bbox = sub_block['bbox'] - page_block_list.append(bbox) - - layout_bbox_list.append(page_block_list) - - pdf_docs = fitz.open('pdf', pdf_bytes) - - for i, page in enumerate(pdf_docs): - - draw_bbox_without_number(i, dropped_bbox_list, page, [158, 158, 158], True) - # draw_bbox_without_number(i, tables_list, page, [153, 153, 0], True) # color ! 
- draw_bbox_without_number(i, tables_body_list, page, [204, 204, 0], True) - draw_bbox_without_number(i, tables_caption_list, page, [255, 255, 102], True) - draw_bbox_without_number(i, tables_footnote_list, page, [229, 255, 204], True) - # draw_bbox_without_number(i, imgs_list, page, [51, 102, 0], True) - draw_bbox_without_number(i, imgs_body_list, page, [153, 255, 51], True) - draw_bbox_without_number(i, imgs_caption_list, page, [102, 178, 255], True) - draw_bbox_without_number(i, imgs_footnote_list, page, [255, 178, 102], True), - draw_bbox_without_number(i, titles_list, page, [102, 102, 255], True) - draw_bbox_without_number(i, texts_list, page, [153, 0, 76], True) - draw_bbox_without_number(i, interequations_list, page, [0, 255, 0], True) - draw_bbox_without_number(i, lists_list, page, [40, 169, 92], True) - draw_bbox_without_number(i, indexs_list, page, [40, 169, 92], True) - - draw_bbox_with_number( - i, layout_bbox_list, page, [255, 0, 0], False, draw_bbox=False - ) - - # Save the PDF - pdf_docs.save(f'{out_path}/{filename}') - - -def draw_span_bbox(pdf_info, pdf_bytes, out_path, filename): - text_list = [] - inline_equation_list = [] - interline_equation_list = [] - image_list = [] - table_list = [] - dropped_list = [] - next_page_text_list = [] - next_page_inline_equation_list = [] - - def get_span_info(span): - if span['type'] == ContentType.Text: - if span.get(CROSS_PAGE, False): - next_page_text_list.append(span['bbox']) - else: - page_text_list.append(span['bbox']) - elif span['type'] == ContentType.InlineEquation: - if span.get(CROSS_PAGE, False): - next_page_inline_equation_list.append(span['bbox']) - else: - page_inline_equation_list.append(span['bbox']) - elif span['type'] == ContentType.InterlineEquation: - page_interline_equation_list.append(span['bbox']) - elif span['type'] == ContentType.Image: - page_image_list.append(span['bbox']) - elif span['type'] == ContentType.Table: - page_table_list.append(span['bbox']) - - for page in pdf_info: - page_text_list = [] - page_inline_equation_list = [] - page_interline_equation_list = [] - page_image_list = [] - page_table_list = [] - page_dropped_list = [] - - # 将跨页的span放到移动到下一页的列表中 - if len(next_page_text_list) > 0: - page_text_list.extend(next_page_text_list) - next_page_text_list.clear() - if len(next_page_inline_equation_list) > 0: - page_inline_equation_list.extend(next_page_inline_equation_list) - next_page_inline_equation_list.clear() - - # 构造dropped_list - for block in page['discarded_blocks']: - if block['type'] == BlockType.Discarded: - for line in block['lines']: - for span in line['spans']: - page_dropped_list.append(span['bbox']) - dropped_list.append(page_dropped_list) - # 构造其余useful_list - # for block in page['para_blocks']: # span直接用分段合并前的结果就可以 - for block in page['preproc_blocks']: - if block['type'] in [ - BlockType.Text, - BlockType.Title, - BlockType.InterlineEquation, - BlockType.List, - BlockType.Index, - ]: - for line in block['lines']: - for span in line['spans']: - get_span_info(span) - elif block['type'] in [BlockType.Image, BlockType.Table]: - for sub_block in block['blocks']: - for line in sub_block['lines']: - for span in line['spans']: - get_span_info(span) - text_list.append(page_text_list) - inline_equation_list.append(page_inline_equation_list) - interline_equation_list.append(page_interline_equation_list) - image_list.append(page_image_list) - table_list.append(page_table_list) - pdf_docs = fitz.open('pdf', pdf_bytes) - for i, page in enumerate(pdf_docs): - # 获取当前页面的数据 - 
draw_bbox_without_number(i, text_list, page, [255, 0, 0], False) - draw_bbox_without_number(i, inline_equation_list, page, [0, 255, 0], False) - draw_bbox_without_number(i, interline_equation_list, page, [0, 0, 255], False) - draw_bbox_without_number(i, image_list, page, [255, 204, 0], False) - draw_bbox_without_number(i, table_list, page, [204, 0, 255], False) - draw_bbox_without_number(i, dropped_list, page, [158, 158, 158], False) - - # Save the PDF - pdf_docs.save(f'{out_path}/{filename}') - - -def draw_model_bbox(model_list, dataset: Dataset, out_path, filename): - dropped_bbox_list = [] - tables_body_list, tables_caption_list, tables_footnote_list = [], [], [] - imgs_body_list, imgs_caption_list, imgs_footnote_list = [], [], [] - titles_list = [] - texts_list = [] - interequations_list = [] - magic_model = MagicModel(model_list, dataset) - for i in range(len(model_list)): - page_dropped_list = [] - tables_body, tables_caption, tables_footnote = [], [], [] - imgs_body, imgs_caption, imgs_footnote = [], [], [] - titles = [] - texts = [] - interequations = [] - page_info = magic_model.get_model_list(i) - layout_dets = page_info['layout_dets'] - for layout_det in layout_dets: - bbox = layout_det['bbox'] - if layout_det['category_id'] == CategoryId.Text: - texts.append(bbox) - elif layout_det['category_id'] == CategoryId.Title: - titles.append(bbox) - elif layout_det['category_id'] == CategoryId.TableBody: - tables_body.append(bbox) - elif layout_det['category_id'] == CategoryId.TableCaption: - tables_caption.append(bbox) - elif layout_det['category_id'] == CategoryId.TableFootnote: - tables_footnote.append(bbox) - elif layout_det['category_id'] == CategoryId.ImageBody: - imgs_body.append(bbox) - elif layout_det['category_id'] == CategoryId.ImageCaption: - imgs_caption.append(bbox) - elif layout_det['category_id'] == CategoryId.InterlineEquation_YOLO: - interequations.append(bbox) - elif layout_det['category_id'] == CategoryId.Abandon: - page_dropped_list.append(bbox) - elif layout_det['category_id'] == CategoryId.ImageFootnote: - imgs_footnote.append(bbox) - - tables_body_list.append(tables_body) - tables_caption_list.append(tables_caption) - tables_footnote_list.append(tables_footnote) - imgs_body_list.append(imgs_body) - imgs_caption_list.append(imgs_caption) - titles_list.append(titles) - texts_list.append(texts) - interequations_list.append(interequations) - dropped_bbox_list.append(page_dropped_list) - imgs_footnote_list.append(imgs_footnote) - - for i in range(len(dataset)): - page = dataset.get_page(i) - draw_bbox_with_number( - i, dropped_bbox_list, page, [158, 158, 158], True - ) # color ! 
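# Usage sketch (hypothetical file names) for the two debug renderers defined above.
# pdf_info is the 'pdf_info' list from a *_middle.json produced by the pipeline.
import json
with open('/tmp/output/doc_middle.json', encoding='utf-8') as f:
    pdf_info = json.load(f)['pdf_info']
with open('/tmp/output/doc.pdf', 'rb') as f:
    pdf_bytes = f.read()
draw_layout_bbox(pdf_info, pdf_bytes, '/tmp/output', 'doc_layout.pdf')
draw_span_bbox(pdf_info, pdf_bytes, '/tmp/output', 'doc_spans.pdf')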
- draw_bbox_with_number(i, tables_body_list, page, [204, 204, 0], True) - draw_bbox_with_number(i, tables_caption_list, page, [255, 255, 102], True) - draw_bbox_with_number(i, tables_footnote_list, page, [229, 255, 204], True) - draw_bbox_with_number(i, imgs_body_list, page, [153, 255, 51], True) - draw_bbox_with_number(i, imgs_caption_list, page, [102, 178, 255], True) - draw_bbox_with_number(i, imgs_footnote_list, page, [255, 178, 102], True) - draw_bbox_with_number(i, titles_list, page, [102, 102, 255], True) - draw_bbox_with_number(i, texts_list, page, [153, 0, 76], True) - draw_bbox_with_number(i, interequations_list, page, [0, 255, 0], True) - - # Save the PDF - dataset.dump_to_file(f'{out_path}/{filename}') - - -def draw_line_sort_bbox(pdf_info, pdf_bytes, out_path, filename): - layout_bbox_list = [] - - for page in pdf_info: - page_line_list = [] - for block in page['preproc_blocks']: - if block['type'] in [BlockType.Text]: - for line in block['lines']: - bbox = line['bbox'] - index = line['index'] - page_line_list.append({'index': index, 'bbox': bbox}) - elif block['type'] in [BlockType.Title, BlockType.InterlineEquation]: - if 'virtual_lines' in block: - if len(block['virtual_lines']) > 0 and block['virtual_lines'][0].get('index', None) is not None: - for line in block['virtual_lines']: - bbox = line['bbox'] - index = line['index'] - page_line_list.append({'index': index, 'bbox': bbox}) - else: - for line in block['lines']: - bbox = line['bbox'] - index = line['index'] - page_line_list.append({'index': index, 'bbox': bbox}) - elif block['type'] in [BlockType.Image, BlockType.Table]: - for sub_block in block['blocks']: - if sub_block['type'] in [BlockType.ImageBody, BlockType.TableBody]: - if len(sub_block['virtual_lines']) > 0 and sub_block['virtual_lines'][0].get('index', None) is not None: - for line in sub_block['virtual_lines']: - bbox = line['bbox'] - index = line['index'] - page_line_list.append({'index': index, 'bbox': bbox}) - else: - for line in sub_block['lines']: - bbox = line['bbox'] - index = line['index'] - page_line_list.append({'index': index, 'bbox': bbox}) - elif sub_block['type'] in [BlockType.ImageCaption, BlockType.TableCaption, BlockType.ImageFootnote, BlockType.TableFootnote]: - for line in sub_block['lines']: - bbox = line['bbox'] - index = line['index'] - page_line_list.append({'index': index, 'bbox': bbox}) - sorted_bboxes = sorted(page_line_list, key=lambda x: x['index']) - layout_bbox_list.append(sorted_bbox['bbox'] for sorted_bbox in sorted_bboxes) - pdf_docs = fitz.open('pdf', pdf_bytes) - for i, page in enumerate(pdf_docs): - draw_bbox_with_number(i, layout_bbox_list, page, [255, 0, 0], False) - - pdf_docs.save(f'{out_path}/{filename}') - - -def draw_char_bbox(pdf_bytes, out_path, filename): - pdf_docs = fitz.open('pdf', pdf_bytes) - for i, page in enumerate(pdf_docs): - for block in page.get_text('rawdict', flags=fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_MEDIABOX_CLIP)['blocks']: - for line in block['lines']: - for span in line['spans']: - for char in span['chars']: - char_bbox = char['bbox'] - page.draw_rect(char_bbox, color=[1, 0, 0], fill=None, fill_opacity=1, width=0.3, overlay=True,) - pdf_docs.save(f'{out_path}/{filename}') diff --git a/magic_pdf/libs/hash_utils.py b/magic_pdf/libs/hash_utils.py deleted file mode 100644 index 47b8aea746eb04eeb427b775227692ef6b4d9d29..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/hash_utils.py +++ /dev/null @@ -1,15 +0,0 @@ -import hashlib - - -def 
compute_md5(file_bytes): - hasher = hashlib.md5() - hasher.update(file_bytes) - return hasher.hexdigest().upper() - - -def compute_sha256(input_string): - hasher = hashlib.sha256() - # 在Python3中,需要将字符串转化为字节对象才能被哈希函数处理 - input_bytes = input_string.encode('utf-8') - hasher.update(input_bytes) - return hasher.hexdigest() diff --git a/magic_pdf/libs/json_compressor.py b/magic_pdf/libs/json_compressor.py deleted file mode 100644 index 77ef1c876fcae0b34a42355b3edb079bb5dd891b..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/json_compressor.py +++ /dev/null @@ -1,27 +0,0 @@ -import json -import brotli -import base64 - -class JsonCompressor: - - @staticmethod - def compress_json(data): - """ - Compress a json object and encode it with base64 - """ - json_str = json.dumps(data) - json_bytes = json_str.encode('utf-8') - compressed = brotli.compress(json_bytes, quality=6) - compressed_str = base64.b64encode(compressed).decode('utf-8') # convert bytes to string - return compressed_str - - @staticmethod - def decompress_json(compressed_str): - """ - Decode the base64 string and decompress the json object - """ - compressed = base64.b64decode(compressed_str.encode('utf-8')) # convert string to bytes - decompressed_bytes = brotli.decompress(compressed) - json_str = decompressed_bytes.decode('utf-8') - data = json.loads(json_str) - return data diff --git a/magic_pdf/libs/language.py b/magic_pdf/libs/language.py deleted file mode 100644 index 73d382b7c436f8c0a8a7498e4ea1584b0719e8a5..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/language.py +++ /dev/null @@ -1,48 +0,0 @@ -import os -import unicodedata - -if not os.getenv("FTLANG_CACHE"): - current_file_path = os.path.abspath(__file__) - current_dir = os.path.dirname(current_file_path) - root_dir = os.path.dirname(current_dir) - ftlang_cache_dir = os.path.join(root_dir, 'resources', 'fasttext-langdetect') - os.environ["FTLANG_CACHE"] = str(ftlang_cache_dir) - # print(os.getenv("FTLANG_CACHE")) - -from fast_langdetect import detect_language - - -def remove_invalid_surrogates(text): - # 移除无效的 UTF-16 代理对 - return ''.join(c for c in text if not (0xD800 <= ord(c) <= 0xDFFF)) - - -def detect_lang(text: str) -> str: - - if len(text) == 0: - return "" - - text = text.replace("\n", "") - text = remove_invalid_surrogates(text) - - # print(text) - try: - lang_upper = detect_language(text) - except: - html_no_ctrl_chars = ''.join([l for l in text if unicodedata.category(l)[0] not in ['C', ]]) - lang_upper = detect_language(html_no_ctrl_chars) - - try: - lang = lang_upper.lower() - except: - lang = "" - return lang - - -if __name__ == '__main__': - print(os.getenv("FTLANG_CACHE")) - print(detect_lang("This is a test.")) - print(detect_lang("This is a test")) - print(detect_lang("这个是中文测试。")) - print(detect_lang("这个是中文测试。")) - print(detect_lang("〖\ud835\udc46\ud835〗这是个包含utf-16的中文测试")) \ No newline at end of file diff --git a/magic_pdf/libs/local_math.py b/magic_pdf/libs/local_math.py deleted file mode 100644 index 9edbcc7074dfa189a8508eb76366ae31dba4d665..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/local_math.py +++ /dev/null @@ -1,9 +0,0 @@ -def float_gt(a, b): - if 0.0001 >= abs(a -b): - return False - return a > b - -def float_equal(a, b): - if 0.0001 >= abs(a-b): - return True - return False \ No newline at end of file diff --git a/magic_pdf/libs/markdown_utils.py b/magic_pdf/libs/markdown_utils.py deleted file mode 100644 index 036232c880b584573a4cd031fed4f457d8d63e6f..0000000000000000000000000000000000000000 --- 
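# Usage sketch for the hashing, JSON-compression and language helpers above:
print(compute_md5(b'hello'))                       # uppercase MD5 hexdigest of the bytes
print(compute_sha256('2_10_20_30_40'))             # lowercase SHA-256 hexdigest of the string
packed = JsonCompressor.compress_json({'pdf_info': [], 'version': 1})
restored = JsonCompressor.decompress_json(packed)  # round-trips via brotli + base64
assert restored == {'pdf_info': [], 'version': 1}
print(detect_lang('This is a short English sentence.'))   # a lowercased code, e.g. 'en'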
a/magic_pdf/libs/markdown_utils.py +++ /dev/null @@ -1,10 +0,0 @@ - -def ocr_escape_special_markdown_char(content): - """ - 转义正文里对markdown语法有特殊意义的字符 - """ - special_chars = ["*", "`", "~", "$"] - for char in special_chars: - content = content.replace(char, "\\" + char) - - return content diff --git a/magic_pdf/libs/path_utils.py b/magic_pdf/libs/path_utils.py deleted file mode 100644 index 15fff01b5a698fbd6b1df11d9608b9ef12ffc715..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/path_utils.py +++ /dev/null @@ -1,32 +0,0 @@ - - -def remove_non_official_s3_args(s3path): - """ - example: s3://abc/xxxx.json?bytes=0,81350 ==> s3://abc/xxxx.json - """ - arr = s3path.split("?") - return arr[0] - -def parse_s3path(s3path: str): - # from s3pathlib import S3Path - # p = S3Path(remove_non_official_s3_args(s3path)) - # return p.bucket, p.key - s3path = remove_non_official_s3_args(s3path).strip() - if s3path.startswith(('s3://', 's3a://')): - prefix, path = s3path.split('://', 1) - bucket_name, key = path.split('/', 1) - return bucket_name, key - elif s3path.startswith('/'): - raise ValueError("The provided path starts with '/'. This does not conform to a valid S3 path format.") - else: - raise ValueError("Invalid S3 path format. Expected 's3://bucket-name/key' or 's3a://bucket-name/key'.") - - -def parse_s3_range_params(s3path: str): - """ - example: s3://abc/xxxx.json?bytes=0,81350 ==> [0, 81350] - """ - arr = s3path.split("?bytes=") - if len(arr) == 1: - return None - return arr[1].split(",") diff --git a/magic_pdf/libs/pdf_check.py b/magic_pdf/libs/pdf_check.py deleted file mode 100644 index 98402b383b74800817a0770cb495e280a52b5e6c..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/pdf_check.py +++ /dev/null @@ -1,99 +0,0 @@ -import fitz -import numpy as np -from loguru import logger -import re -from io import BytesIO -from pdfminer.high_level import extract_text -from pdfminer.layout import LAParams - - -def calculate_sample_count(total_page: int): - """ - 根据总页数和采样率计算采样页面的数量。 - """ - select_page_cnt = min(10, total_page) - return select_page_cnt - - -def extract_pages(src_pdf_bytes: bytes) -> fitz.Document: - pdf_docs = fitz.open("pdf", src_pdf_bytes) - total_page = len(pdf_docs) - if total_page == 0: - # 如果PDF没有页面,直接返回空文档 - logger.warning("PDF is empty, return empty document") - return fitz.Document() - select_page_cnt = calculate_sample_count(total_page) - - page_num = np.random.choice(total_page, select_page_cnt, replace=False) - sample_docs = fitz.Document() - try: - for index in page_num: - sample_docs.insert_pdf(pdf_docs, from_page=int(index), to_page=int(index)) - except Exception as e: - logger.exception(e) - return sample_docs - - -def detect_invalid_chars(src_pdf_bytes: bytes) -> bool: - """" - 检测PDF中是否包含非法字符 - """ - '''pdfminer比较慢,需要先随机抽取10页左右的sample''' - sample_docs = extract_pages(src_pdf_bytes) - sample_pdf_bytes = sample_docs.tobytes() - sample_pdf_file_like_object = BytesIO(sample_pdf_bytes) - laparams = LAParams( - line_overlap=0.5, - char_margin=2.0, - line_margin=0.5, - word_margin=0.1, - boxes_flow=None, - detect_vertical=False, - all_texts=False, - ) - text = extract_text(pdf_file=sample_pdf_file_like_object, laparams=laparams) - text = text.replace("\n", "") - # logger.info(text) - '''乱码文本用pdfminer提取出来的文本特征是(cid:xxx)''' - cid_pattern = re.compile(r'\(cid:\d+\)') - matches = cid_pattern.findall(text) - cid_count = len(matches) - cid_len = sum(len(match) for match in matches) - text_len = len(text) - if text_len == 0: - cid_chars_radio = 0 - else: - 
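# Usage sketch for the S3 path helpers above (the example path comes from their docstrings):
print(remove_non_official_s3_args('s3://abc/xxxx.json?bytes=0,81350'))  # 's3://abc/xxxx.json'
print(parse_s3path('s3://abc/xxxx.json?bytes=0,81350'))                 # ('abc', 'xxxx.json')
print(parse_s3_range_params('s3://abc/xxxx.json?bytes=0,81350'))        # ['0', '81350'] (strings)
print(parse_s3_range_params('s3://abc/xxxx.json'))                      # None: no byte range present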
cid_chars_radio = cid_count/(cid_count + text_len - cid_len) - logger.info(f"cid_count: {cid_count}, text_len: {text_len}, cid_chars_radio: {cid_chars_radio}") - '''当一篇文章存在5%以上的文本是乱码时,认为该文档为乱码文档''' - if cid_chars_radio > 0.05: - return False # 乱码文档 - else: - return True # 正常文档 - - -def count_replacement_characters(text: str) -> int: - """ - 统计字符串中 0xfffd 字符的数量。 - """ - return text.count('\ufffd') - - -def detect_invalid_chars_by_pymupdf(src_pdf_bytes: bytes) -> bool: - sample_docs = extract_pages(src_pdf_bytes) - doc_text = "" - for page in sample_docs: - page_text = page.get_text('text', flags=fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_MEDIABOX_CLIP) - doc_text += page_text - text_len = len(doc_text) - uffd_count = count_replacement_characters(doc_text) - if text_len == 0: - uffd_chars_radio = 0 - else: - uffd_chars_radio = uffd_count / text_len - logger.info(f"uffd_count: {uffd_count}, text_len: {text_len}, uffd_chars_radio: {uffd_chars_radio}") - '''当一篇文章存在1%以上的文本是乱码时,认为该文档为乱码文档''' - if uffd_chars_radio > 0.01: - return False # 乱码文档 - else: - return True # 正常文档 \ No newline at end of file diff --git a/magic_pdf/libs/pdf_image_tools.py b/magic_pdf/libs/pdf_image_tools.py deleted file mode 100644 index 80201167da768f8f182c1d0eb2ae10771d96caa9..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/pdf_image_tools.py +++ /dev/null @@ -1,63 +0,0 @@ -from io import BytesIO -import cv2 -import fitz -import numpy as np -from PIL import Image -from magic_pdf.data.data_reader_writer import DataWriter -from magic_pdf.libs.commons import join_path -from magic_pdf.libs.hash_utils import compute_sha256 - - -def cut_image(bbox: tuple, page_num: int, page: fitz.Page, return_path, imageWriter: DataWriter): - """从第page_num页的page中,根据bbox进行裁剪出一张jpg图片,返回图片路径 save_path:需要同时支持s3和本地, - 图片存放在save_path下,文件名是: - {page_num}_{bbox[0]}_{bbox[1]}_{bbox[2]}_{bbox[3]}.jpg , bbox内数字取整。""" - # 拼接文件名 - filename = f'{page_num}_{int(bbox[0])}_{int(bbox[1])}_{int(bbox[2])}_{int(bbox[3])}' - - # 老版本返回不带bucket的路径 - img_path = join_path(return_path, filename) if return_path is not None else None - - # 新版本生成平铺路径 - img_hash256_path = f'{compute_sha256(img_path)}.jpg' - - # 将坐标转换为fitz.Rect对象 - rect = fitz.Rect(*bbox) - # 配置缩放倍数为3倍 - zoom = fitz.Matrix(3, 3) - # 截取图片 - pix = page.get_pixmap(clip=rect, matrix=zoom) - - byte_data = pix.tobytes(output='jpeg', jpg_quality=95) - - imageWriter.write(img_hash256_path, byte_data) - - return img_hash256_path - - -def cut_image_to_pil_image(bbox: tuple, page: fitz.Page, mode="pillow"): - - # 将坐标转换为fitz.Rect对象 - rect = fitz.Rect(*bbox) - # 配置缩放倍数为3倍 - zoom = fitz.Matrix(3, 3) - # 截取图片 - pix = page.get_pixmap(clip=rect, matrix=zoom) - - if mode == "cv2": - # 直接转换为numpy数组供cv2使用 - img_array = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, pix.n) - # PyMuPDF使用RGB顺序,而cv2使用BGR顺序 - if pix.n == 3 or pix.n == 4: - image_result = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR) - else: - image_result = img_array - elif mode == "pillow": - # 将字节数据转换为文件对象 - image_file = BytesIO(pix.tobytes(output='png')) - # 使用 Pillow 打开图像 - image_result = Image.open(image_file) - else: - raise ValueError(f"mode: {mode} is not supported.") - - return image_result \ No newline at end of file diff --git a/magic_pdf/libs/performance_stats.py b/magic_pdf/libs/performance_stats.py deleted file mode 100644 index 3aeaeb33cb6832c35fea5520a78cf31626c4270c..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/performance_stats.py +++ /dev/null @@ -1,65 +0,0 @@ -import time -import functools 
-from collections import defaultdict -from typing import Dict, List - - -class PerformanceStats: - """性能统计类,用于收集和展示方法执行时间""" - - _stats: Dict[str, List[float]] = defaultdict(list) - - @classmethod - def add_execution_time(cls, func_name: str, execution_time: float): - """添加执行时间记录""" - cls._stats[func_name].append(execution_time) - - @classmethod - def get_stats(cls) -> Dict[str, dict]: - """获取统计结果""" - results = {} - for func_name, times in cls._stats.items(): - results[func_name] = { - 'count': len(times), - 'total_time': sum(times), - 'avg_time': sum(times) / len(times), - 'min_time': min(times), - 'max_time': max(times) - } - return results - - @classmethod - def print_stats(cls): - """打印统计结果""" - stats = cls.get_stats() - print("\n性能统计结果:") - print("-" * 80) - print(f"{'方法名':<40} {'调用次数':>8} {'总时间(s)':>12} {'平均时间(s)':>12}") - print("-" * 80) - for func_name, data in stats.items(): - print(f"{func_name:<40} {data['count']:8d} {data['total_time']:12.6f} {data['avg_time']:12.6f}") - - -def measure_time(func): - """测量方法执行时间的装饰器""" - - @functools.wraps(func) - def wrapper(*args, **kwargs): - start_time = time.time() - result = func(*args, **kwargs) - execution_time = time.time() - start_time - - # 获取更详细的函数标识 - if hasattr(func, "__self__"): # 实例方法 - class_name = func.__self__.__class__.__name__ - full_name = f"{class_name}.{func.__name__}" - elif hasattr(func, "__qualname__"): # 类方法或静态方法 - full_name = func.__qualname__ - else: - module_name = func.__module__ - full_name = f"{module_name}.{func.__name__}" - - PerformanceStats.add_execution_time(full_name, execution_time) - return result - - return wrapper \ No newline at end of file diff --git a/magic_pdf/libs/safe_filename.py b/magic_pdf/libs/safe_filename.py deleted file mode 100644 index 1076a4bae218e180351ef2ec4692f156e03be1c7..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/safe_filename.py +++ /dev/null @@ -1,11 +0,0 @@ -import os - - -def sanitize_filename(filename, replacement="_"): - if os.name == 'nt': - invalid_chars = '<>:"|?*' - - for char in invalid_chars: - filename = filename.replace(char, replacement) - - return filename diff --git a/magic_pdf/libs/version.py b/magic_pdf/libs/version.py deleted file mode 100644 index c45d9dbf3a2fb0a83065d719614b463df244d2b3..0000000000000000000000000000000000000000 --- a/magic_pdf/libs/version.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = "1.3.12" diff --git a/magic_pdf/model/__init__.py b/magic_pdf/model/__init__.py deleted file mode 100644 index 859d01b33457ba56047073fdfefb9ef718cfa236..0000000000000000000000000000000000000000 --- a/magic_pdf/model/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -__use_inside_model__ = True -__model_mode__ = 'full' \ No newline at end of file diff --git a/magic_pdf/model/batch_analyze.py b/magic_pdf/model/batch_analyze.py deleted file mode 100644 index be5e331fd801433fea2f41de317c9e1424649b00..0000000000000000000000000000000000000000 --- a/magic_pdf/model/batch_analyze.py +++ /dev/null @@ -1,265 +0,0 @@ -import time -import cv2 -from loguru import logger -from tqdm import tqdm - -from magic_pdf.config.constants import MODEL_NAME -from magic_pdf.model.sub_modules.model_init import AtomModelSingleton -from magic_pdf.model.sub_modules.model_utils import ( - clean_vram, crop_img, get_res_list_from_layout_res, get_coords_and_area) -from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.ocr_utils import ( - get_adjusted_mfdetrec_res, get_ocr_result_list) - -YOLO_LAYOUT_BASE_BATCH_SIZE = 1 -MFD_BASE_BATCH_SIZE = 1 -MFR_BASE_BATCH_SIZE = 16 - - -class 
BatchAnalyze: - def __init__(self, model_manager, batch_ratio: int, show_log, layout_model, formula_enable, table_enable): - self.model_manager = model_manager - self.batch_ratio = batch_ratio - self.show_log = show_log - self.layout_model = layout_model - self.formula_enable = formula_enable - self.table_enable = table_enable - - def __call__(self, images_with_extra_info: list) -> list: - if len(images_with_extra_info) == 0: - return [] - - images_layout_res = [] - layout_start_time = time.time() - self.model = self.model_manager.get_model( - ocr=True, - show_log=self.show_log, - lang = None, - layout_model = self.layout_model, - formula_enable = self.formula_enable, - table_enable = self.table_enable, - ) - - images = [image for image, _, _ in images_with_extra_info] - - if self.model.layout_model_name == MODEL_NAME.LAYOUTLMv3: - # layoutlmv3 - for image in images: - layout_res = self.model.layout_model(image, ignore_catids=[]) - images_layout_res.append(layout_res) - elif self.model.layout_model_name == MODEL_NAME.DocLayout_YOLO: - # doclayout_yolo - layout_images = [] - for image_index, image in enumerate(images): - layout_images.append(image) - - images_layout_res += self.model.layout_model.batch_predict( - # layout_images, self.batch_ratio * YOLO_LAYOUT_BASE_BATCH_SIZE - layout_images, YOLO_LAYOUT_BASE_BATCH_SIZE - ) - - # logger.info( - # f'layout time: {round(time.time() - layout_start_time, 2)}, image num: {len(images)}' - # ) - - if self.model.apply_formula: - # 公式检测 - mfd_start_time = time.time() - images_mfd_res = self.model.mfd_model.batch_predict( - # images, self.batch_ratio * MFD_BASE_BATCH_SIZE - images, MFD_BASE_BATCH_SIZE - ) - # logger.info( - # f'mfd time: {round(time.time() - mfd_start_time, 2)}, image num: {len(images)}' - # ) - - # 公式识别 - mfr_start_time = time.time() - images_formula_list = self.model.mfr_model.batch_predict( - images_mfd_res, - images, - batch_size=self.batch_ratio * MFR_BASE_BATCH_SIZE, - ) - mfr_count = 0 - for image_index in range(len(images)): - images_layout_res[image_index] += images_formula_list[image_index] - mfr_count += len(images_formula_list[image_index]) - # logger.info( - # f'mfr time: {round(time.time() - mfr_start_time, 2)}, image num: {mfr_count}' - # ) - - # 清理显存 - # clean_vram(self.model.device, vram_threshold=8) - - ocr_res_list_all_page = [] - table_res_list_all_page = [] - for index in range(len(images)): - _, ocr_enable, _lang = images_with_extra_info[index] - layout_res = images_layout_res[index] - np_array_img = images[index] - - ocr_res_list, table_res_list, single_page_mfdetrec_res = ( - get_res_list_from_layout_res(layout_res) - ) - - ocr_res_list_all_page.append({'ocr_res_list':ocr_res_list, - 'lang':_lang, - 'ocr_enable':ocr_enable, - 'np_array_img':np_array_img, - 'single_page_mfdetrec_res':single_page_mfdetrec_res, - 'layout_res':layout_res, - }) - - for table_res in table_res_list: - table_img, _ = crop_img(table_res, np_array_img) - table_res_list_all_page.append({'table_res':table_res, - 'lang':_lang, - 'table_img':table_img, - }) - - # 文本框检测 - det_start = time.time() - det_count = 0 - # for ocr_res_list_dict in ocr_res_list_all_page: - for ocr_res_list_dict in tqdm(ocr_res_list_all_page, desc="OCR-det Predict"): - # Process each area that requires OCR processing - _lang = ocr_res_list_dict['lang'] - # Get OCR results for this language's images - atom_model_manager = AtomModelSingleton() - ocr_model = atom_model_manager.get_atom_model( - atom_model_name='ocr', - ocr_show_log=False, - det_db_box_thresh=0.3, - 
lang=_lang - ) - for res in ocr_res_list_dict['ocr_res_list']: - new_image, useful_list = crop_img( - res, ocr_res_list_dict['np_array_img'], crop_paste_x=50, crop_paste_y=50 - ) - adjusted_mfdetrec_res = get_adjusted_mfdetrec_res( - ocr_res_list_dict['single_page_mfdetrec_res'], useful_list - ) - - # OCR-det - new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR) - ocr_res = ocr_model.ocr( - new_image, mfd_res=adjusted_mfdetrec_res, rec=False - )[0] - - # Integration results - if ocr_res: - ocr_result_list = get_ocr_result_list(ocr_res, useful_list, ocr_res_list_dict['ocr_enable'], new_image, _lang) - - if res["category_id"] == 3: - # ocr_result_list中所有bbox的面积之和 - ocr_res_area = sum(get_coords_and_area(ocr_res_item)[4] for ocr_res_item in ocr_result_list if 'poly' in ocr_res_item) - # 求ocr_res_area和res的面积的比值 - res_area = get_coords_and_area(res)[4] - if res_area > 0: - ratio = ocr_res_area / res_area - if ratio > 0.25: - res["category_id"] = 1 - else: - continue - - ocr_res_list_dict['layout_res'].extend(ocr_result_list) - - # det_count += len(ocr_res_list_dict['ocr_res_list']) - # logger.info(f'ocr-det time: {round(time.time()-det_start, 2)}, image num: {det_count}') - - - # 表格识别 table recognition - if self.model.apply_table: - table_start = time.time() - # for table_res_list_dict in table_res_list_all_page: - for table_res_dict in tqdm(table_res_list_all_page, desc="Table Predict"): - _lang = table_res_dict['lang'] - atom_model_manager = AtomModelSingleton() - table_model = atom_model_manager.get_atom_model( - atom_model_name='table', - table_model_name='rapid_table', - table_model_path='', - table_max_time=400, - device='cpu', - lang=_lang, - table_sub_model_name='slanet_plus' - ) - html_code, table_cell_bboxes, logic_points, elapse = table_model.predict(table_res_dict['table_img']) - # 判断是否返回正常 - if html_code: - expected_ending = html_code.strip().endswith( - '' - ) or html_code.strip().endswith('') - if expected_ending: - table_res_dict['table_res']['html'] = html_code - else: - logger.warning( - 'table recognition processing fails, not found expected HTML table end' - ) - else: - logger.warning( - 'table recognition processing fails, not get html return' - ) - # logger.info(f'table time: {round(time.time() - table_start, 2)}, image num: {len(table_res_list_all_page)}') - - # Create dictionaries to store items by language - need_ocr_lists_by_lang = {} # Dict of lists for each language - img_crop_lists_by_lang = {} # Dict of lists for each language - - for layout_res in images_layout_res: - for layout_res_item in layout_res: - if layout_res_item['category_id'] in [15]: - if 'np_img' in layout_res_item and 'lang' in layout_res_item: - lang = layout_res_item['lang'] - - # Initialize lists for this language if not exist - if lang not in need_ocr_lists_by_lang: - need_ocr_lists_by_lang[lang] = [] - img_crop_lists_by_lang[lang] = [] - - # Add to the appropriate language-specific lists - need_ocr_lists_by_lang[lang].append(layout_res_item) - img_crop_lists_by_lang[lang].append(layout_res_item['np_img']) - - # Remove the fields after adding to lists - layout_res_item.pop('np_img') - layout_res_item.pop('lang') - - - if len(img_crop_lists_by_lang) > 0: - - # Process OCR by language - rec_time = 0 - rec_start = time.time() - total_processed = 0 - - # Process each language separately - for lang, img_crop_list in img_crop_lists_by_lang.items(): - if len(img_crop_list) > 0: - # Get OCR results for this language's images - atom_model_manager = AtomModelSingleton() - ocr_model = 
atom_model_manager.get_atom_model( - atom_model_name='ocr', - ocr_show_log=False, - det_db_box_thresh=0.3, - lang=lang - ) - ocr_res_list = ocr_model.ocr(img_crop_list, det=False, tqdm_enable=True)[0] - - # Verify we have matching counts - assert len(ocr_res_list) == len( - need_ocr_lists_by_lang[lang]), f'ocr_res_list: {len(ocr_res_list)}, need_ocr_list: {len(need_ocr_lists_by_lang[lang])} for lang: {lang}' - - # Process OCR results for this language - for index, layout_res_item in enumerate(need_ocr_lists_by_lang[lang]): - ocr_text, ocr_score = ocr_res_list[index] - layout_res_item['text'] = ocr_text - layout_res_item['score'] = float(f"{ocr_score:.3f}") - - total_processed += len(img_crop_list) - - rec_time += time.time() - rec_start - # logger.info(f'ocr-rec time: {round(rec_time, 2)}, total images processed: {total_processed}') - - - - return images_layout_res diff --git a/magic_pdf/model/doc_analyze_by_custom_model.py b/magic_pdf/model/doc_analyze_by_custom_model.py deleted file mode 100644 index 93eecc6892f0986823d0a84693c96145dc8b9fea..0000000000000000000000000000000000000000 --- a/magic_pdf/model/doc_analyze_by_custom_model.py +++ /dev/null @@ -1,301 +0,0 @@ -import os -import time - -import numpy as np -import torch - -os.environ['FLAGS_npu_jit_compile'] = '0' # 关闭paddle的jit编译 -os.environ['FLAGS_use_stride_kernel'] = '0' -os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1' # 让mps可以fallback -os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1' # 禁止albumentations检查更新 - - -from loguru import logger - -from magic_pdf.model.sub_modules.model_utils import get_vram -from magic_pdf.config.enums import SupportedPdfParseMethod -import magic_pdf.model as model_config -from magic_pdf.data.dataset import Dataset -from magic_pdf.libs.clean_memory import clean_memory -from magic_pdf.libs.config_reader import (get_device, get_formula_config, - get_layout_config, - get_local_models_dir, - get_table_recog_config) -from magic_pdf.model.model_list import MODEL - -class ModelSingleton: - _instance = None - _models = {} - - def __new__(cls, *args, **kwargs): - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - - def get_model( - self, - ocr: bool, - show_log: bool, - lang=None, - layout_model=None, - formula_enable=None, - table_enable=None, - ): - key = (ocr, show_log, lang, layout_model, formula_enable, table_enable) - if key not in self._models: - self._models[key] = custom_model_init( - ocr=ocr, - show_log=show_log, - lang=lang, - layout_model=layout_model, - formula_enable=formula_enable, - table_enable=table_enable, - ) - return self._models[key] - - -def custom_model_init( - ocr: bool = False, - show_log: bool = False, - lang=None, - layout_model=None, - formula_enable=None, - table_enable=None, -): - model = None - if model_config.__model_mode__ == 'lite': - logger.warning( - 'The Lite mode is provided for developers to conduct testing only, and the output quality is ' - 'not guaranteed to be reliable.' 
- ) - model = MODEL.Paddle - elif model_config.__model_mode__ == 'full': - model = MODEL.PEK - - if model_config.__use_inside_model__: - model_init_start = time.time() - if model == MODEL.Paddle: - from magic_pdf.model.pp_structure_v2 import CustomPaddleModel - - custom_model = CustomPaddleModel(ocr=ocr, show_log=show_log, lang=lang) - elif model == MODEL.PEK: - from magic_pdf.model.pdf_extract_kit import CustomPEKModel - - # 从配置文件读取model-dir和device - local_models_dir = get_local_models_dir() - device = get_device() - - layout_config = get_layout_config() - if layout_model is not None: - layout_config['model'] = layout_model - - formula_config = get_formula_config() - if formula_enable is not None: - formula_config['enable'] = formula_enable - - table_config = get_table_recog_config() - if table_enable is not None: - table_config['enable'] = table_enable - - model_input = { - 'ocr': ocr, - 'show_log': show_log, - 'models_dir': local_models_dir, - 'device': device, - 'table_config': table_config, - 'layout_config': layout_config, - 'formula_config': formula_config, - 'lang': lang, - } - - custom_model = CustomPEKModel(**model_input) - else: - logger.error('Not allow model_name!') - exit(1) - model_init_cost = time.time() - model_init_start - logger.info(f'model init cost: {model_init_cost}') - else: - logger.error('use_inside_model is False, not allow to use inside model') - exit(1) - - return custom_model - -def doc_analyze( - dataset: Dataset, - ocr: bool = False, - show_log: bool = False, - start_page_id=0, - end_page_id=None, - lang=None, - layout_model=None, - formula_enable=None, - table_enable=None, -): - end_page_id = ( - end_page_id - if end_page_id is not None and end_page_id >= 0 - else len(dataset) - 1 - ) - - MIN_BATCH_INFERENCE_SIZE = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 200)) - images = [] - page_wh_list = [] - for index in range(len(dataset)): - if start_page_id <= index <= end_page_id: - page_data = dataset.get_page(index) - img_dict = page_data.get_image() - images.append(img_dict['img']) - page_wh_list.append((img_dict['width'], img_dict['height'])) - - images_with_extra_info = [(images[index], ocr, dataset._lang) for index in range(len(images))] - - if len(images) >= MIN_BATCH_INFERENCE_SIZE: - batch_size = MIN_BATCH_INFERENCE_SIZE - batch_images = [images_with_extra_info[i:i+batch_size] for i in range(0, len(images_with_extra_info), batch_size)] - else: - batch_images = [images_with_extra_info] - - results = [] - processed_images_count = 0 - for index, batch_image in enumerate(batch_images): - processed_images_count += len(batch_image) - logger.info(f'Batch {index + 1}/{len(batch_images)}: {processed_images_count} pages/{len(images_with_extra_info)} pages') - result = may_batch_image_analyze(batch_image, ocr, show_log,layout_model, formula_enable, table_enable) - results.extend(result) - - model_json = [] - for index in range(len(dataset)): - if start_page_id <= index <= end_page_id: - result = results.pop(0) - page_width, page_height = page_wh_list.pop(0) - else: - result = [] - page_height = 0 - page_width = 0 - - page_info = {'page_no': index, 'width': page_width, 'height': page_height} - page_dict = {'layout_dets': result, 'page_info': page_info} - model_json.append(page_dict) - - from magic_pdf.operators.models import InferenceResult - return InferenceResult(model_json, dataset) - -def batch_doc_analyze( - datasets: list[Dataset], - parse_method: str = 'auto', - show_log: bool = False, - lang=None, - layout_model=None, - formula_enable=None, - 
table_enable=None, -): - MIN_BATCH_INFERENCE_SIZE = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100)) - batch_size = MIN_BATCH_INFERENCE_SIZE - page_wh_list = [] - - images_with_extra_info = [] - for dataset in datasets: - - ocr = False - if parse_method == 'auto': - if dataset.classify() == SupportedPdfParseMethod.TXT: - ocr = False - elif dataset.classify() == SupportedPdfParseMethod.OCR: - ocr = True - elif parse_method == 'ocr': - ocr = True - elif parse_method == 'txt': - ocr = False - - _lang = dataset._lang - - for index in range(len(dataset)): - page_data = dataset.get_page(index) - img_dict = page_data.get_image() - page_wh_list.append((img_dict['width'], img_dict['height'])) - images_with_extra_info.append((img_dict['img'], ocr, _lang)) - - batch_images = [images_with_extra_info[i:i+batch_size] for i in range(0, len(images_with_extra_info), batch_size)] - results = [] - processed_images_count = 0 - for index, batch_image in enumerate(batch_images): - processed_images_count += len(batch_image) - logger.info(f'Batch {index + 1}/{len(batch_images)}: {processed_images_count} pages/{len(images_with_extra_info)} pages') - result = may_batch_image_analyze(batch_image, True, show_log, layout_model, formula_enable, table_enable) - results.extend(result) - - infer_results = [] - from magic_pdf.operators.models import InferenceResult - for index in range(len(datasets)): - dataset = datasets[index] - model_json = [] - for i in range(len(dataset)): - result = results.pop(0) - page_width, page_height = page_wh_list.pop(0) - page_info = {'page_no': i, 'width': page_width, 'height': page_height} - page_dict = {'layout_dets': result, 'page_info': page_info} - model_json.append(page_dict) - infer_results.append(InferenceResult(model_json, dataset)) - return infer_results - - -def may_batch_image_analyze( - images_with_extra_info: list[(np.ndarray, bool, str)], - ocr: bool, - show_log: bool = False, - layout_model=None, - formula_enable=None, - table_enable=None): - # os.environ['CUDA_VISIBLE_DEVICES'] = str(idx) - - from magic_pdf.model.batch_analyze import BatchAnalyze - - model_manager = ModelSingleton() - - # images = [image for image, _, _ in images_with_extra_info] - batch_ratio = 1 - device = get_device() - - if str(device).startswith('npu'): - import torch_npu - if torch_npu.npu.is_available(): - torch.npu.set_compile_mode(jit_compile=False) - - if str(device).startswith('npu') or str(device).startswith('cuda'): - vram = get_vram(device) - if vram is not None: - gpu_memory = int(os.getenv('VIRTUAL_VRAM_SIZE', round(vram))) - if gpu_memory >= 16: - batch_ratio = 16 - elif gpu_memory >= 12: - batch_ratio = 8 - elif gpu_memory >= 8: - batch_ratio = 4 - elif gpu_memory >= 6: - batch_ratio = 2 - else: - batch_ratio = 1 - logger.info(f'gpu_memory: {gpu_memory} GB, batch_ratio: {batch_ratio}') - else: - # Default batch_ratio when VRAM can't be determined - batch_ratio = 1 - logger.info(f'Could not determine GPU memory, using default batch_ratio: {batch_ratio}') - - - # doc_analyze_start = time.time() - - batch_model = BatchAnalyze(model_manager, batch_ratio, show_log, layout_model, formula_enable, table_enable) - results = batch_model(images_with_extra_info) - - # gc_start = time.time() - clean_memory(get_device()) - # gc_time = round(time.time() - gc_start, 2) - # logger.debug(f'gc time: {gc_time}') - - # doc_analyze_time = round(time.time() - doc_analyze_start, 2) - # doc_analyze_speed = round(len(images) / doc_analyze_time, 2) - # logger.debug( - # f'doc analyze time: 
{round(time.time() - doc_analyze_start, 2)},' - # f' speed: {doc_analyze_speed} pages/second' - # ) - return results \ No newline at end of file diff --git a/magic_pdf/model/magic_model.py b/magic_pdf/model/magic_model.py deleted file mode 100644 index b5922d35cf9622685bde3478d872476ca63d7487..0000000000000000000000000000000000000000 --- a/magic_pdf/model/magic_model.py +++ /dev/null @@ -1,771 +0,0 @@ -import enum - -from magic_pdf.config.model_block_type import ModelBlockTypeEnum -from magic_pdf.config.ocr_content_type import CategoryId, ContentType -from magic_pdf.data.dataset import Dataset -from magic_pdf.libs.boxbase import (_is_in, bbox_distance, bbox_relative_pos, - calculate_iou) -from magic_pdf.libs.coordinate_transform import get_scale_ratio -from magic_pdf.pre_proc.remove_bbox_overlap import _remove_overlap_between_bbox - -CAPATION_OVERLAP_AREA_RATIO = 0.6 -MERGE_BOX_OVERLAP_AREA_RATIO = 1.1 - - -class PosRelationEnum(enum.Enum): - LEFT = 'left' - RIGHT = 'right' - UP = 'up' - BOTTOM = 'bottom' - ALL = 'all' - - -class MagicModel: - """每个函数没有得到元素的时候返回空list.""" - - def __fix_axis(self): - for model_page_info in self.__model_list: - need_remove_list = [] - page_no = model_page_info['page_info']['page_no'] - horizontal_scale_ratio, vertical_scale_ratio = get_scale_ratio( - model_page_info, self.__docs.get_page(page_no) - ) - layout_dets = model_page_info['layout_dets'] - for layout_det in layout_dets: - - if layout_det.get('bbox') is not None: - # 兼容直接输出bbox的模型数据,如paddle - x0, y0, x1, y1 = layout_det['bbox'] - else: - # 兼容直接输出poly的模型数据,如xxx - x0, y0, _, _, x1, y1, _, _ = layout_det['poly'] - - bbox = [ - int(x0 / horizontal_scale_ratio), - int(y0 / vertical_scale_ratio), - int(x1 / horizontal_scale_ratio), - int(y1 / vertical_scale_ratio), - ] - layout_det['bbox'] = bbox - # 删除高度或者宽度小于等于0的spans - if bbox[2] - bbox[0] <= 0 or bbox[3] - bbox[1] <= 0: - need_remove_list.append(layout_det) - for need_remove in need_remove_list: - layout_dets.remove(need_remove) - - def __fix_by_remove_low_confidence(self): - for model_page_info in self.__model_list: - need_remove_list = [] - layout_dets = model_page_info['layout_dets'] - for layout_det in layout_dets: - if layout_det['score'] <= 0.05: - need_remove_list.append(layout_det) - else: - continue - for need_remove in need_remove_list: - layout_dets.remove(need_remove) - - def __fix_by_remove_high_iou_and_low_confidence(self): - for model_page_info in self.__model_list: - need_remove_list = [] - layout_dets = model_page_info['layout_dets'] - for layout_det1 in layout_dets: - for layout_det2 in layout_dets: - if layout_det1 == layout_det2: - continue - if layout_det1['category_id'] in [ - 0, - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - ] and layout_det2['category_id'] in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]: - if ( - calculate_iou(layout_det1['bbox'], layout_det2['bbox']) - > 0.9 - ): - if layout_det1['score'] < layout_det2['score']: - layout_det_need_remove = layout_det1 - else: - layout_det_need_remove = layout_det2 - - if layout_det_need_remove not in need_remove_list: - need_remove_list.append(layout_det_need_remove) - else: - continue - else: - continue - for need_remove in need_remove_list: - layout_dets.remove(need_remove) - - def __init__(self, model_list: list, docs: Dataset): - self.__model_list = model_list - self.__docs = docs - """为所有模型数据添加bbox信息(缩放,poly->bbox)""" - self.__fix_axis() - """删除置信度特别低的模型数据(<0.05),提高质量""" - self.__fix_by_remove_low_confidence() - """删除高iou(>0.9)数据中置信度较低的那个""" - 
self.__fix_by_remove_high_iou_and_low_confidence() - self.__fix_footnote() - - def _bbox_distance(self, bbox1, bbox2): - left, right, bottom, top = bbox_relative_pos(bbox1, bbox2) - flags = [left, right, bottom, top] - count = sum([1 if v else 0 for v in flags]) - if count > 1: - return float('inf') - if left or right: - l1 = bbox1[3] - bbox1[1] - l2 = bbox2[3] - bbox2[1] - else: - l1 = bbox1[2] - bbox1[0] - l2 = bbox2[2] - bbox2[0] - - if l2 > l1 and (l2 - l1) / l1 > 0.3: - return float('inf') - - return bbox_distance(bbox1, bbox2) - - def __fix_footnote(self): - # 3: figure, 5: table, 7: footnote - for model_page_info in self.__model_list: - footnotes = [] - figures = [] - tables = [] - - for obj in model_page_info['layout_dets']: - if obj['category_id'] == 7: - footnotes.append(obj) - elif obj['category_id'] == 3: - figures.append(obj) - elif obj['category_id'] == 5: - tables.append(obj) - if len(footnotes) * len(figures) == 0: - continue - dis_figure_footnote = {} - dis_table_footnote = {} - - for i in range(len(footnotes)): - for j in range(len(figures)): - pos_flag_count = sum( - list( - map( - lambda x: 1 if x else 0, - bbox_relative_pos( - footnotes[i]['bbox'], figures[j]['bbox'] - ), - ) - ) - ) - if pos_flag_count > 1: - continue - dis_figure_footnote[i] = min( - self._bbox_distance(figures[j]['bbox'], footnotes[i]['bbox']), - dis_figure_footnote.get(i, float('inf')), - ) - for i in range(len(footnotes)): - for j in range(len(tables)): - pos_flag_count = sum( - list( - map( - lambda x: 1 if x else 0, - bbox_relative_pos( - footnotes[i]['bbox'], tables[j]['bbox'] - ), - ) - ) - ) - if pos_flag_count > 1: - continue - - dis_table_footnote[i] = min( - self._bbox_distance(tables[j]['bbox'], footnotes[i]['bbox']), - dis_table_footnote.get(i, float('inf')), - ) - for i in range(len(footnotes)): - if i not in dis_figure_footnote: - continue - if dis_table_footnote.get(i, float('inf')) > dis_figure_footnote[i]: - footnotes[i]['category_id'] = CategoryId.ImageFootnote - - def __reduct_overlap(self, bboxes): - N = len(bboxes) - keep = [True] * N - for i in range(N): - for j in range(N): - if i == j: - continue - if _is_in(bboxes[i]['bbox'], bboxes[j]['bbox']): - keep[i] = False - return [bboxes[i] for i in range(N) if keep[i]] - - def __tie_up_category_by_distance_v2( - self, - page_no: int, - subject_category_id: int, - object_category_id: int, - priority_pos: PosRelationEnum, - ): - """_summary_ - - Args: - page_no (int): _description_ - subject_category_id (int): _description_ - object_category_id (int): _description_ - priority_pos (PosRelationEnum): _description_ - - Returns: - _type_: _description_ - """ - AXIS_MULPLICITY = 0.5 - subjects = self.__reduct_overlap( - list( - map( - lambda x: {'bbox': x['bbox'], 'score': x['score']}, - filter( - lambda x: x['category_id'] == subject_category_id, - self.__model_list[page_no]['layout_dets'], - ), - ) - ) - ) - - objects = self.__reduct_overlap( - list( - map( - lambda x: {'bbox': x['bbox'], 'score': x['score']}, - filter( - lambda x: x['category_id'] == object_category_id, - self.__model_list[page_no]['layout_dets'], - ), - ) - ) - ) - M = len(objects) - - subjects.sort(key=lambda x: x['bbox'][0] ** 2 + x['bbox'][1] ** 2) - objects.sort(key=lambda x: x['bbox'][0] ** 2 + x['bbox'][1] ** 2) - - sub_obj_map_h = {i: [] for i in range(len(subjects))} - - dis_by_directions = { - 'top': [[-1, float('inf')]] * M, - 'bottom': [[-1, float('inf')]] * M, - 'left': [[-1, float('inf')]] * M, - 'right': [[-1, float('inf')]] * M, - } - - for i, obj in 
enumerate(objects): - l_x_axis, l_y_axis = ( - obj['bbox'][2] - obj['bbox'][0], - obj['bbox'][3] - obj['bbox'][1], - ) - axis_unit = min(l_x_axis, l_y_axis) - for j, sub in enumerate(subjects): - - bbox1, bbox2, _ = _remove_overlap_between_bbox( - objects[i]['bbox'], subjects[j]['bbox'] - ) - left, right, bottom, top = bbox_relative_pos(bbox1, bbox2) - flags = [left, right, bottom, top] - if sum([1 if v else 0 for v in flags]) > 1: - continue - - if left: - if dis_by_directions['left'][i][1] > bbox_distance( - obj['bbox'], sub['bbox'] - ): - dis_by_directions['left'][i] = [ - j, - bbox_distance(obj['bbox'], sub['bbox']), - ] - if right: - if dis_by_directions['right'][i][1] > bbox_distance( - obj['bbox'], sub['bbox'] - ): - dis_by_directions['right'][i] = [ - j, - bbox_distance(obj['bbox'], sub['bbox']), - ] - if bottom: - if dis_by_directions['bottom'][i][1] > bbox_distance( - obj['bbox'], sub['bbox'] - ): - dis_by_directions['bottom'][i] = [ - j, - bbox_distance(obj['bbox'], sub['bbox']), - ] - if top: - if dis_by_directions['top'][i][1] > bbox_distance( - obj['bbox'], sub['bbox'] - ): - dis_by_directions['top'][i] = [ - j, - bbox_distance(obj['bbox'], sub['bbox']), - ] - - if ( - dis_by_directions['top'][i][1] != float('inf') - and dis_by_directions['bottom'][i][1] != float('inf') - and priority_pos in (PosRelationEnum.BOTTOM, PosRelationEnum.UP) - ): - RATIO = 3 - if ( - abs( - dis_by_directions['top'][i][1] - - dis_by_directions['bottom'][i][1] - ) - < RATIO * axis_unit - ): - - if priority_pos == PosRelationEnum.BOTTOM: - sub_obj_map_h[dis_by_directions['bottom'][i][0]].append(i) - else: - sub_obj_map_h[dis_by_directions['top'][i][0]].append(i) - continue - - if dis_by_directions['left'][i][1] != float('inf') or dis_by_directions[ - 'right' - ][i][1] != float('inf'): - if dis_by_directions['left'][i][1] != float( - 'inf' - ) and dis_by_directions['right'][i][1] != float('inf'): - if AXIS_MULPLICITY * axis_unit >= abs( - dis_by_directions['left'][i][1] - - dis_by_directions['right'][i][1] - ): - left_sub_bbox = subjects[dis_by_directions['left'][i][0]][ - 'bbox' - ] - right_sub_bbox = subjects[dis_by_directions['right'][i][0]][ - 'bbox' - ] - - left_sub_bbox_y_axis = left_sub_bbox[3] - left_sub_bbox[1] - right_sub_bbox_y_axis = right_sub_bbox[3] - right_sub_bbox[1] - - if ( - abs(left_sub_bbox_y_axis - l_y_axis) - + dis_by_directions['left'][i][0] - > abs(right_sub_bbox_y_axis - l_y_axis) - + dis_by_directions['right'][i][0] - ): - left_or_right = dis_by_directions['right'][i] - else: - left_or_right = dis_by_directions['left'][i] - else: - left_or_right = dis_by_directions['left'][i] - if left_or_right[1] > dis_by_directions['right'][i][1]: - left_or_right = dis_by_directions['right'][i] - else: - left_or_right = dis_by_directions['left'][i] - if left_or_right[1] == float('inf'): - left_or_right = dis_by_directions['right'][i] - else: - left_or_right = [-1, float('inf')] - - if dis_by_directions['top'][i][1] != float('inf') or dis_by_directions[ - 'bottom' - ][i][1] != float('inf'): - if dis_by_directions['top'][i][1] != float('inf') and dis_by_directions[ - 'bottom' - ][i][1] != float('inf'): - if AXIS_MULPLICITY * axis_unit >= abs( - dis_by_directions['top'][i][1] - - dis_by_directions['bottom'][i][1] - ): - top_bottom = subjects[dis_by_directions['bottom'][i][0]]['bbox'] - bottom_top = subjects[dis_by_directions['top'][i][0]]['bbox'] - - top_bottom_x_axis = top_bottom[2] - top_bottom[0] - bottom_top_x_axis = bottom_top[2] - bottom_top[0] - if ( - abs(top_bottom_x_axis - l_x_axis) 
- + dis_by_directions['bottom'][i][1] - > abs(bottom_top_x_axis - l_x_axis) - + dis_by_directions['top'][i][1] - ): - top_or_bottom = dis_by_directions['top'][i] - else: - top_or_bottom = dis_by_directions['bottom'][i] - else: - top_or_bottom = dis_by_directions['top'][i] - if top_or_bottom[1] > dis_by_directions['bottom'][i][1]: - top_or_bottom = dis_by_directions['bottom'][i] - else: - top_or_bottom = dis_by_directions['top'][i] - if top_or_bottom[1] == float('inf'): - top_or_bottom = dis_by_directions['bottom'][i] - else: - top_or_bottom = [-1, float('inf')] - - if left_or_right[1] != float('inf') or top_or_bottom[1] != float('inf'): - if left_or_right[1] != float('inf') and top_or_bottom[1] != float( - 'inf' - ): - if AXIS_MULPLICITY * axis_unit >= abs( - left_or_right[1] - top_or_bottom[1] - ): - y_axis_bbox = subjects[left_or_right[0]]['bbox'] - x_axis_bbox = subjects[top_or_bottom[0]]['bbox'] - - if ( - abs((x_axis_bbox[2] - x_axis_bbox[0]) - l_x_axis) / l_x_axis - > abs((y_axis_bbox[3] - y_axis_bbox[1]) - l_y_axis) - / l_y_axis - ): - sub_obj_map_h[left_or_right[0]].append(i) - else: - sub_obj_map_h[top_or_bottom[0]].append(i) - else: - if left_or_right[1] > top_or_bottom[1]: - sub_obj_map_h[top_or_bottom[0]].append(i) - else: - sub_obj_map_h[left_or_right[0]].append(i) - else: - if left_or_right[1] != float('inf'): - sub_obj_map_h[left_or_right[0]].append(i) - else: - sub_obj_map_h[top_or_bottom[0]].append(i) - ret = [] - for i in sub_obj_map_h.keys(): - ret.append( - { - 'sub_bbox': { - 'bbox': subjects[i]['bbox'], - 'score': subjects[i]['score'], - }, - 'obj_bboxes': [ - {'score': objects[j]['score'], 'bbox': objects[j]['bbox']} - for j in sub_obj_map_h[i] - ], - 'sub_idx': i, - } - ) - return ret - - - def __tie_up_category_by_distance_v3( - self, - page_no: int, - subject_category_id: int, - object_category_id: int, - priority_pos: PosRelationEnum, - ): - subjects = self.__reduct_overlap( - list( - map( - lambda x: {'bbox': x['bbox'], 'score': x['score']}, - filter( - lambda x: x['category_id'] == subject_category_id, - self.__model_list[page_no]['layout_dets'], - ), - ) - ) - ) - objects = self.__reduct_overlap( - list( - map( - lambda x: {'bbox': x['bbox'], 'score': x['score']}, - filter( - lambda x: x['category_id'] == object_category_id, - self.__model_list[page_no]['layout_dets'], - ), - ) - ) - ) - - ret = [] - N, M = len(subjects), len(objects) - subjects.sort(key=lambda x: x['bbox'][0] ** 2 + x['bbox'][1] ** 2) - objects.sort(key=lambda x: x['bbox'][0] ** 2 + x['bbox'][1] ** 2) - - OBJ_IDX_OFFSET = 10000 - SUB_BIT_KIND, OBJ_BIT_KIND = 0, 1 - - all_boxes_with_idx = [(i, SUB_BIT_KIND, sub['bbox'][0], sub['bbox'][1]) for i, sub in enumerate(subjects)] + [(i + OBJ_IDX_OFFSET , OBJ_BIT_KIND, obj['bbox'][0], obj['bbox'][1]) for i, obj in enumerate(objects)] - seen_idx = set() - seen_sub_idx = set() - - while N > len(seen_sub_idx): - candidates = [] - for idx, kind, x0, y0 in all_boxes_with_idx: - if idx in seen_idx: - continue - candidates.append((idx, kind, x0, y0)) - - if len(candidates) == 0: - break - left_x = min([v[2] for v in candidates]) - top_y = min([v[3] for v in candidates]) - - candidates.sort(key=lambda x: (x[2]-left_x) ** 2 + (x[3] - top_y) ** 2) - - - fst_idx, fst_kind, left_x, top_y = candidates[0] - candidates.sort(key=lambda x: (x[2] - left_x) ** 2 + (x[3] - top_y)**2) - nxt = None - - for i in range(1, len(candidates)): - if candidates[i][1] ^ fst_kind == 1: - nxt = candidates[i] - break - if nxt is None: - break - - if fst_kind == SUB_BIT_KIND: - 
sub_idx, obj_idx = fst_idx, nxt[0] - OBJ_IDX_OFFSET - - else: - sub_idx, obj_idx = nxt[0], fst_idx - OBJ_IDX_OFFSET - - pair_dis = bbox_distance(subjects[sub_idx]['bbox'], objects[obj_idx]['bbox']) - nearest_dis = float('inf') - for i in range(N): - if i in seen_idx or i == sub_idx:continue - nearest_dis = min(nearest_dis, bbox_distance(subjects[i]['bbox'], objects[obj_idx]['bbox'])) - - if pair_dis >= 3*nearest_dis: - seen_idx.add(sub_idx) - continue - - seen_idx.add(sub_idx) - seen_idx.add(obj_idx + OBJ_IDX_OFFSET) - seen_sub_idx.add(sub_idx) - - ret.append( - { - 'sub_bbox': { - 'bbox': subjects[sub_idx]['bbox'], - 'score': subjects[sub_idx]['score'], - }, - 'obj_bboxes': [ - {'score': objects[obj_idx]['score'], 'bbox': objects[obj_idx]['bbox']} - ], - 'sub_idx': sub_idx, - } - ) - - for i in range(len(objects)): - j = i + OBJ_IDX_OFFSET - if j in seen_idx: - continue - seen_idx.add(j) - nearest_dis, nearest_sub_idx = float('inf'), -1 - for k in range(len(subjects)): - dis = bbox_distance(objects[i]['bbox'], subjects[k]['bbox']) - if dis < nearest_dis: - nearest_dis = dis - nearest_sub_idx = k - - for k in range(len(subjects)): - if k != nearest_sub_idx: continue - if k in seen_sub_idx: - for kk in range(len(ret)): - if ret[kk]['sub_idx'] == k: - ret[kk]['obj_bboxes'].append({'score': objects[i]['score'], 'bbox': objects[i]['bbox']}) - break - else: - ret.append( - { - 'sub_bbox': { - 'bbox': subjects[k]['bbox'], - 'score': subjects[k]['score'], - }, - 'obj_bboxes': [ - {'score': objects[i]['score'], 'bbox': objects[i]['bbox']} - ], - 'sub_idx': k, - } - ) - seen_sub_idx.add(k) - seen_idx.add(k) - - - for i in range(len(subjects)): - if i in seen_sub_idx: - continue - ret.append( - { - 'sub_bbox': { - 'bbox': subjects[i]['bbox'], - 'score': subjects[i]['score'], - }, - 'obj_bboxes': [], - 'sub_idx': i, - } - ) - - - return ret - - - def get_imgs_v2(self, page_no: int): - with_captions = self.__tie_up_category_by_distance_v3( - page_no, 3, 4, PosRelationEnum.BOTTOM - ) - with_footnotes = self.__tie_up_category_by_distance_v3( - page_no, 3, CategoryId.ImageFootnote, PosRelationEnum.ALL - ) - ret = [] - for v in with_captions: - record = { - 'image_body': v['sub_bbox'], - 'image_caption_list': v['obj_bboxes'], - } - filter_idx = v['sub_idx'] - d = next(filter(lambda x: x['sub_idx'] == filter_idx, with_footnotes)) - record['image_footnote_list'] = d['obj_bboxes'] - ret.append(record) - return ret - - def get_tables_v2(self, page_no: int) -> list: - with_captions = self.__tie_up_category_by_distance_v3( - page_no, 5, 6, PosRelationEnum.UP - ) - with_footnotes = self.__tie_up_category_by_distance_v3( - page_no, 5, 7, PosRelationEnum.ALL - ) - ret = [] - for v in with_captions: - record = { - 'table_body': v['sub_bbox'], - 'table_caption_list': v['obj_bboxes'], - } - filter_idx = v['sub_idx'] - d = next(filter(lambda x: x['sub_idx'] == filter_idx, with_footnotes)) - record['table_footnote_list'] = d['obj_bboxes'] - ret.append(record) - return ret - - def get_imgs(self, page_no: int): - return self.get_imgs_v2(page_no) - - def get_tables( - self, page_no: int - ) -> list: # 3个坐标, caption, table主体,table-note - return self.get_tables_v2(page_no) - - def get_equations(self, page_no: int) -> list: # 有坐标,也有字 - inline_equations = self.__get_blocks_by_type( - ModelBlockTypeEnum.EMBEDDING.value, page_no, ['latex'] - ) - interline_equations = self.__get_blocks_by_type( - ModelBlockTypeEnum.ISOLATED.value, page_no, ['latex'] - ) - interline_equations_blocks = self.__get_blocks_by_type( - 
ModelBlockTypeEnum.ISOLATE_FORMULA.value, page_no - ) - return inline_equations, interline_equations, interline_equations_blocks - - def get_discarded(self, page_no: int) -> list: # 自研模型,只有坐标 - blocks = self.__get_blocks_by_type(ModelBlockTypeEnum.ABANDON.value, page_no) - return blocks - - def get_text_blocks(self, page_no: int) -> list: # 自研模型搞的,只有坐标,没有字 - blocks = self.__get_blocks_by_type(ModelBlockTypeEnum.PLAIN_TEXT.value, page_no) - return blocks - - def get_title_blocks(self, page_no: int) -> list: # 自研模型,只有坐标,没字 - blocks = self.__get_blocks_by_type(ModelBlockTypeEnum.TITLE.value, page_no) - return blocks - - def get_ocr_text(self, page_no: int) -> list: # paddle 搞的,有字也有坐标 - text_spans = [] - model_page_info = self.__model_list[page_no] - layout_dets = model_page_info['layout_dets'] - for layout_det in layout_dets: - if layout_det['category_id'] == '15': - span = { - 'bbox': layout_det['bbox'], - 'content': layout_det['text'], - } - text_spans.append(span) - return text_spans - - def get_all_spans(self, page_no: int) -> list: - - def remove_duplicate_spans(spans): - new_spans = [] - for span in spans: - if not any(span == existing_span for existing_span in new_spans): - new_spans.append(span) - return new_spans - - all_spans = [] - model_page_info = self.__model_list[page_no] - layout_dets = model_page_info['layout_dets'] - allow_category_id_list = [3, 5, 13, 14, 15] - """当成span拼接的""" - # 3: 'image', # 图片 - # 5: 'table', # 表格 - # 13: 'inline_equation', # 行内公式 - # 14: 'interline_equation', # 行间公式 - # 15: 'text', # ocr识别文本 - for layout_det in layout_dets: - category_id = layout_det['category_id'] - if category_id in allow_category_id_list: - span = {'bbox': layout_det['bbox'], 'score': layout_det['score']} - if category_id == 3: - span['type'] = ContentType.Image - elif category_id == 5: - # 获取table模型结果 - latex = layout_det.get('latex', None) - html = layout_det.get('html', None) - if latex: - span['latex'] = latex - elif html: - span['html'] = html - span['type'] = ContentType.Table - elif category_id == 13: - span['content'] = layout_det['latex'] - span['type'] = ContentType.InlineEquation - elif category_id == 14: - span['content'] = layout_det['latex'] - span['type'] = ContentType.InterlineEquation - elif category_id == 15: - span['content'] = layout_det['text'] - span['type'] = ContentType.Text - all_spans.append(span) - return remove_duplicate_spans(all_spans) - - def get_page_size(self, page_no: int): # 获取页面宽高 - # 获取当前页的page对象 - page = self.__docs.get_page(page_no).get_page_info() - # 获取当前页的宽高 - page_w = page.w - page_h = page.h - return page_w, page_h - - def __get_blocks_by_type( - self, type: int, page_no: int, extra_col: list[str] = [] - ) -> list: - blocks = [] - for page_dict in self.__model_list: - layout_dets = page_dict.get('layout_dets', []) - page_info = page_dict.get('page_info', {}) - page_number = page_info.get('page_no', -1) - if page_no != page_number: - continue - for item in layout_dets: - category_id = item.get('category_id', -1) - bbox = item.get('bbox', None) - - if category_id == type: - block = { - 'bbox': bbox, - 'score': item.get('score'), - } - for col in extra_col: - block[col] = item.get(col, None) - blocks.append(block) - return blocks - - def get_model_list(self, page_no): - return self.__model_list[page_no] diff --git a/magic_pdf/model/model_list.py b/magic_pdf/model/model_list.py deleted file mode 100644 index ec871d16981dc663d9ff635886ca847d6b16f6d5..0000000000000000000000000000000000000000 --- a/magic_pdf/model/model_list.py +++ /dev/null @@ 
-1,12 +0,0 @@ -class MODEL: - Paddle = "pp_structure_v2" - PEK = "pdf_extract_kit" - - -class AtomicModel: - Layout = "layout" - MFD = "mfd" - MFR = "mfr" - OCR = "ocr" - Table = "table" - LangDetect = "langdetect" diff --git a/magic_pdf/model/pdf_extract_kit.py b/magic_pdf/model/pdf_extract_kit.py deleted file mode 100644 index f389a306565e20ff00dfef7c784bce30151583f1..0000000000000000000000000000000000000000 --- a/magic_pdf/model/pdf_extract_kit.py +++ /dev/null @@ -1,266 +0,0 @@ -# flake8: noqa -import os -import time - -import cv2 -import torch -import yaml -from loguru import logger - -os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1' # 禁止albumentations检查更新 - -from magic_pdf.config.constants import * -from magic_pdf.model.model_list import AtomicModel -from magic_pdf.model.sub_modules.model_init import AtomModelSingleton -from magic_pdf.model.sub_modules.model_utils import ( - clean_vram, crop_img, get_res_list_from_layout_res) -from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.ocr_utils import ( - get_adjusted_mfdetrec_res, get_ocr_result_list) - - -class CustomPEKModel: - - def __init__(self, ocr: bool = False, show_log: bool = False, **kwargs): - """ - ======== model init ======== - """ - # 获取当前文件(即 pdf_extract_kit.py)的绝对路径 - current_file_path = os.path.abspath(__file__) - # 获取当前文件所在的目录(model) - current_dir = os.path.dirname(current_file_path) - # 上一级目录(magic_pdf) - root_dir = os.path.dirname(current_dir) - # model_config目录 - model_config_dir = os.path.join(root_dir, 'resources', 'model_config') - # 构建 model_configs.yaml 文件的完整路径 - config_path = os.path.join(model_config_dir, 'model_configs.yaml') - with open(config_path, 'r', encoding='utf-8') as f: - self.configs = yaml.load(f, Loader=yaml.FullLoader) - # 初始化解析配置 - - # layout config - self.layout_config = kwargs.get('layout_config') - self.layout_model_name = self.layout_config.get( - 'model', MODEL_NAME.DocLayout_YOLO - ) - - # formula config - self.formula_config = kwargs.get('formula_config') - self.mfd_model_name = self.formula_config.get( - 'mfd_model', MODEL_NAME.YOLO_V8_MFD - ) - self.mfr_model_name = self.formula_config.get( - 'mfr_model', MODEL_NAME.UniMerNet_v2_Small - ) - self.apply_formula = self.formula_config.get('enable', True) - - # table config - self.table_config = kwargs.get('table_config') - self.apply_table = self.table_config.get('enable', False) - self.table_max_time = self.table_config.get('max_time', TABLE_MAX_TIME_VALUE) - self.table_model_name = self.table_config.get('model', MODEL_NAME.RAPID_TABLE) - self.table_sub_model_name = self.table_config.get('sub_model', None) - - # ocr config - self.apply_ocr = ocr - self.lang = kwargs.get('lang', None) - - logger.info( - 'DocAnalysis init, this may take some times, layout_model: {}, apply_formula: {}, apply_ocr: {}, ' - 'apply_table: {}, table_model: {}, lang: {}'.format( - self.layout_model_name, - self.apply_formula, - self.apply_ocr, - self.apply_table, - self.table_model_name, - self.lang, - ) - ) - # 初始化解析方案 - self.device = kwargs.get('device', 'cpu') - - logger.info('using device: {}'.format(self.device)) - models_dir = kwargs.get( - 'models_dir', os.path.join(root_dir, 'resources', 'models') - ) - logger.info('using models_dir: {}'.format(models_dir)) - - atom_model_manager = AtomModelSingleton() - - # 初始化公式识别 - if self.apply_formula: - # 初始化公式检测模型 - self.mfd_model = atom_model_manager.get_atom_model( - atom_model_name=AtomicModel.MFD, - mfd_weights=str( - os.path.join( - models_dir, self.configs['weights'][self.mfd_model_name] - ) - ), - 
device=self.device, - ) - - # 初始化公式解析模型 - mfr_weight_dir = str( - os.path.join(models_dir, self.configs['weights'][self.mfr_model_name]) - ) - mfr_cfg_path = str(os.path.join(model_config_dir, 'UniMERNet', 'demo.yaml')) - - self.mfr_model = atom_model_manager.get_atom_model( - atom_model_name=AtomicModel.MFR, - mfr_weight_dir=mfr_weight_dir, - mfr_cfg_path=mfr_cfg_path, - device=self.device, - ) - - # 初始化layout模型 - if self.layout_model_name == MODEL_NAME.LAYOUTLMv3: - self.layout_model = atom_model_manager.get_atom_model( - atom_model_name=AtomicModel.Layout, - layout_model_name=MODEL_NAME.LAYOUTLMv3, - layout_weights=str( - os.path.join( - models_dir, self.configs['weights'][self.layout_model_name] - ) - ), - layout_config_file=str( - os.path.join( - model_config_dir, 'layoutlmv3', 'layoutlmv3_base_inference.yaml' - ) - ), - device='cpu' if str(self.device).startswith("mps") else self.device, - ) - elif self.layout_model_name == MODEL_NAME.DocLayout_YOLO: - self.layout_model = atom_model_manager.get_atom_model( - atom_model_name=AtomicModel.Layout, - layout_model_name=MODEL_NAME.DocLayout_YOLO, - doclayout_yolo_weights=str( - os.path.join( - models_dir, self.configs['weights'][self.layout_model_name] - ) - ), - device=self.device, - ) - # 初始化ocr - self.ocr_model = atom_model_manager.get_atom_model( - atom_model_name=AtomicModel.OCR, - ocr_show_log=show_log, - det_db_box_thresh=0.3, - lang=self.lang - ) - # init table model - if self.apply_table: - table_model_dir = self.configs['weights'][self.table_model_name] - self.table_model = atom_model_manager.get_atom_model( - atom_model_name=AtomicModel.Table, - table_model_name=self.table_model_name, - table_model_path=str(os.path.join(models_dir, table_model_dir)), - table_max_time=self.table_max_time, - device=self.device, - ocr_engine=self.ocr_model, - table_sub_model_name=self.table_sub_model_name - ) - - logger.info('DocAnalysis init done!') - - def __call__(self, image): - # layout检测 - layout_start = time.time() - layout_res = [] - if self.layout_model_name == MODEL_NAME.LAYOUTLMv3: - # layoutlmv3 - layout_res = self.layout_model(image, ignore_catids=[]) - elif self.layout_model_name == MODEL_NAME.DocLayout_YOLO: - layout_res = self.layout_model.predict(image) - - layout_cost = round(time.time() - layout_start, 2) - logger.info(f'layout detection time: {layout_cost}') - - if self.apply_formula: - # 公式检测 - mfd_start = time.time() - mfd_res = self.mfd_model.predict(image) - logger.info(f'mfd time: {round(time.time() - mfd_start, 2)}') - - # 公式识别 - mfr_start = time.time() - formula_list = self.mfr_model.predict(mfd_res, image) - layout_res.extend(formula_list) - mfr_cost = round(time.time() - mfr_start, 2) - logger.info(f'formula nums: {len(formula_list)}, mfr time: {mfr_cost}') - - # 清理显存 - clean_vram(self.device, vram_threshold=6) - - # 从layout_res中获取ocr区域、表格区域、公式区域 - ocr_res_list, table_res_list, single_page_mfdetrec_res = ( - get_res_list_from_layout_res(layout_res) - ) - - # ocr识别 - ocr_start = time.time() - # Process each area that requires OCR processing - for res in ocr_res_list: - new_image, useful_list = crop_img(res, image, crop_paste_x=50, crop_paste_y=50) - adjusted_mfdetrec_res = get_adjusted_mfdetrec_res(single_page_mfdetrec_res, useful_list) - - # OCR recognition - new_image = cv2.cvtColor(new_image, cv2.COLOR_RGB2BGR) - - if self.apply_ocr: - ocr_res = self.ocr_model.ocr(new_image, mfd_res=adjusted_mfdetrec_res)[0] - else: - ocr_res = self.ocr_model.ocr(new_image, mfd_res=adjusted_mfdetrec_res, rec=False)[0] - - # Integration 
results - if ocr_res: - ocr_result_list = get_ocr_result_list(ocr_res, useful_list) - layout_res.extend(ocr_result_list) - - ocr_cost = round(time.time() - ocr_start, 2) - if self.apply_ocr: - logger.info(f"ocr time: {ocr_cost}") - else: - logger.info(f"det time: {ocr_cost}") - - # 表格识别 table recognition - if self.apply_table: - table_start = time.time() - for res in table_res_list: - new_image, _ = crop_img(res, image) - single_table_start_time = time.time() - html_code = None - if self.table_model_name == MODEL_NAME.STRUCT_EQTABLE: - with torch.no_grad(): - table_result = self.table_model.predict(new_image, 'html') - if len(table_result) > 0: - html_code = table_result[0] - elif self.table_model_name == MODEL_NAME.TABLE_MASTER: - html_code = self.table_model.img2html(new_image) - elif self.table_model_name == MODEL_NAME.RAPID_TABLE: - html_code, table_cell_bboxes, logic_points, elapse = self.table_model.predict( - new_image - ) - run_time = time.time() - single_table_start_time - if run_time > self.table_max_time: - logger.warning( - f'table recognition processing exceeds max time {self.table_max_time}s' - ) - # 判断是否返回正常 - if html_code: - expected_ending = html_code.strip().endswith( - '' - ) or html_code.strip().endswith('') - if expected_ending: - res['html'] = html_code - else: - logger.warning( - 'table recognition processing fails, not found expected HTML table end' - ) - else: - logger.warning( - 'table recognition processing fails, not get html return' - ) - logger.info(f'table time: {round(time.time() - table_start, 2)}') - - return layout_res diff --git a/magic_pdf/model/pp_structure_v2.py b/magic_pdf/model/pp_structure_v2.py deleted file mode 100644 index ad9f71ad47d0dc9513b4913f4b156e1ab7fb65b2..0000000000000000000000000000000000000000 --- a/magic_pdf/model/pp_structure_v2.py +++ /dev/null @@ -1,110 +0,0 @@ -import random - -from loguru import logger - -try: - from paddleocr import PPStructure -except ImportError: - logger.error('paddleocr not installed, please install by "pip install magic-pdf[lite]"') - exit(1) - - -def region_to_bbox(region): - x0 = region[0][0] - y0 = region[0][1] - x1 = region[2][0] - y1 = region[2][1] - return [x0, y0, x1, y1] - - -class CustomPaddleModel: - def __init__(self, - ocr: bool = False, - show_log: bool = False, - lang=None, - det_db_box_thresh=0.3, - use_dilation=True, - det_db_unclip_ratio=1.8 - ): - if lang is not None: - self.model = PPStructure(table=False, - ocr=True, - show_log=show_log, - lang=lang, - det_db_box_thresh=det_db_box_thresh, - use_dilation=use_dilation, - det_db_unclip_ratio=det_db_unclip_ratio, - ) - else: - self.model = PPStructure(table=False, - ocr=True, - show_log=show_log, - det_db_box_thresh=det_db_box_thresh, - use_dilation=use_dilation, - det_db_unclip_ratio=det_db_unclip_ratio, - ) - - def __call__(self, img): - try: - import cv2 - except ImportError: - logger.error("opencv-python not installed, please install by pip.") - exit(1) - # 将RGB图片转换为BGR格式适配paddle - img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) - result = self.model(img) - spans = [] - for line in result: - line.pop("img") - """ - 为paddle输出适配type no. 
- title: 0 # 标题 - text: 1 # 文本 - header: 2 # abandon - footer: 2 # abandon - reference: 1 # 文本 or abandon - equation: 8 # 行间公式 block - equation: 14 # 行间公式 text - figure: 3 # 图片 - figure_caption: 4 # 图片描述 - table: 5 # 表格 - table_caption: 6 # 表格描述 - """ - if line["type"] == "title": - line["category_id"] = 0 - elif line["type"] in ["text", "reference"]: - line["category_id"] = 1 - elif line["type"] == "figure": - line["category_id"] = 3 - elif line["type"] == "figure_caption": - line["category_id"] = 4 - elif line["type"] == "table": - line["category_id"] = 5 - elif line["type"] == "table_caption": - line["category_id"] = 6 - elif line["type"] == "equation": - line["category_id"] = 8 - elif line["type"] in ["header", "footer"]: - line["category_id"] = 2 - else: - logger.warning(f"unknown type: {line['type']}") - - # 兼容不输出score的paddleocr版本 - if line.get("score") is None: - line["score"] = 0.5 + random.random() * 0.5 - - res = line.pop("res", None) - if res is not None and len(res) > 0: - for span in res: - new_span = { - "category_id": 15, - "bbox": region_to_bbox(span["text_region"]), - "score": span["confidence"], - "text": span["text"], - } - spans.append(new_span) - - if len(spans) > 0: - result.extend(spans) - - return result diff --git a/magic_pdf/model/sub_modules/__init__.py b/magic_pdf/model/sub_modules/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/language_detection/utils.py b/magic_pdf/model/sub_modules/language_detection/utils.py deleted file mode 100644 index 20aefaf6a870edb0247e094cd583314d35bbb5d2..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/language_detection/utils.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright (c) Opendatalab. All rights reserved. 
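# --- Editor's note: a hypothetical condensed form of the PPStructure type -> category_id
# mapping implemented by the if/elif chain in CustomPaddleModel.__call__ above; it is not
# part of the original file and is shown only to make the mapping easier to scan.
PP_TYPE_TO_CATEGORY_ID = {
    "title": 0,            # title
    "text": 1,             # body text
    "reference": 1,        # treated as body text
    "header": 2,           # abandon
    "footer": 2,           # abandon
    "figure": 3,           # image
    "figure_caption": 4,   # image caption
    "table": 5,            # table
    "table_caption": 6,    # table caption
    "equation": 8,         # interline equation block
}
# line["category_id"] = PP_TYPE_TO_CATEGORY_ID.get(line["type"])  # unknown types only log a warning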
-import os -from pathlib import Path - -import yaml -os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1' # 禁止albumentations检查更新 - -from magic_pdf.config.constants import MODEL_NAME -from magic_pdf.data.utils import load_images_from_pdf -from magic_pdf.libs.config_reader import get_local_models_dir, get_device -from magic_pdf.libs.pdf_check import extract_pages -from magic_pdf.model.model_list import AtomicModel -from magic_pdf.model.sub_modules.model_init import AtomModelSingleton - - -def get_model_config(): - local_models_dir = get_local_models_dir() - device = get_device() - current_file_path = os.path.abspath(__file__) - root_dir = Path(current_file_path).parents[3] - model_config_dir = os.path.join(root_dir, 'resources', 'model_config') - config_path = os.path.join(model_config_dir, 'model_configs.yaml') - with open(config_path, 'r', encoding='utf-8') as f: - configs = yaml.load(f, Loader=yaml.FullLoader) - return root_dir, local_models_dir, device, configs - - -def get_text_images(simple_images): - _, local_models_dir, device, configs = get_model_config() - atom_model_manager = AtomModelSingleton() - temp_layout_model = atom_model_manager.get_atom_model( - atom_model_name=AtomicModel.Layout, - layout_model_name=MODEL_NAME.DocLayout_YOLO, - doclayout_yolo_weights=str( - os.path.join( - local_models_dir, configs['weights'][MODEL_NAME.DocLayout_YOLO] - ) - ), - device=device, - ) - text_images = [] - for simple_image in simple_images: - image = simple_image['img'] - layout_res = temp_layout_model.predict(image) - # 给textblock截图 - for res in layout_res: - if res['category_id'] in [1]: - x1, y1, _, _, x2, y2, _, _ = res['poly'] - # 初步清洗(宽和高都小于100) - if x2 - x1 < 100 and y2 - y1 < 100: - continue - text_images.append(image[y1:y2, x1:x2]) - return text_images - - -def auto_detect_lang(pdf_bytes: bytes): - sample_docs = extract_pages(pdf_bytes) - sample_pdf_bytes = sample_docs.tobytes() - simple_images = load_images_from_pdf(sample_pdf_bytes, dpi=200) - text_images = get_text_images(simple_images) - langdetect_model = model_init(MODEL_NAME.YOLO_V11_LangDetect) - lang = langdetect_model.do_detect(text_images) - return lang - - -def model_init(model_name: str): - atom_model_manager = AtomModelSingleton() - - if model_name == MODEL_NAME.YOLO_V11_LangDetect: - root_dir, _, device, _ = get_model_config() - model = atom_model_manager.get_atom_model( - atom_model_name=AtomicModel.LangDetect, - langdetect_model_name=MODEL_NAME.YOLO_V11_LangDetect, - langdetect_model_weight=str(os.path.join(root_dir, 'resources', 'yolov11-langdetect', 'yolo_v11_ft.pt')), - device=device, - ) - else: - raise ValueError(f"model_name {model_name} not found") - return model - diff --git a/magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py b/magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py deleted file mode 100644 index 28cdb17cd0f06aa8edfd3037e680edacec63a90e..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/language_detection/yolov11/YOLOv11.py +++ /dev/null @@ -1,145 +0,0 @@ -# Copyright (c) Opendatalab. All rights reserved. 
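# --- Editor's note: a minimal, hypothetical usage sketch for the language-detection helper
# deleted above (language_detection/utils.auto_detect_lang). It assumes the pre-deletion
# package layout plus a configured local models directory; the file name is illustrative only.
from magic_pdf.model.sub_modules.language_detection.utils import auto_detect_lang

with open("sample.pdf", "rb") as f:
    pdf_bytes = f.read()
lang = auto_detect_lang(pdf_bytes)  # e.g. 'ch', 'en', ..., or None if no text regions were found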
-import time -from collections import Counter -from uuid import uuid4 -import cv2 -import numpy as np -import torch -from loguru import logger -from ultralytics import YOLO - -language_dict = { - "ch": "中文简体", - "en": "英语", - "japan": "日语", - "korean": "韩语", - "fr": "法语", - "german": "德语", - "ar": "阿拉伯语", - "ru": "俄语" -} - - -def split_images(image, result_images=None): - """ - 对输入文件夹内的图片进行处理,若图片竖向(y方向)分辨率超过400,则进行拆分, - 每次平分图片,直至拆分出的图片竖向分辨率都满足400以下,将处理后的图片(拆分后的子图片)保存到输出文件夹。 - 避免保存因裁剪区域超出图片范围导致出现的无效黑色图片部分。 - """ - if result_images is None: - result_images = [] - - height, width = image.shape[:2] - long_side = max(width, height) # 获取较长边长度 - - if long_side <= 400: - result_images.append(image) - return result_images - - new_long_side = long_side // 2 - sub_images = [] - - if width >= height: # 如果宽度是较长边 - for x in range(0, width, new_long_side): - # 判断裁剪区域是否超出图片范围,如果超出则不进行裁剪保存操作 - if x + new_long_side > width: - continue - sub_image = image[0:height, x:x + new_long_side] - sub_images.append(sub_image) - else: # 如果高度是较长边 - for y in range(0, height, new_long_side): - # 判断裁剪区域是否超出图片范围,如果超出则不进行裁剪保存操作 - if y + new_long_side > height: - continue - sub_image = image[y:y + new_long_side, 0:width] - sub_images.append(sub_image) - - for sub_image in sub_images: - split_images(sub_image, result_images) - - return result_images - - -def resize_images_to_224(image): - """ - 若分辨率小于224则用黑色背景补齐到224*224大小,若大于等于224则调整为224*224大小。 - Works directly with NumPy arrays. - """ - try: - height, width = image.shape[:2] - - if width < 224 or height < 224: - # Create black background - new_image = np.zeros((224, 224, 3), dtype=np.uint8) - # Calculate paste position (ensure they're not negative) - paste_x = max(0, (224 - width) // 2) - paste_y = max(0, (224 - height) // 2) - # Make sure we don't exceed the boundaries of new_image - paste_width = min(width, 224) - paste_height = min(height, 224) - # Paste original image onto black background - new_image[paste_y:paste_y + paste_height, paste_x:paste_x + paste_width] = image[:paste_height, :paste_width] - image = new_image - else: - # Resize using cv2 - image = cv2.resize(image, (224, 224), interpolation=cv2.INTER_LANCZOS4) - - return image - except Exception as e: - logger.exception(f"Error in resize_images_to_224: {e}") - return None - - -class YOLOv11LangDetModel(object): - def __init__(self, langdetect_model_weight, device): - - self.model = YOLO(langdetect_model_weight) - - if str(device).startswith("npu"): - self.device = torch.device(device) - else: - self.device = device - def do_detect(self, images: list): - all_images = [] - for image in images: - height, width = image.shape[:2] - if width < 100 and height < 100: - continue - temp_images = split_images(image) - for temp_image in temp_images: - all_images.append(resize_images_to_224(temp_image)) - # langdetect_start = time.time() - images_lang_res = self.batch_predict(all_images, batch_size=256) - # logger.info(f"image number of langdetect: {len(images_lang_res)}, langdetect time: {round(time.time() - langdetect_start, 2)}") - if len(images_lang_res) > 0: - count_dict = Counter(images_lang_res) - language = max(count_dict, key=count_dict.get) - else: - language = None - return language - - def predict(self, image): - results = self.model.predict(image, verbose=False, device=self.device) - predicted_class_id = int(results[0].probs.top1) - predicted_class_name = self.model.names[predicted_class_id] - return predicted_class_name - - - def batch_predict(self, images: list, batch_size: int) -> list: - images_lang_res = [] 
- - for index in range(0, len(images), batch_size): - lang_res = [ - image_res.cpu() - for image_res in self.model.predict( - images[index: index + batch_size], - verbose = False, - device=self.device, - ) - ] - for res in lang_res: - predicted_class_id = int(res.probs.top1) - predicted_class_name = self.model.names[predicted_class_id] - images_lang_res.append(predicted_class_name) - - return images_lang_res \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/layout/__init__.py b/magic_pdf/model/sub_modules/layout/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py b/magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py deleted file mode 100644 index 2c7a23a37e7e0dc19f364db743e6947e8349fe59..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/doclayout_yolo/DocLayoutYOLO.py +++ /dev/null @@ -1,64 +0,0 @@ -from doclayout_yolo import YOLOv10 -from tqdm import tqdm - - -class DocLayoutYOLOModel(object): - def __init__(self, weight, device): - self.model = YOLOv10(weight) - self.device = device - - def predict(self, image): - layout_res = [] - doclayout_yolo_res = self.model.predict( - image, - imgsz=1280, - conf=0.10, - iou=0.45, - verbose=False, device=self.device - )[0] - for xyxy, conf, cla in zip( - doclayout_yolo_res.boxes.xyxy.cpu(), - doclayout_yolo_res.boxes.conf.cpu(), - doclayout_yolo_res.boxes.cls.cpu(), - ): - xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy] - new_item = { - "category_id": int(cla.item()), - "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax], - "score": round(float(conf.item()), 3), - } - layout_res.append(new_item) - return layout_res - - def batch_predict(self, images: list, batch_size: int) -> list: - images_layout_res = [] - # for index in range(0, len(images), batch_size): - for index in tqdm(range(0, len(images), batch_size), desc="Layout Predict"): - doclayout_yolo_res = [ - image_res.cpu() - for image_res in self.model.predict( - images[index : index + batch_size], - imgsz=1280, - conf=0.10, - iou=0.45, - verbose=False, - device=self.device, - ) - ] - for image_res in doclayout_yolo_res: - layout_res = [] - for xyxy, conf, cla in zip( - image_res.boxes.xyxy, - image_res.boxes.conf, - image_res.boxes.cls, - ): - xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy] - new_item = { - "category_id": int(cla.item()), - "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax], - "score": round(float(conf.item()), 3), - } - layout_res.append(new_item) - images_layout_res.append(layout_res) - - return images_layout_res diff --git a/magic_pdf/model/sub_modules/layout/doclayout_yolo/__init__.py b/magic_pdf/model/sub_modules/layout/doclayout_yolo/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/__init__.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/backbone.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/backbone.py deleted file mode 100644 index 5364f862e78205c65ffe3fdeba6aef09da148c39..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/backbone.py +++ /dev/null @@ -1,179 +0,0 @@ 
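Two small conventions from the classes above are worth spelling out: the layout model converts each xyxy detection into an 8-value clockwise `poly`, and the language classifier labels every 224x224 crop independently and keeps the most frequent prediction. A hedged sketch of both as standalone helpers (not the original methods):

```python
# Sketch only: restates two details of the deleted model wrappers above.
from collections import Counter

def xyxy_to_layout_item(xyxy, conf, cls_id):
    """Convert one detection into the dict format used by the layout code:
    an 8-point polygon (x0,y0, x1,y0, x1,y1, x0,y1) plus class id and score."""
    xmin, ymin, xmax, ymax = [int(v) for v in xyxy]
    return {
        "category_id": int(cls_id),
        "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax],
        "score": round(float(conf), 3),
    }

def majority_language(per_crop_predictions):
    """The language detector above returns the most common per-crop label,
    or None when there were no usable crops."""
    if not per_crop_predictions:
        return None
    return Counter(per_crop_predictions).most_common(1)[0][0]
```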
-# -------------------------------------------------------------------------------- -# VIT: Multi-Path Vision Transformer for Dense Prediction -# Copyright (c) 2022 Electronics and Telecommunications Research Institute (ETRI). -# All Rights Reserved. -# Written by Youngwan Lee -# This source code is licensed(Dual License(GPL3.0 & Commercial)) under the license found in the -# LICENSE file in the root directory of this source tree. -# -------------------------------------------------------------------------------- -# References: -# timm: https://github.com/rwightman/pytorch-image-models/tree/master/timm -# CoaT: https://github.com/mlpc-ucsd/CoaT -# -------------------------------------------------------------------------------- - - -import torch - -from detectron2.layers import ( - ShapeSpec, -) -from detectron2.modeling import Backbone, BACKBONE_REGISTRY, FPN -from detectron2.modeling.backbone.fpn import LastLevelP6P7, LastLevelMaxPool - -from .beit import beit_base_patch16, dit_base_patch16, dit_large_patch16, beit_large_patch16 -from .deit import deit_base_patch16, mae_base_patch16 -from .layoutlmft.models.layoutlmv3 import LayoutLMv3Model -from transformers import AutoConfig - -__all__ = [ - "build_vit_fpn_backbone", -] - - -class VIT_Backbone(Backbone): - """ - Implement VIT backbone. - """ - - def __init__(self, name, out_features, drop_path, img_size, pos_type, model_kwargs, - config_path=None, image_only=False, cfg=None): - super().__init__() - self._out_features = out_features - if 'base' in name: - self._out_feature_strides = {"layer3": 4, "layer5": 8, "layer7": 16, "layer11": 32} - self._out_feature_channels = {"layer3": 768, "layer5": 768, "layer7": 768, "layer11": 768} - else: - self._out_feature_strides = {"layer7": 4, "layer11": 8, "layer15": 16, "layer23": 32} - self._out_feature_channels = {"layer7": 1024, "layer11": 1024, "layer15": 1024, "layer23": 1024} - - if name == 'beit_base_patch16': - model_func = beit_base_patch16 - elif name == 'dit_base_patch16': - model_func = dit_base_patch16 - elif name == "deit_base_patch16": - model_func = deit_base_patch16 - elif name == "mae_base_patch16": - model_func = mae_base_patch16 - elif name == "dit_large_patch16": - model_func = dit_large_patch16 - elif name == "beit_large_patch16": - model_func = beit_large_patch16 - - if 'beit' in name or 'dit' in name: - if pos_type == "abs": - self.backbone = model_func(img_size=img_size, - out_features=out_features, - drop_path_rate=drop_path, - use_abs_pos_emb=True, - **model_kwargs) - elif pos_type == "shared_rel": - self.backbone = model_func(img_size=img_size, - out_features=out_features, - drop_path_rate=drop_path, - use_shared_rel_pos_bias=True, - **model_kwargs) - elif pos_type == "rel": - self.backbone = model_func(img_size=img_size, - out_features=out_features, - drop_path_rate=drop_path, - use_rel_pos_bias=True, - **model_kwargs) - else: - raise ValueError() - elif "layoutlmv3" in name: - config = AutoConfig.from_pretrained(config_path) - # disable relative bias as DiT - config.has_spatial_attention_bias = False - config.has_relative_attention_bias = False - self.backbone = LayoutLMv3Model(config, detection=True, - out_features=out_features, image_only=image_only) - else: - self.backbone = model_func(img_size=img_size, - out_features=out_features, - drop_path_rate=drop_path, - **model_kwargs) - self.name = name - - def forward(self, x): - """ - Args: - x: Tensor of shape (N,C,H,W). H, W must be a multiple of ``self.size_divisibility``. 
- - Returns: - dict[str->Tensor]: names and the corresponding features - """ - if "layoutlmv3" in self.name: - return self.backbone.forward( - input_ids=x["input_ids"] if "input_ids" in x else None, - bbox=x["bbox"] if "bbox" in x else None, - images=x["images"] if "images" in x else None, - attention_mask=x["attention_mask"] if "attention_mask" in x else None, - # output_hidden_states=True, - ) - assert x.dim() == 4, f"VIT takes an input of shape (N, C, H, W). Got {x.shape} instead!" - return self.backbone.forward_features(x) - - def output_shape(self): - return { - name: ShapeSpec( - channels=self._out_feature_channels[name], stride=self._out_feature_strides[name] - ) - for name in self._out_features - } - - -def build_VIT_backbone(cfg): - """ - Create a VIT instance from config. - - Args: - cfg: a detectron2 CfgNode - - Returns: - A VIT backbone instance. - """ - # fmt: off - name = cfg.MODEL.VIT.NAME - out_features = cfg.MODEL.VIT.OUT_FEATURES - drop_path = cfg.MODEL.VIT.DROP_PATH - img_size = cfg.MODEL.VIT.IMG_SIZE - pos_type = cfg.MODEL.VIT.POS_TYPE - - model_kwargs = eval(str(cfg.MODEL.VIT.MODEL_KWARGS).replace("`", "")) - - if 'layoutlmv3' in name: - if cfg.MODEL.CONFIG_PATH != '': - config_path = cfg.MODEL.CONFIG_PATH - else: - config_path = cfg.MODEL.WEIGHTS.replace('pytorch_model.bin', '') # layoutlmv3 pre-trained models - config_path = config_path.replace('model_final.pth', '') # detection fine-tuned models - else: - config_path = None - - return VIT_Backbone(name, out_features, drop_path, img_size, pos_type, model_kwargs, - config_path=config_path, image_only=cfg.MODEL.IMAGE_ONLY, cfg=cfg) - - -@BACKBONE_REGISTRY.register() -def build_vit_fpn_backbone(cfg, input_shape: ShapeSpec): - """ - Create a VIT w/ FPN backbone. - - Args: - cfg: a detectron2 CfgNode - - Returns: - backbone (Backbone): backbone module, must be a subclass of :class:`Backbone`. - """ - bottom_up = build_VIT_backbone(cfg) - in_features = cfg.MODEL.FPN.IN_FEATURES - out_channels = cfg.MODEL.FPN.OUT_CHANNELS - backbone = FPN( - bottom_up=bottom_up, - in_features=in_features, - out_channels=out_channels, - norm=cfg.MODEL.FPN.NORM, - top_block=LastLevelMaxPool(), - fuse_type=cfg.MODEL.FPN.FUSE_TYPE, - ) - return backbone diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/beit.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/beit.py deleted file mode 100644 index 03d4fabdc7816f19a8810e3c443643bc9e53e6b9..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/beit.py +++ /dev/null @@ -1,671 +0,0 @@ -""" Vision Transformer (ViT) in PyTorch - -A PyTorch implement of Vision Transformers as described in -'An Image Is Worth 16 x 16 Words: Transformers for Image Recognition at Scale' - https://arxiv.org/abs/2010.11929 - -The official jax code is released and available at https://github.com/google-research/vision_transformer - -Status/TODO: -* Models updated to be compatible with official impl. Args added to support backward compat for old PyTorch weights. -* Weights ported from official jax impl for 384x384 base and small models, 16x16 and 32x32 patches. -* Trained (supervised on ImageNet-1k) my custom 'small' patch model to 77.9, 'base' to 79.4 top-1 with this code. -* Hopefully find time and GPUs for SSL or unsupervised pretraining on OpenImages w/ ImageNet fine-tune in future. - -Acknowledgments: -* The paper authors for releasing code and weights, thanks! -* I fixed my class token impl based on Phil Wang's https://github.com/lucidrains/vit-pytorch ... 
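The backbone wrapper above advertises four feature maps whose strides and channel widths depend on the model size; `output_shape()` simply packages the two lookup tables into detectron2 `ShapeSpec`s for the FPN. A plain-Python restatement of that contract (dicts instead of `ShapeSpec`, names and numbers copied from the code above):

```python
# Sketch: the stride/channel contract of the deleted VIT_Backbone, without detectron2.
# "base" models expose layers 3/5/7/11 at 768 channels; "large" models layers
# 7/11/15/23 at 1024 channels.
def vit_backbone_feature_spec(name: str, out_features):
    if "base" in name:
        strides = {"layer3": 4, "layer5": 8, "layer7": 16, "layer11": 32}
        channels = {k: 768 for k in strides}
    else:
        strides = {"layer7": 4, "layer11": 8, "layer15": 16, "layer23": 32}
        channels = {k: 1024 for k in strides}
    return {
        feat: {"channels": channels[feat], "stride": strides[feat]}
        for feat in out_features
    }

# e.g. vit_backbone_feature_spec("beit_base_patch16", ["layer3", "layer11"])
#   -> {"layer3": {"channels": 768, "stride": 4}, "layer11": {"channels": 768, "stride": 32}}
```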
check it out -for some einops/einsum fun -* Simple transformer style inspired by Andrej Karpathy's https://github.com/karpathy/minGPT -* Bert reference code checks against Huggingface Transformers and Tensorflow Bert - -Hacked together by / Copyright 2020 Ross Wightman -""" -import warnings -import math -import torch -from functools import partial -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.checkpoint as checkpoint -from timm.models.layers import drop_path, to_2tuple, trunc_normal_ - - -def _cfg(url='', **kwargs): - return { - 'url': url, - 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None, - 'crop_pct': .9, 'interpolation': 'bicubic', - 'mean': (0.5, 0.5, 0.5), 'std': (0.5, 0.5, 0.5), - **kwargs - } - - -class DropPath(nn.Module): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - """ - - def __init__(self, drop_prob=None): - super(DropPath, self).__init__() - self.drop_prob = drop_prob - - def forward(self, x): - return drop_path(x, self.drop_prob, self.training) - - def extra_repr(self) -> str: - return 'p={}'.format(self.drop_prob) - - -class Mlp(nn.Module): - def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - # x = self.drop(x) - # commit this for the orignal BERT implement - x = self.fc2(x) - x = self.drop(x) - return x - - -class Attention(nn.Module): - def __init__( - self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., - proj_drop=0., window_size=None, attn_head_dim=None): - super().__init__() - self.num_heads = num_heads - head_dim = dim // num_heads - if attn_head_dim is not None: - head_dim = attn_head_dim - all_head_dim = head_dim * self.num_heads - # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights - self.scale = qk_scale or head_dim ** -0.5 - - self.qkv = nn.Linear(dim, all_head_dim * 3, bias=False) - if qkv_bias: - self.q_bias = nn.Parameter(torch.zeros(all_head_dim)) - self.v_bias = nn.Parameter(torch.zeros(all_head_dim)) - else: - self.q_bias = None - self.v_bias = None - - if window_size: - self.window_size = window_size - self.num_relative_distance = (2 * window_size[0] - 1) * (2 * window_size[1] - 1) + 3 - self.relative_position_bias_table = nn.Parameter( - torch.zeros(self.num_relative_distance, num_heads)) # 2*Wh-1 * 2*Ww-1, nH - # cls to token & token 2 cls & cls to cls - - # get pair-wise relative position index for each token inside the window - coords_h = torch.arange(window_size[0]) - coords_w = torch.arange(window_size[1]) - coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww - coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww - relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww - relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 - relative_coords[:, :, 0] += window_size[0] - 1 # shift to start from 0 - relative_coords[:, :, 1] += window_size[1] - 1 - relative_coords[:, :, 0] *= 2 * window_size[1] - 1 - relative_position_index = \ - torch.zeros(size=(window_size[0] * window_size[1] + 1,) * 2, dtype=relative_coords.dtype) 
- relative_position_index[1:, 1:] = relative_coords.sum(-1) # Wh*Ww, Wh*Ww - relative_position_index[0, 0:] = self.num_relative_distance - 3 - relative_position_index[0:, 0] = self.num_relative_distance - 2 - relative_position_index[0, 0] = self.num_relative_distance - 1 - - self.register_buffer("relative_position_index", relative_position_index) - - # trunc_normal_(self.relative_position_bias_table, std=.0) - else: - self.window_size = None - self.relative_position_bias_table = None - self.relative_position_index = None - - self.attn_drop = nn.Dropout(attn_drop) - self.proj = nn.Linear(all_head_dim, dim) - self.proj_drop = nn.Dropout(proj_drop) - - def forward(self, x, rel_pos_bias=None, training_window_size=None): - B, N, C = x.shape - qkv_bias = None - if self.q_bias is not None: - qkv_bias = torch.cat((self.q_bias, torch.zeros_like(self.v_bias, requires_grad=False), self.v_bias)) - # qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) - qkv = F.linear(input=x, weight=self.qkv.weight, bias=qkv_bias) - qkv = qkv.reshape(B, N, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) - q, k, v = qkv[0], qkv[1], qkv[2] # make torchscript happy (cannot use tensor as tuple) - - q = q * self.scale - attn = (q @ k.transpose(-2, -1)) - - if self.relative_position_bias_table is not None: - if training_window_size == self.window_size: - relative_position_bias = \ - self.relative_position_bias_table[self.relative_position_index.view(-1)].view( - self.window_size[0] * self.window_size[1] + 1, - self.window_size[0] * self.window_size[1] + 1, -1) # Wh*Ww,Wh*Ww,nH - relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww - attn = attn + relative_position_bias.unsqueeze(0) - else: - training_window_size = tuple(training_window_size.tolist()) - new_num_relative_distance = (2 * training_window_size[0] - 1) * (2 * training_window_size[1] - 1) + 3 - # new_num_relative_dis 为 所有可能的相对位置选项,包含cls-cls,tok-cls,与cls-tok - new_relative_position_bias_table = F.interpolate( - self.relative_position_bias_table[:-3, :].permute(1, 0).view(1, self.num_heads, - 2 * self.window_size[0] - 1, - 2 * self.window_size[1] - 1), - size=(2 * training_window_size[0] - 1, 2 * training_window_size[1] - 1), mode='bicubic', - align_corners=False) - new_relative_position_bias_table = new_relative_position_bias_table.view(self.num_heads, - new_num_relative_distance - 3).permute( - 1, 0) - new_relative_position_bias_table = torch.cat( - [new_relative_position_bias_table, self.relative_position_bias_table[-3::]], dim=0) - - # get pair-wise relative position index for each token inside the window - coords_h = torch.arange(training_window_size[0]) - coords_w = torch.arange(training_window_size[1]) - coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww - coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww - relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww - relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 - relative_coords[:, :, 0] += training_window_size[0] - 1 # shift to start from 0 - relative_coords[:, :, 1] += training_window_size[1] - 1 - relative_coords[:, :, 0] *= 2 * training_window_size[1] - 1 - relative_position_index = \ - torch.zeros(size=(training_window_size[0] * training_window_size[1] + 1,) * 2, - dtype=relative_coords.dtype) - relative_position_index[1:, 1:] = relative_coords.sum(-1) # Wh*Ww, Wh*Ww - relative_position_index[0, 0:] = new_num_relative_distance - 
3 - relative_position_index[0:, 0] = new_num_relative_distance - 2 - relative_position_index[0, 0] = new_num_relative_distance - 1 - - relative_position_bias = \ - new_relative_position_bias_table[relative_position_index.view(-1)].view( - training_window_size[0] * training_window_size[1] + 1, - training_window_size[0] * training_window_size[1] + 1, -1) # Wh*Ww,Wh*Ww,nH - relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww - attn = attn + relative_position_bias.unsqueeze(0) - - if rel_pos_bias is not None: - attn = attn + rel_pos_bias - - attn = attn.softmax(dim=-1) - attn = self.attn_drop(attn) - - x = (attn @ v).transpose(1, 2).reshape(B, N, -1) - x = self.proj(x) - x = self.proj_drop(x) - return x - - -class Block(nn.Module): - - def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., - drop_path=0., init_values=None, act_layer=nn.GELU, norm_layer=nn.LayerNorm, - window_size=None, attn_head_dim=None): - super().__init__() - self.norm1 = norm_layer(dim) - self.attn = Attention( - dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, - attn_drop=attn_drop, proj_drop=drop, window_size=window_size, attn_head_dim=attn_head_dim) - # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here - self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() - self.norm2 = norm_layer(dim) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) - - if init_values is not None: - self.gamma_1 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True) - self.gamma_2 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True) - else: - self.gamma_1, self.gamma_2 = None, None - - def forward(self, x, rel_pos_bias=None, training_window_size=None): - if self.gamma_1 is None: - x = x + self.drop_path( - self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias, training_window_size=training_window_size)) - x = x + self.drop_path(self.mlp(self.norm2(x))) - else: - x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x), rel_pos_bias=rel_pos_bias, - training_window_size=training_window_size)) - x = x + self.drop_path(self.gamma_2 * self.mlp(self.norm2(x))) - return x - - -class PatchEmbed(nn.Module): - """ Image to Patch Embedding - """ - - def __init__(self, img_size=[224, 224], patch_size=16, in_chans=3, embed_dim=768): - super().__init__() - img_size = to_2tuple(img_size) - patch_size = to_2tuple(patch_size) - num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) - self.patch_shape = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) - self.num_patches_w = self.patch_shape[0] - self.num_patches_h = self.patch_shape[1] - # the so-called patch_shape is the patch shape during pre-training - self.img_size = img_size - self.patch_size = patch_size - self.num_patches = num_patches - - self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) - - def forward(self, x, position_embedding=None, **kwargs): - # FIXME look at relaxing size constraints - # assert H == self.img_size[0] and W == self.img_size[1], \ - # f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})." 
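The index-building block that appears in `Attention` above (and again in `RelativePositionBias` further down) maps every pair of patch positions, plus the cls token, to a row of the learned bias table; at inference time with a different window size the table is bicubically resampled before lookup. A small, self-contained sketch of the index construction for a tiny window, following the pattern in the code above:

```python
# Sketch: the relative-position index construction used above, for a 2x2 window.
import torch

def build_relative_position_index(window_size):
    wh, ww = window_size
    num_relative_distance = (2 * wh - 1) * (2 * ww - 1) + 3  # +3 for the cls interactions
    coords = torch.stack(torch.meshgrid([torch.arange(wh), torch.arange(ww)]))  # 2, Wh, Ww
    coords_flatten = torch.flatten(coords, 1)                                   # 2, Wh*Ww
    relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :]   # 2, N, N
    relative_coords = relative_coords.permute(1, 2, 0).contiguous()             # N, N, 2
    relative_coords[:, :, 0] += wh - 1            # shift so indices start at 0
    relative_coords[:, :, 1] += ww - 1
    relative_coords[:, :, 0] *= 2 * ww - 1
    index = torch.zeros((wh * ww + 1,) * 2, dtype=relative_coords.dtype)
    index[1:, 1:] = relative_coords.sum(-1)       # token <-> token distances
    index[0, 0:] = num_relative_distance - 3      # cls -> token
    index[0:, 0] = num_relative_distance - 2      # token -> cls
    index[0, 0] = num_relative_distance - 1       # cls -> cls
    return index

# For a 2x2 window the index is a 5x5 matrix whose values select rows of the
# (num_relative_distance x num_heads) bias table.
print(build_relative_position_index((2, 2)).shape)  # torch.Size([5, 5])
```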
- x = self.proj(x) - Hp, Wp = x.shape[2], x.shape[3] - - if position_embedding is not None: - # interpolate the position embedding to the corresponding size - position_embedding = position_embedding.view(1, self.patch_shape[0], self.patch_shape[1], -1).permute(0, 3, - 1, 2) - position_embedding = F.interpolate(position_embedding, size=(Hp, Wp), mode='bicubic') - x = x + position_embedding - - x = x.flatten(2).transpose(1, 2) - return x, (Hp, Wp) - - -class HybridEmbed(nn.Module): - """ CNN Feature Map Embedding - Extract feature map from CNN, flatten, project to embedding dim. - """ - - def __init__(self, backbone, img_size=[224, 224], feature_size=None, in_chans=3, embed_dim=768): - super().__init__() - assert isinstance(backbone, nn.Module) - img_size = to_2tuple(img_size) - self.img_size = img_size - self.backbone = backbone - if feature_size is None: - with torch.no_grad(): - # FIXME this is hacky, but most reliable way of determining the exact dim of the output feature - # map for all networks, the feature metadata has reliable channel and stride info, but using - # stride to calc feature dim requires info about padding of each stage that isn't captured. - training = backbone.training - if training: - backbone.eval() - o = self.backbone(torch.zeros(1, in_chans, img_size[0], img_size[1]))[-1] - feature_size = o.shape[-2:] - feature_dim = o.shape[1] - backbone.train(training) - else: - feature_size = to_2tuple(feature_size) - feature_dim = self.backbone.feature_info.channels()[-1] - self.num_patches = feature_size[0] * feature_size[1] - self.proj = nn.Linear(feature_dim, embed_dim) - - def forward(self, x): - x = self.backbone(x)[-1] - x = x.flatten(2).transpose(1, 2) - x = self.proj(x) - return x - - -class RelativePositionBias(nn.Module): - - def __init__(self, window_size, num_heads): - super().__init__() - self.window_size = window_size - self.num_heads = num_heads - self.num_relative_distance = (2 * window_size[0] - 1) * (2 * window_size[1] - 1) + 3 - self.relative_position_bias_table = nn.Parameter( - torch.zeros(self.num_relative_distance, num_heads)) # 2*Wh-1 * 2*Ww-1, nH - # cls to token & token 2 cls & cls to cls - - # get pair-wise relative position index for each token inside the window - coords_h = torch.arange(window_size[0]) - coords_w = torch.arange(window_size[1]) - coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww - coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww - relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww - relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 - relative_coords[:, :, 0] += window_size[0] - 1 # shift to start from 0 - relative_coords[:, :, 1] += window_size[1] - 1 - relative_coords[:, :, 0] *= 2 * window_size[1] - 1 - relative_position_index = \ - torch.zeros(size=(window_size[0] * window_size[1] + 1,) * 2, dtype=relative_coords.dtype) - relative_position_index[1:, 1:] = relative_coords.sum(-1) # Wh*Ww, Wh*Ww - relative_position_index[0, 0:] = self.num_relative_distance - 3 - relative_position_index[0:, 0] = self.num_relative_distance - 2 - relative_position_index[0, 0] = self.num_relative_distance - 1 - - self.register_buffer("relative_position_index", relative_position_index) - - # trunc_normal_(self.relative_position_bias_table, std=.02) - - def forward(self, training_window_size): - if training_window_size == self.window_size: - relative_position_bias = \ - self.relative_position_bias_table[self.relative_position_index.view(-1)].view( - 
self.window_size[0] * self.window_size[1] + 1, - self.window_size[0] * self.window_size[1] + 1, -1) # Wh*Ww,Wh*Ww,nH - relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww - else: - training_window_size = tuple(training_window_size.tolist()) - new_num_relative_distance = (2 * training_window_size[0] - 1) * (2 * training_window_size[1] - 1) + 3 - # new_num_relative_dis 为 所有可能的相对位置选项,包含cls-cls,tok-cls,与cls-tok - new_relative_position_bias_table = F.interpolate( - self.relative_position_bias_table[:-3, :].permute(1, 0).view(1, self.num_heads, - 2 * self.window_size[0] - 1, - 2 * self.window_size[1] - 1), - size=(2 * training_window_size[0] - 1, 2 * training_window_size[1] - 1), mode='bicubic', - align_corners=False) - new_relative_position_bias_table = new_relative_position_bias_table.view(self.num_heads, - new_num_relative_distance - 3).permute( - 1, 0) - new_relative_position_bias_table = torch.cat( - [new_relative_position_bias_table, self.relative_position_bias_table[-3::]], dim=0) - - # get pair-wise relative position index for each token inside the window - coords_h = torch.arange(training_window_size[0]) - coords_w = torch.arange(training_window_size[1]) - coords = torch.stack(torch.meshgrid([coords_h, coords_w])) # 2, Wh, Ww - coords_flatten = torch.flatten(coords, 1) # 2, Wh*Ww - relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] # 2, Wh*Ww, Wh*Ww - relative_coords = relative_coords.permute(1, 2, 0).contiguous() # Wh*Ww, Wh*Ww, 2 - relative_coords[:, :, 0] += training_window_size[0] - 1 # shift to start from 0 - relative_coords[:, :, 1] += training_window_size[1] - 1 - relative_coords[:, :, 0] *= 2 * training_window_size[1] - 1 - relative_position_index = \ - torch.zeros(size=(training_window_size[0] * training_window_size[1] + 1,) * 2, - dtype=relative_coords.dtype) - relative_position_index[1:, 1:] = relative_coords.sum(-1) # Wh*Ww, Wh*Ww - relative_position_index[0, 0:] = new_num_relative_distance - 3 - relative_position_index[0:, 0] = new_num_relative_distance - 2 - relative_position_index[0, 0] = new_num_relative_distance - 1 - - relative_position_bias = \ - new_relative_position_bias_table[relative_position_index.view(-1)].view( - training_window_size[0] * training_window_size[1] + 1, - training_window_size[0] * training_window_size[1] + 1, -1) # Wh*Ww,Wh*Ww,nH - relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() # nH, Wh*Ww, Wh*Ww - - return relative_position_bias - - -class BEiT(nn.Module): - """ Vision Transformer with support for patch or hybrid CNN input stage - """ - - def __init__(self, - img_size=[224, 224], - patch_size=16, - in_chans=3, - num_classes=80, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4., - qkv_bias=False, - qk_scale=None, - drop_rate=0., - attn_drop_rate=0., - drop_path_rate=0., - hybrid_backbone=None, - norm_layer=None, - init_values=None, - use_abs_pos_emb=False, - use_rel_pos_bias=False, - use_shared_rel_pos_bias=False, - use_checkpoint=True, - pretrained=None, - out_features=None, - ): - - super(BEiT, self).__init__() - - norm_layer = norm_layer or partial(nn.LayerNorm, eps=1e-6) - self.num_classes = num_classes - self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models - self.use_checkpoint = use_checkpoint - - if hybrid_backbone is not None: - self.patch_embed = HybridEmbed( - hybrid_backbone, img_size=img_size, in_chans=in_chans, embed_dim=embed_dim) - else: - self.patch_embed = PatchEmbed( - 
img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim) - num_patches = self.patch_embed.num_patches - self.out_features = out_features - self.out_indices = [int(name[5:]) for name in out_features] - - self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) - # self.mask_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) - if use_abs_pos_emb: - self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim)) - else: - self.pos_embed = None - self.pos_drop = nn.Dropout(p=drop_rate) - - self.use_shared_rel_pos_bias = use_shared_rel_pos_bias - if use_shared_rel_pos_bias: - self.rel_pos_bias = RelativePositionBias(window_size=self.patch_embed.patch_shape, num_heads=num_heads) - else: - self.rel_pos_bias = None - - dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule - self.use_rel_pos_bias = use_rel_pos_bias - self.blocks = nn.ModuleList([ - Block( - dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, - drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer, - init_values=init_values, window_size=self.patch_embed.patch_shape if use_rel_pos_bias else None) - for i in range(depth)]) - - # trunc_normal_(self.mask_token, std=.02) - - if patch_size == 16: - self.fpn1 = nn.Sequential( - nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2), - # nn.SyncBatchNorm(embed_dim), - nn.BatchNorm2d(embed_dim), - nn.GELU(), - nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2), - ) - - self.fpn2 = nn.Sequential( - nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2), - ) - - self.fpn3 = nn.Identity() - - self.fpn4 = nn.MaxPool2d(kernel_size=2, stride=2) - elif patch_size == 8: - self.fpn1 = nn.Sequential( - nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2), - ) - - self.fpn2 = nn.Identity() - - self.fpn3 = nn.Sequential( - nn.MaxPool2d(kernel_size=2, stride=2), - ) - - self.fpn4 = nn.Sequential( - nn.MaxPool2d(kernel_size=4, stride=4), - ) - - if self.pos_embed is not None: - trunc_normal_(self.pos_embed, std=.02) - trunc_normal_(self.cls_token, std=.02) - self.apply(self._init_weights) - self.fix_init_weight() - - def fix_init_weight(self): - def rescale(param, layer_id): - param.div_(math.sqrt(2.0 * layer_id)) - - for layer_id, layer in enumerate(self.blocks): - rescale(layer.attn.proj.weight.data, layer_id + 1) - rescale(layer.mlp.fc2.weight.data, layer_id + 1) - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - trunc_normal_(m.weight, std=.02) - if isinstance(m, nn.Linear) and m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.LayerNorm): - nn.init.constant_(m.bias, 0) - nn.init.constant_(m.weight, 1.0) - - ''' - def init_weights(self): - """Initialize the weights in backbone. - - Args: - pretrained (str, optional): Path to pre-trained weights. - Defaults to None. 
- """ - logger = get_root_logger() - - if self.pos_embed is not None: - trunc_normal_(self.pos_embed, std=.02) - trunc_normal_(self.cls_token, std=.02) - self.apply(self._init_weights) - self.fix_init_weight() - - if self.init_cfg is None: - logger.warn(f'No pre-trained weights for ' - f'{self.__class__.__name__}, ' - f'training start from scratch') - else: - assert 'checkpoint' in self.init_cfg, f'Only support ' \ - f'specify `Pretrained` in ' \ - f'`init_cfg` in ' \ - f'{self.__class__.__name__} ' - logger.info(f"Will load ckpt from {self.init_cfg['checkpoint']}") - load_checkpoint(self, - filename=self.init_cfg['checkpoint'], - strict=False, - logger=logger, - beit_spec_expand_rel_pos = self.use_rel_pos_bias, - ) - ''' - - def get_num_layers(self): - return len(self.blocks) - - @torch.jit.ignore - def no_weight_decay(self): - return {'pos_embed', 'cls_token'} - - def forward_features(self, x): - B, C, H, W = x.shape - x, (Hp, Wp) = self.patch_embed(x, self.pos_embed[:, 1:, :] if self.pos_embed is not None else None) - # Hp, Wp are HW for patches - batch_size, seq_len, _ = x.size() - - cls_tokens = self.cls_token.expand(batch_size, -1, -1) # stole cls_tokens impl from Phil Wang, thanks - if self.pos_embed is not None: - cls_tokens = cls_tokens + self.pos_embed[:, :1, :] - x = torch.cat((cls_tokens, x), dim=1) - x = self.pos_drop(x) - - features = [] - training_window_size = torch.tensor([Hp, Wp]) - - rel_pos_bias = self.rel_pos_bias(training_window_size) if self.rel_pos_bias is not None else None - - for i, blk in enumerate(self.blocks): - if self.use_checkpoint: - x = checkpoint.checkpoint(blk, x, rel_pos_bias, training_window_size) - else: - x = blk(x, rel_pos_bias=rel_pos_bias, training_window_size=training_window_size) - if i in self.out_indices: - xp = x[:, 1:, :].permute(0, 2, 1).reshape(B, -1, Hp, Wp) - features.append(xp.contiguous()) - - ops = [self.fpn1, self.fpn2, self.fpn3, self.fpn4] - for i in range(len(features)): - features[i] = ops[i](features[i]) - - feat_out = {} - - for name, value in zip(self.out_features, features): - feat_out[name] = value - - return feat_out - - def forward(self, x): - x = self.forward_features(x) - return x - - -def beit_base_patch16(pretrained=False, **kwargs): - model = BEiT( - patch_size=16, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4, - qkv_bias=True, - norm_layer=partial(nn.LayerNorm, eps=1e-6), - init_values=None, - **kwargs) - model.default_cfg = _cfg() - return model - -def beit_large_patch16(pretrained=False, **kwargs): - model = BEiT( - patch_size=16, - embed_dim=1024, - depth=24, - num_heads=16, - mlp_ratio=4, - qkv_bias=True, - norm_layer=partial(nn.LayerNorm, eps=1e-6), - init_values=None, - **kwargs) - model.default_cfg = _cfg() - return model - -def dit_base_patch16(pretrained=False, **kwargs): - model = BEiT( - patch_size=16, - embed_dim=768, - depth=12, - num_heads=12, - mlp_ratio=4, - qkv_bias=True, - norm_layer=partial(nn.LayerNorm, eps=1e-6), - init_values=0.1, - **kwargs) - model.default_cfg = _cfg() - return model - -def dit_large_patch16(pretrained=False, **kwargs): - model = BEiT( - patch_size=16, - embed_dim=1024, - depth=24, - num_heads=16, - mlp_ratio=4, - qkv_bias=True, - norm_layer=partial(nn.LayerNorm, eps=1e-6), - init_values=1e-5, - **kwargs) - model.default_cfg = _cfg() - return model - -if __name__ == '__main__': - model = BEiT(use_checkpoint=True, use_shared_rel_pos_bias=True) - model = model.to("cuda:0") - input1 = torch.rand(2, 3, 512, 762).to("cuda:0") - input2 = torch.rand(2, 3, 800, 
1200).to("cuda:0") - input3 = torch.rand(2, 3, 720, 1000).to("cuda:0") - output1 = model(input1) - output2 = model(input2) - output3 = model(input3) - print("all done") diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/deit.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/deit.py deleted file mode 100644 index 9a13bb0a8514df29fb4b0ec58c3726ba9c221a8a..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/deit.py +++ /dev/null @@ -1,476 +0,0 @@ -""" -Mostly copy-paste from DINO and timm library: -https://github.com/facebookresearch/dino -https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py -""" -import warnings - -import math -import torch -import torch.nn as nn -import torch.utils.checkpoint as checkpoint -from timm.models.layers import trunc_normal_, drop_path, to_2tuple -from functools import partial - -def _cfg(url='', **kwargs): - return { - 'url': url, - 'num_classes': 1000, 'input_size': (3, 224, 224), 'pool_size': None, - 'crop_pct': .9, 'interpolation': 'bicubic', - 'mean': (0.5, 0.5, 0.5), 'std': (0.5, 0.5, 0.5), - **kwargs - } - -class DropPath(nn.Module): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - """ - - def __init__(self, drop_prob=None): - super(DropPath, self).__init__() - self.drop_prob = drop_prob - - def forward(self, x): - return drop_path(x, self.drop_prob, self.training) - - def extra_repr(self) -> str: - return 'p={}'.format(self.drop_prob) - - -class Mlp(nn.Module): - def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features) - self.act = act_layer() - self.fc2 = nn.Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -class Attention(nn.Module): - def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): - super().__init__() - self.num_heads = num_heads - head_dim = dim // num_heads - # NOTE scale factor was wrong in my original version, can set manually to be compat with prev weights - self.scale = qk_scale or head_dim ** -0.5 - - self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) - self.attn_drop = nn.Dropout(attn_drop) - self.proj = nn.Linear(dim, dim) - self.proj_drop = nn.Dropout(proj_drop) - - def forward(self, x): - B, N, C = x.shape - q, k, v = self.qkv(x).reshape(B, N, 3, self.num_heads, - C // self.num_heads).permute(2, 0, 3, 1, 4) - - attn = (q @ k.transpose(-2, -1)) * self.scale - attn = attn.softmax(dim=-1) - attn = self.attn_drop(attn) - - x = (attn @ v).transpose(1, 2).reshape(B, N, C) - x = self.proj(x) - x = self.proj_drop(x) - return x - - -class Block(nn.Module): - - def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., - drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): - super().__init__() - self.norm1 = norm_layer(dim) - self.attn = Attention( - dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) - # NOTE: drop path for stochastic depth, we shall see if this is better than dropout here - self.drop_path = DropPath( - drop_path) if drop_path > 0. 
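For patch size 16 the BEiT/DiT backbone above takes the same stride-16 token grid from four intermediate blocks and rescales each copy into a stride-4/8/16/32 pyramid: `fpn1` upsamples 4x, `fpn2` 2x, `fpn3` is the identity, and `fpn4` downsamples 2x. A shape-only sketch of that rescaling, assuming a 768-dim base model:

```python
# Sketch: the four pyramid heads used above for patch_size == 16, and the shapes
# they produce from a (B, 768, Hp, Wp) token grid at stride 16.
import torch
import torch.nn as nn

embed_dim = 768
fpn1 = nn.Sequential(                     # stride 16 -> 4
    nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2),
    nn.BatchNorm2d(embed_dim),
    nn.GELU(),
    nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2),
)
fpn2 = nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2)  # -> stride 8
fpn3 = nn.Identity()                                                      # -> stride 16
fpn4 = nn.MaxPool2d(kernel_size=2, stride=2)                              # -> stride 32

x = torch.randn(1, embed_dim, 32, 32)     # e.g. a 512x512 input at stride 16
for name, op in [("fpn1", fpn1), ("fpn2", fpn2), ("fpn3", fpn3), ("fpn4", fpn4)]:
    print(name, tuple(op(x).shape))
# fpn1 (1, 768, 128, 128)  fpn2 (1, 768, 64, 64)  fpn3 (1, 768, 32, 32)  fpn4 (1, 768, 16, 16)
```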
else nn.Identity() - self.norm2 = norm_layer(dim) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, - act_layer=act_layer, drop=drop) - - def forward(self, x): - x = x + self.drop_path(self.attn(self.norm1(x))) - x = x + self.drop_path(self.mlp(self.norm2(x))) - return x - - -class PatchEmbed(nn.Module): - """ Image to Patch Embedding - """ - - def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): - super().__init__() - img_size = to_2tuple(img_size) - patch_size = to_2tuple(patch_size) - - self.window_size = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) - - self.num_patches_w, self.num_patches_h = self.window_size - - self.num_patches = self.window_size[0] * self.window_size[1] - self.img_size = img_size - self.patch_size = patch_size - - self.proj = nn.Conv2d(in_chans, embed_dim, - kernel_size=patch_size, stride=patch_size) - - def forward(self, x): - x = self.proj(x) - return x - - -class HybridEmbed(nn.Module): - """ CNN Feature Map Embedding - Extract feature map from CNN, flatten, project to embedding dim. - """ - - def __init__(self, backbone, img_size=224, feature_size=None, in_chans=3, embed_dim=768): - super().__init__() - assert isinstance(backbone, nn.Module) - img_size = to_2tuple(img_size) - self.img_size = img_size - self.backbone = backbone - if feature_size is None: - with torch.no_grad(): - # FIXME this is hacky, but most reliable way of determining the exact dim of the output feature - # map for all networks, the feature metadata has reliable channel and stride info, but using - # stride to calc feature dim requires info about padding of each stage that isn't captured. - training = backbone.training - if training: - backbone.eval() - o = self.backbone(torch.zeros( - 1, in_chans, img_size[0], img_size[1]))[-1] - feature_size = o.shape[-2:] - feature_dim = o.shape[1] - backbone.train(training) - else: - feature_size = to_2tuple(feature_size) - feature_dim = self.backbone.feature_info.channels()[-1] - self.num_patches = feature_size[0] * feature_size[1] - self.proj = nn.Linear(feature_dim, embed_dim) - - def forward(self, x): - x = self.backbone(x)[-1] - x = x.flatten(2).transpose(1, 2) - x = self.proj(x) - return x - - -class ViT(nn.Module): - """ Vision Transformer with support for patch or hybrid CNN input stage - """ - - def __init__(self, - model_name='vit_base_patch16_224', - img_size=384, - patch_size=16, - in_chans=3, - embed_dim=1024, - depth=24, - num_heads=16, - num_classes=19, - mlp_ratio=4., - qkv_bias=True, - qk_scale=None, - drop_rate=0.1, - attn_drop_rate=0., - drop_path_rate=0., - hybrid_backbone=None, - norm_layer=partial(nn.LayerNorm, eps=1e-6), - norm_cfg=None, - pos_embed_interp=False, - random_init=False, - align_corners=False, - use_checkpoint=False, - num_extra_tokens=1, - out_features=None, - **kwargs, - ): - - super(ViT, self).__init__() - self.model_name = model_name - self.img_size = img_size - self.patch_size = patch_size - self.in_chans = in_chans - self.embed_dim = embed_dim - self.depth = depth - self.num_heads = num_heads - self.num_classes = num_classes - self.mlp_ratio = mlp_ratio - self.qkv_bias = qkv_bias - self.qk_scale = qk_scale - self.drop_rate = drop_rate - self.attn_drop_rate = attn_drop_rate - self.drop_path_rate = drop_path_rate - self.hybrid_backbone = hybrid_backbone - self.norm_layer = norm_layer - self.norm_cfg = norm_cfg - self.pos_embed_interp = pos_embed_interp - self.random_init = random_init - self.align_corners = align_corners - 
self.use_checkpoint = use_checkpoint - self.num_extra_tokens = num_extra_tokens - self.out_features = out_features - self.out_indices = [int(name[5:]) for name in out_features] - - # self.num_stages = self.depth - # self.out_indices = tuple(range(self.num_stages)) - - if self.hybrid_backbone is not None: - self.patch_embed = HybridEmbed( - self.hybrid_backbone, img_size=self.img_size, in_chans=self.in_chans, embed_dim=self.embed_dim) - else: - self.patch_embed = PatchEmbed( - img_size=self.img_size, patch_size=self.patch_size, in_chans=self.in_chans, embed_dim=self.embed_dim) - self.num_patches = self.patch_embed.num_patches - - self.cls_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim)) - - if self.num_extra_tokens == 2: - self.dist_token = nn.Parameter(torch.zeros(1, 1, self.embed_dim)) - - self.pos_embed = nn.Parameter(torch.zeros( - 1, self.num_patches + self.num_extra_tokens, self.embed_dim)) - self.pos_drop = nn.Dropout(p=self.drop_rate) - - # self.num_extra_tokens = self.pos_embed.shape[-2] - self.num_patches - dpr = [x.item() for x in torch.linspace(0, self.drop_path_rate, - self.depth)] # stochastic depth decay rule - self.blocks = nn.ModuleList([ - Block( - dim=self.embed_dim, num_heads=self.num_heads, mlp_ratio=self.mlp_ratio, qkv_bias=self.qkv_bias, - qk_scale=self.qk_scale, - drop=self.drop_rate, attn_drop=self.attn_drop_rate, drop_path=dpr[i], norm_layer=self.norm_layer) - for i in range(self.depth)]) - - # NOTE as per official impl, we could have a pre-logits representation dense layer + tanh here - # self.repr = nn.Linear(embed_dim, representation_size) - # self.repr_act = nn.Tanh() - - if patch_size == 16: - self.fpn1 = nn.Sequential( - nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2), - nn.SyncBatchNorm(embed_dim), - nn.GELU(), - nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2), - ) - - self.fpn2 = nn.Sequential( - nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2), - ) - - self.fpn3 = nn.Identity() - - self.fpn4 = nn.MaxPool2d(kernel_size=2, stride=2) - elif patch_size == 8: - self.fpn1 = nn.Sequential( - nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2), - ) - - self.fpn2 = nn.Identity() - - self.fpn3 = nn.Sequential( - nn.MaxPool2d(kernel_size=2, stride=2), - ) - - self.fpn4 = nn.Sequential( - nn.MaxPool2d(kernel_size=4, stride=4), - ) - - trunc_normal_(self.pos_embed, std=.02) - trunc_normal_(self.cls_token, std=.02) - if self.num_extra_tokens==2: - trunc_normal_(self.dist_token, std=0.2) - self.apply(self._init_weights) - # self.fix_init_weight() - - def fix_init_weight(self): - def rescale(param, layer_id): - param.div_(math.sqrt(2.0 * layer_id)) - - for layer_id, layer in enumerate(self.blocks): - rescale(layer.attn.proj.weight.data, layer_id + 1) - rescale(layer.mlp.fc2.weight.data, layer_id + 1) - - def _init_weights(self, m): - if isinstance(m, nn.Linear): - trunc_normal_(m.weight, std=.02) - if isinstance(m, nn.Linear) and m.bias is not None: - nn.init.constant_(m.bias, 0) - elif isinstance(m, nn.LayerNorm): - nn.init.constant_(m.bias, 0) - nn.init.constant_(m.weight, 1.0) - - ''' - def init_weights(self): - logger = get_root_logger() - - trunc_normal_(self.pos_embed, std=.02) - trunc_normal_(self.cls_token, std=.02) - self.apply(self._init_weights) - - if self.init_cfg is None: - logger.warn(f'No pre-trained weights for ' - f'{self.__class__.__name__}, ' - f'training start from scratch') - else: - assert 'checkpoint' in self.init_cfg, f'Only support ' \ - f'specify `Pretrained` in ' \ - 
f'`init_cfg` in ' \ - f'{self.__class__.__name__} ' - logger.info(f"Will load ckpt from {self.init_cfg['checkpoint']}") - load_checkpoint(self, filename=self.init_cfg['checkpoint'], strict=False, logger=logger) - ''' - - def get_num_layers(self): - return len(self.blocks) - - @torch.jit.ignore - def no_weight_decay(self): - return {'pos_embed', 'cls_token'} - - def _conv_filter(self, state_dict, patch_size=16): - """ convert patch embedding weight from manual patchify + linear proj to conv""" - out_dict = {} - for k, v in state_dict.items(): - if 'patch_embed.proj.weight' in k: - v = v.reshape((v.shape[0], 3, patch_size, patch_size)) - out_dict[k] = v - return out_dict - - def to_2D(self, x): - n, hw, c = x.shape - h = w = int(math.sqrt(hw)) - x = x.transpose(1, 2).reshape(n, c, h, w) - return x - - def to_1D(self, x): - n, c, h, w = x.shape - x = x.reshape(n, c, -1).transpose(1, 2) - return x - - def interpolate_pos_encoding(self, x, w, h): - npatch = x.shape[1] - self.num_extra_tokens - N = self.pos_embed.shape[1] - self.num_extra_tokens - if npatch == N and w == h: - return self.pos_embed - - class_ORdist_pos_embed = self.pos_embed[:, 0:self.num_extra_tokens] - - patch_pos_embed = self.pos_embed[:, self.num_extra_tokens:] - - dim = x.shape[-1] - w0 = w // self.patch_embed.patch_size[0] - h0 = h // self.patch_embed.patch_size[1] - # we add a small number to avoid floating point error in the interpolation - # see discussion at https://github.com/facebookresearch/dino/issues/8 - w0, h0 = w0 + 0.1, h0 + 0.1 - patch_pos_embed = nn.functional.interpolate( - patch_pos_embed.reshape(1, int(math.sqrt(N)), int(math.sqrt(N)), dim).permute(0, 3, 1, 2), - scale_factor=(w0 / math.sqrt(N), h0 / math.sqrt(N)), - mode='bicubic', - ) - assert int(w0) == patch_pos_embed.shape[-2] and int(h0) == patch_pos_embed.shape[-1] - patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim) - - return torch.cat((class_ORdist_pos_embed, patch_pos_embed), dim=1) - - def prepare_tokens(self, x, mask=None): - B, nc, w, h = x.shape - # patch linear embedding - x = self.patch_embed(x) - - # mask image modeling - if mask is not None: - x = self.mask_model(x, mask) - x = x.flatten(2).transpose(1, 2) - - # add the [CLS] token to the embed patch tokens - all_tokens = [self.cls_token.expand(B, -1, -1)] - - if self.num_extra_tokens == 2: - dist_tokens = self.dist_token.expand(B, -1, -1) - all_tokens.append(dist_tokens) - all_tokens.append(x) - - x = torch.cat(all_tokens, dim=1) - - # add positional encoding to each token - x = x + self.interpolate_pos_encoding(x, w, h) - - return self.pos_drop(x) - - def forward_features(self, x): - # print(f"==========shape of x is {x.shape}==========") - B, _, H, W = x.shape - Hp, Wp = H // self.patch_size, W // self.patch_size - x = self.prepare_tokens(x) - - features = [] - for i, blk in enumerate(self.blocks): - if self.use_checkpoint: - x = checkpoint.checkpoint(blk, x) - else: - x = blk(x) - if i in self.out_indices: - xp = x[:, self.num_extra_tokens:, :].permute(0, 2, 1).reshape(B, -1, Hp, Wp) - features.append(xp.contiguous()) - - ops = [self.fpn1, self.fpn2, self.fpn3, self.fpn4] - for i in range(len(features)): - features[i] = ops[i](features[i]) - - feat_out = {} - - for name, value in zip(self.out_features, features): - feat_out[name] = value - - return feat_out - - def forward(self, x): - x = self.forward_features(x) - return x - - -def deit_base_patch16(pretrained=False, **kwargs): - model = ViT( - patch_size=16, - drop_rate=0., - embed_dim=768, - depth=12, - 
num_heads=12, - num_classes=1000, - mlp_ratio=4., - qkv_bias=True, - use_checkpoint=True, - num_extra_tokens=2, - **kwargs) - model.default_cfg = _cfg() - return model - -def mae_base_patch16(pretrained=False, **kwargs): - model = ViT( - patch_size=16, - drop_rate=0., - embed_dim=768, - depth=12, - num_heads=12, - num_classes=1000, - mlp_ratio=4., - qkv_bias=True, - use_checkpoint=True, - num_extra_tokens=1, - **kwargs) - model.default_cfg = _cfg() - return model \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/__init__.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/__init__.py deleted file mode 100644 index cd997b55f3118a01f5d49ae2f080525c7d7c9534..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .models import ( - LayoutLMv3Config, - LayoutLMv3ForTokenClassification, - LayoutLMv3ForQuestionAnswering, - LayoutLMv3ForSequenceClassification, - LayoutLMv3Tokenizer, -) diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/__init__.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/__init__.py deleted file mode 100644 index 5bcec6c7c65b7add5c3440f106b8f1049781167a..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# flake8: noqa -from .data_collator import DataCollatorForKeyValueExtraction diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/cord.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/cord.py deleted file mode 100644 index 820dc53a4ea8bc79ddac2d36b57ea2110e8d27d5..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/cord.py +++ /dev/null @@ -1,171 +0,0 @@ -''' -Reference: https://huggingface.co/datasets/pierresi/cord/blob/main/cord.py -''' - - -import json -import os -from pathlib import Path -import datasets -from .image_utils import load_image, normalize_bbox -logger = datasets.logging.get_logger(__name__) -_CITATION = """\ -@article{park2019cord, - title={CORD: A Consolidated Receipt Dataset for Post-OCR Parsing}, - author={Park, Seunghyun and Shin, Seung and Lee, Bado and Lee, Junyeop and Surh, Jaeheung and Seo, Minjoon and Lee, Hwalsuk} - booktitle={Document Intelligence Workshop at Neural Information Processing Systems} - year={2019} -} -""" -_DESCRIPTION = """\ -https://github.com/clovaai/cord/ -""" - -def quad_to_box(quad): - # test 87 is wrongly annotated - box = ( - max(0, quad["x1"]), - max(0, quad["y1"]), - quad["x3"], - quad["y3"] - ) - if box[3] < box[1]: - bbox = list(box) - tmp = bbox[3] - bbox[3] = bbox[1] - bbox[1] = tmp - box = tuple(bbox) - if box[2] < box[0]: - bbox = list(box) - tmp = bbox[2] - bbox[2] = bbox[0] - bbox[0] = tmp - box = tuple(bbox) - return box - -def _get_drive_url(url): - base_url = 'https://drive.google.com/uc?id=' - split_url = url.split('/') - return base_url + split_url[5] - -_URLS = [ - _get_drive_url("https://drive.google.com/file/d/1MqhTbcj-AHXOqYoeoh12aRUwIprzTJYI/"), - _get_drive_url("https://drive.google.com/file/d/1wYdp5nC9LnHQZ2FcmOoC0eClyWvcuARU/") - # If you failed to download the dataset through the automatic downloader, - # you can download it manually and modify the code to get the local dataset. - # Or you can use the following links. Please follow the original LICENSE of CORD for usage. 
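The `interpolate_pos_encoding` helper in the ViT above lets detection-sized inputs reuse position embeddings trained on a fixed square patch grid by bicubically resampling that grid to the current input's patch layout, while the cls/dist tokens keep their embeddings. A compact sketch of that resampling (the original rescales via `scale_factor` with a small epsilon; this sketch passes the target size directly):

```python
# Sketch of the positional-embedding resampling used by the deleted ViT above.
import math
import torch
import torch.nn.functional as F

def resample_pos_embed(pos_embed, grid_hw, num_extra_tokens=1):
    """pos_embed: (1, num_extra_tokens + N, dim) trained on a square sqrt(N) grid.
    grid_hw: (h, w) patch grid of the current input. Returns (1, extra + h*w, dim)."""
    extra = pos_embed[:, :num_extra_tokens]
    patch = pos_embed[:, num_extra_tokens:]
    n, dim = patch.shape[1], patch.shape[-1]
    side = int(math.sqrt(n))
    patch = patch.reshape(1, side, side, dim).permute(0, 3, 1, 2)
    patch = F.interpolate(patch, size=grid_hw, mode="bicubic", align_corners=False)
    patch = patch.permute(0, 2, 3, 1).reshape(1, -1, dim)
    return torch.cat((extra, patch), dim=1)

# e.g. a table pretrained on a 14x14 grid, resampled for a 40x30 patch layout:
table = torch.randn(1, 1 + 14 * 14, 768)
print(resample_pos_embed(table, (40, 30)).shape)  # torch.Size([1, 1201, 768])
```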
- # "https://layoutlm.blob.core.windows.net/cord/CORD-1k-001.zip", - # "https://layoutlm.blob.core.windows.net/cord/CORD-1k-002.zip" -] - -class CordConfig(datasets.BuilderConfig): - """BuilderConfig for CORD""" - def __init__(self, **kwargs): - """BuilderConfig for CORD. - Args: - **kwargs: keyword arguments forwarded to super. - """ - super(CordConfig, self).__init__(**kwargs) - -class Cord(datasets.GeneratorBasedBuilder): - BUILDER_CONFIGS = [ - CordConfig(name="cord", version=datasets.Version("1.0.0"), description="CORD dataset"), - ] - - def _info(self): - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=datasets.Features( - { - "id": datasets.Value("string"), - "words": datasets.Sequence(datasets.Value("string")), - "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), - "ner_tags": datasets.Sequence( - datasets.features.ClassLabel( - names=["O","B-MENU.NM","B-MENU.NUM","B-MENU.UNITPRICE","B-MENU.CNT","B-MENU.DISCOUNTPRICE","B-MENU.PRICE","B-MENU.ITEMSUBTOTAL","B-MENU.VATYN","B-MENU.ETC","B-MENU.SUB_NM","B-MENU.SUB_UNITPRICE","B-MENU.SUB_CNT","B-MENU.SUB_PRICE","B-MENU.SUB_ETC","B-VOID_MENU.NM","B-VOID_MENU.PRICE","B-SUB_TOTAL.SUBTOTAL_PRICE","B-SUB_TOTAL.DISCOUNT_PRICE","B-SUB_TOTAL.SERVICE_PRICE","B-SUB_TOTAL.OTHERSVC_PRICE","B-SUB_TOTAL.TAX_PRICE","B-SUB_TOTAL.ETC","B-TOTAL.TOTAL_PRICE","B-TOTAL.TOTAL_ETC","B-TOTAL.CASHPRICE","B-TOTAL.CHANGEPRICE","B-TOTAL.CREDITCARDPRICE","B-TOTAL.EMONEYPRICE","B-TOTAL.MENUTYPE_CNT","B-TOTAL.MENUQTY_CNT","I-MENU.NM","I-MENU.NUM","I-MENU.UNITPRICE","I-MENU.CNT","I-MENU.DISCOUNTPRICE","I-MENU.PRICE","I-MENU.ITEMSUBTOTAL","I-MENU.VATYN","I-MENU.ETC","I-MENU.SUB_NM","I-MENU.SUB_UNITPRICE","I-MENU.SUB_CNT","I-MENU.SUB_PRICE","I-MENU.SUB_ETC","I-VOID_MENU.NM","I-VOID_MENU.PRICE","I-SUB_TOTAL.SUBTOTAL_PRICE","I-SUB_TOTAL.DISCOUNT_PRICE","I-SUB_TOTAL.SERVICE_PRICE","I-SUB_TOTAL.OTHERSVC_PRICE","I-SUB_TOTAL.TAX_PRICE","I-SUB_TOTAL.ETC","I-TOTAL.TOTAL_PRICE","I-TOTAL.TOTAL_ETC","I-TOTAL.CASHPRICE","I-TOTAL.CHANGEPRICE","I-TOTAL.CREDITCARDPRICE","I-TOTAL.EMONEYPRICE","I-TOTAL.MENUTYPE_CNT","I-TOTAL.MENUQTY_CNT"] - ) - ), - "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), - "image_path": datasets.Value("string"), - } - ), - supervised_keys=None, - citation=_CITATION, - homepage="https://github.com/clovaai/cord/", - ) - - def _split_generators(self, dl_manager): - """Returns SplitGenerators.""" - """Uses local files located with data_dir""" - downloaded_file = dl_manager.download_and_extract(_URLS) - # move files from the second URL together with files from the first one. 
- dest = Path(downloaded_file[0])/"CORD" - for split in ["train", "dev", "test"]: - for file_type in ["image", "json"]: - if split == "test" and file_type == "json": - continue - files = (Path(downloaded_file[1])/"CORD"/split/file_type).iterdir() - for f in files: - os.rename(f, dest/split/file_type/f.name) - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, gen_kwargs={"filepath": dest/"train"} - ), - datasets.SplitGenerator( - name=datasets.Split.VALIDATION, gen_kwargs={"filepath": dest/"dev"} - ), - datasets.SplitGenerator( - name=datasets.Split.TEST, gen_kwargs={"filepath": dest/"test"} - ), - ] - - def get_line_bbox(self, bboxs): - x = [bboxs[i][j] for i in range(len(bboxs)) for j in range(0, len(bboxs[i]), 2)] - y = [bboxs[i][j] for i in range(len(bboxs)) for j in range(1, len(bboxs[i]), 2)] - - x0, y0, x1, y1 = min(x), min(y), max(x), max(y) - - assert x1 >= x0 and y1 >= y0 - bbox = [[x0, y0, x1, y1] for _ in range(len(bboxs))] - return bbox - - def _generate_examples(self, filepath): - logger.info("⏳ Generating examples from = %s", filepath) - ann_dir = os.path.join(filepath, "json") - img_dir = os.path.join(filepath, "image") - for guid, file in enumerate(sorted(os.listdir(ann_dir))): - words = [] - bboxes = [] - ner_tags = [] - file_path = os.path.join(ann_dir, file) - with open(file_path, "r", encoding="utf8") as f: - data = json.load(f) - image_path = os.path.join(img_dir, file) - image_path = image_path.replace("json", "png") - image, size = load_image(image_path) - for item in data["valid_line"]: - cur_line_bboxes = [] - line_words, label = item["words"], item["category"] - line_words = [w for w in line_words if w["text"].strip() != ""] - if len(line_words) == 0: - continue - if label == "other": - for w in line_words: - words.append(w["text"]) - ner_tags.append("O") - cur_line_bboxes.append(normalize_bbox(quad_to_box(w["quad"]), size)) - else: - words.append(line_words[0]["text"]) - ner_tags.append("B-" + label.upper()) - cur_line_bboxes.append(normalize_bbox(quad_to_box(line_words[0]["quad"]), size)) - for w in line_words[1:]: - words.append(w["text"]) - ner_tags.append("I-" + label.upper()) - cur_line_bboxes.append(normalize_bbox(quad_to_box(w["quad"]), size)) - # by default: --segment_level_layout 1 - # if do not want to use segment_level_layout, comment the following line - cur_line_bboxes = self.get_line_bbox(cur_line_bboxes) - bboxes.extend(cur_line_bboxes) - # yield guid, {"id": str(guid), "words": words, "bboxes": bboxes, "ner_tags": ner_tags, "image": image} - yield guid, {"id": str(guid), "words": words, "bboxes": bboxes, "ner_tags": ner_tags, - "image": image, "image_path": image_path} diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/data_collator.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/data_collator.py deleted file mode 100644 index 4232a6660ba2678ba20be5479629550419a798b4..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/data_collator.py +++ /dev/null @@ -1,124 +0,0 @@ -import torch -from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Tuple, Union - -from transformers import BatchEncoding, PreTrainedTokenizerBase -from transformers.data.data_collator import ( - DataCollatorMixin, - _torch_collate_batch, -) -from transformers.file_utils import PaddingStrategy - -from typing import NewType -InputDataClass = NewType("InputDataClass", Any) - -def pre_calc_rel_mat(segment_ids): - valid_span = 
torch.zeros((segment_ids.shape[0], segment_ids.shape[1], segment_ids.shape[1]), - device=segment_ids.device, dtype=torch.bool) - for i in range(segment_ids.shape[0]): - for j in range(segment_ids.shape[1]): - valid_span[i, j, :] = segment_ids[i, :] == segment_ids[i, j] - - return valid_span - -@dataclass -class DataCollatorForKeyValueExtraction(DataCollatorMixin): - """ - Data collator that will dynamically pad the inputs received, as well as the labels. - Args: - tokenizer (:class:`~transformers.PreTrainedTokenizer` or :class:`~transformers.PreTrainedTokenizerFast`): - The tokenizer used for encoding the data. - padding (:obj:`bool`, :obj:`str` or :class:`~transformers.file_utils.PaddingStrategy`, `optional`, defaults to :obj:`True`): - Select a strategy to pad the returned sequences (according to the model's padding side and padding index) - among: - * :obj:`True` or :obj:`'longest'`: Pad to the longest sequence in the batch (or no padding if only a single - sequence if provided). - * :obj:`'max_length'`: Pad to a maximum length specified with the argument :obj:`max_length` or to the - maximum acceptable input length for the model if that argument is not provided. - * :obj:`False` or :obj:`'do_not_pad'` (default): No padding (i.e., can output a batch with sequences of - different lengths). - max_length (:obj:`int`, `optional`): - Maximum length of the returned list and optionally padding length (see above). - pad_to_multiple_of (:obj:`int`, `optional`): - If set will pad the sequence to a multiple of the provided value. - This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability >= - 7.5 (Volta). - label_pad_token_id (:obj:`int`, `optional`, defaults to -100): - The id to use when padding the labels (-100 will be automatically ignore by PyTorch loss functions). - """ - - tokenizer: PreTrainedTokenizerBase - padding: Union[bool, str, PaddingStrategy] = True - max_length: Optional[int] = None - pad_to_multiple_of: Optional[int] = None - label_pad_token_id: int = -100 - - def __call__(self, features): - label_name = "label" if "label" in features[0].keys() else "labels" - labels = [feature[label_name] for feature in features] if label_name in features[0].keys() else None - - images = None - if "images" in features[0]: - images = torch.stack([torch.tensor(d.pop("images")) for d in features]) - IMAGE_LEN = int(images.shape[-1] / 16) * int(images.shape[-1] / 16) + 1 - - batch = self.tokenizer.pad( - features, - padding=self.padding, - max_length=self.max_length, - pad_to_multiple_of=self.pad_to_multiple_of, - # Conversion to tensors will fail if we have labels as they are not of the same length yet. 
- return_tensors="pt" if labels is None else None, - ) - - if images is not None: - batch["images"] = images - batch = {k: torch.tensor(v, dtype=torch.int64) if isinstance(v[0], list) and k == 'attention_mask' else v - for k, v in batch.items()} - visual_attention_mask = torch.ones((len(batch['input_ids']), IMAGE_LEN), dtype=torch.long) - batch["attention_mask"] = torch.cat([batch['attention_mask'], visual_attention_mask], dim=1) - - if labels is None: - return batch - - has_bbox_input = "bbox" in features[0] - has_position_input = "position_ids" in features[0] - padding_idx=self.tokenizer.pad_token_id - sequence_length = torch.tensor(batch["input_ids"]).shape[1] - padding_side = self.tokenizer.padding_side - if padding_side == "right": - batch["labels"] = [label + [self.label_pad_token_id] * (sequence_length - len(label)) for label in labels] - if has_bbox_input: - batch["bbox"] = [bbox + [[0, 0, 0, 0]] * (sequence_length - len(bbox)) for bbox in batch["bbox"]] - if has_position_input: - batch["position_ids"] = [position_id + [padding_idx] * (sequence_length - len(position_id)) - for position_id in batch["position_ids"]] - - else: - batch["labels"] = [[self.label_pad_token_id] * (sequence_length - len(label)) + label for label in labels] - if has_bbox_input: - batch["bbox"] = [[[0, 0, 0, 0]] * (sequence_length - len(bbox)) + bbox for bbox in batch["bbox"]] - if has_position_input: - batch["position_ids"] = [[padding_idx] * (sequence_length - len(position_id)) - + position_id for position_id in batch["position_ids"]] - - if 'segment_ids' in batch: - assert 'position_ids' in batch - for i in range(len(batch['segment_ids'])): - batch['segment_ids'][i] = batch['segment_ids'][i] + [batch['segment_ids'][i][-1] + 1] * (sequence_length - len(batch['segment_ids'][i])) + [ - batch['segment_ids'][i][-1] + 2] * IMAGE_LEN - - batch = {k: torch.tensor(v, dtype=torch.int64) if isinstance(v[0], list) else v for k, v in batch.items()} - - if 'segment_ids' in batch: - valid_span = pre_calc_rel_mat( - segment_ids=batch['segment_ids'] - ) - batch['valid_span'] = valid_span - del batch['segment_ids'] - - if images is not None: - visual_labels = torch.ones((len(batch['input_ids']), IMAGE_LEN), dtype=torch.long) * -100 - batch["labels"] = torch.cat([batch['labels'], visual_labels], dim=1) - - return batch diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/funsd.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/funsd.py deleted file mode 100644 index 9f34042023042b10d52906d4ba5ca9c87e65a600..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/funsd.py +++ /dev/null @@ -1,136 +0,0 @@ -# coding=utf-8 -''' -Reference: https://huggingface.co/datasets/nielsr/funsd/blob/main/funsd.py -''' -import json -import os - -import datasets - -from .image_utils import load_image, normalize_bbox - - -logger = datasets.logging.get_logger(__name__) - - -_CITATION = """\ -@article{Jaume2019FUNSDAD, - title={FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents}, - author={Guillaume Jaume and H. K. Ekenel and J. Thiran}, - journal={2019 International Conference on Document Analysis and Recognition Workshops (ICDARW)}, - year={2019}, - volume={2}, - pages={1-6} -} -""" - -_DESCRIPTION = """\ -https://guillaumejaume.github.io/FUNSD/ -""" - - -class FunsdConfig(datasets.BuilderConfig): - """BuilderConfig for FUNSD""" - - def __init__(self, **kwargs): - """BuilderConfig for FUNSD. 
- - Args: - **kwargs: keyword arguments forwarded to super. - """ - super(FunsdConfig, self).__init__(**kwargs) - - -class Funsd(datasets.GeneratorBasedBuilder): - """Conll2003 dataset.""" - - BUILDER_CONFIGS = [ - FunsdConfig(name="funsd", version=datasets.Version("1.0.0"), description="FUNSD dataset"), - ] - - def _info(self): - return datasets.DatasetInfo( - description=_DESCRIPTION, - features=datasets.Features( - { - "id": datasets.Value("string"), - "tokens": datasets.Sequence(datasets.Value("string")), - "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))), - "ner_tags": datasets.Sequence( - datasets.features.ClassLabel( - names=["O", "B-HEADER", "I-HEADER", "B-QUESTION", "I-QUESTION", "B-ANSWER", "I-ANSWER"] - ) - ), - "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"), - "image_path": datasets.Value("string"), - } - ), - supervised_keys=None, - homepage="https://guillaumejaume.github.io/FUNSD/", - citation=_CITATION, - ) - - def _split_generators(self, dl_manager): - """Returns SplitGenerators.""" - downloaded_file = dl_manager.download_and_extract("https://guillaumejaume.github.io/FUNSD/dataset.zip") - return [ - datasets.SplitGenerator( - name=datasets.Split.TRAIN, gen_kwargs={"filepath": f"{downloaded_file}/dataset/training_data/"} - ), - datasets.SplitGenerator( - name=datasets.Split.TEST, gen_kwargs={"filepath": f"{downloaded_file}/dataset/testing_data/"} - ), - ] - - def get_line_bbox(self, bboxs): - x = [bboxs[i][j] for i in range(len(bboxs)) for j in range(0, len(bboxs[i]), 2)] - y = [bboxs[i][j] for i in range(len(bboxs)) for j in range(1, len(bboxs[i]), 2)] - - x0, y0, x1, y1 = min(x), min(y), max(x), max(y) - - assert x1 >= x0 and y1 >= y0 - bbox = [[x0, y0, x1, y1] for _ in range(len(bboxs))] - return bbox - - def _generate_examples(self, filepath): - logger.info("⏳ Generating examples from = %s", filepath) - ann_dir = os.path.join(filepath, "annotations") - img_dir = os.path.join(filepath, "images") - for guid, file in enumerate(sorted(os.listdir(ann_dir))): - tokens = [] - bboxes = [] - ner_tags = [] - - file_path = os.path.join(ann_dir, file) - with open(file_path, "r", encoding="utf8") as f: - data = json.load(f) - image_path = os.path.join(img_dir, file) - image_path = image_path.replace("json", "png") - image, size = load_image(image_path) - for item in data["form"]: - cur_line_bboxes = [] - words, label = item["words"], item["label"] - words = [w for w in words if w["text"].strip() != ""] - if len(words) == 0: - continue - if label == "other": - for w in words: - tokens.append(w["text"]) - ner_tags.append("O") - cur_line_bboxes.append(normalize_bbox(w["box"], size)) - else: - tokens.append(words[0]["text"]) - ner_tags.append("B-" + label.upper()) - cur_line_bboxes.append(normalize_bbox(words[0]["box"], size)) - for w in words[1:]: - tokens.append(w["text"]) - ner_tags.append("I-" + label.upper()) - cur_line_bboxes.append(normalize_bbox(w["box"], size)) - # by default: --segment_level_layout 1 - # if do not want to use segment_level_layout, comment the following line - cur_line_bboxes = self.get_line_bbox(cur_line_bboxes) - # box = normalize_bbox(item["box"], size) - # cur_line_bboxes = [box for _ in range(len(words))] - bboxes.extend(cur_line_bboxes) - yield guid, {"id": str(guid), "tokens": tokens, "bboxes": bboxes, "ner_tags": ner_tags, - "image": image, "image_path": image_path} \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/image_utils.py 
b/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/image_utils.py deleted file mode 100644 index 90a4b34373980246d6397b95b91e84461f3f2580..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/image_utils.py +++ /dev/null @@ -1,284 +0,0 @@ -import torchvision.transforms.functional as F -import warnings -import math -import random -import numpy as np -from PIL import Image -import torch - -from detectron2.data.detection_utils import read_image -from detectron2.data.transforms import ResizeTransform, TransformList - -def normalize_bbox(bbox, size): - return [ - int(1000 * bbox[0] / size[0]), - int(1000 * bbox[1] / size[1]), - int(1000 * bbox[2] / size[0]), - int(1000 * bbox[3] / size[1]), - ] - - -def load_image(image_path): - image = read_image(image_path, format="BGR") - h = image.shape[0] - w = image.shape[1] - img_trans = TransformList([ResizeTransform(h=h, w=w, new_h=224, new_w=224)]) - image = torch.tensor(img_trans.apply_image(image).copy()).permute(2, 0, 1) # copy to make it writeable - return image, (w, h) - - -def crop(image, i, j, h, w, boxes=None): - cropped_image = F.crop(image, i, j, h, w) - - if boxes is not None: - # Currently we cannot use this case since when some boxes is out of the cropped image, - # it may be better to drop out these boxes along with their text input (instead of min or clamp) - # which haven't been implemented here - max_size = torch.as_tensor([w, h], dtype=torch.float32) - cropped_boxes = torch.as_tensor(boxes) - torch.as_tensor([j, i, j, i]) - cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size) - cropped_boxes = cropped_boxes.clamp(min=0) - boxes = cropped_boxes.reshape(-1, 4) - - return cropped_image, boxes - - -def resize(image, size, interpolation, boxes=None): - # It seems that we do not need to resize boxes here, since the boxes will be resized to 1000x1000 finally, - # which is compatible with a square image size of 224x224 - rescaled_image = F.resize(image, size, interpolation) - - if boxes is None: - return rescaled_image, None - - ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image.size, image.size)) - ratio_width, ratio_height = ratios - - # boxes = boxes.copy() - scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height]) - - return rescaled_image, scaled_boxes - - -def clamp(num, min_value, max_value): - return max(min(num, max_value), min_value) - - -def get_bb(bb, page_size): - bbs = [float(j) for j in bb] - xs, ys = [], [] - for i, b in enumerate(bbs): - if i % 2 == 0: - xs.append(b) - else: - ys.append(b) - (width, height) = page_size - return_bb = [ - clamp(min(xs), 0, width - 1), - clamp(min(ys), 0, height - 1), - clamp(max(xs), 0, width - 1), - clamp(max(ys), 0, height - 1), - ] - return_bb = [ - int(1000 * return_bb[0] / width), - int(1000 * return_bb[1] / height), - int(1000 * return_bb[2] / width), - int(1000 * return_bb[3] / height), - ] - return return_bb - - -class ToNumpy: - - def __call__(self, pil_img): - np_img = np.array(pil_img, dtype=np.uint8) - if np_img.ndim < 3: - np_img = np.expand_dims(np_img, axis=-1) - np_img = np.rollaxis(np_img, 2) # HWC to CHW - return np_img - - -class ToTensor: - - def __init__(self, dtype=torch.float32): - self.dtype = dtype - - def __call__(self, pil_img): - np_img = np.array(pil_img, dtype=np.uint8) - if np_img.ndim < 3: - np_img = np.expand_dims(np_img, axis=-1) - np_img = np.rollaxis(np_img, 2) # HWC to CHW - return 
torch.from_numpy(np_img).to(dtype=self.dtype) - - -_pil_interpolation_to_str = { - F.InterpolationMode.NEAREST: 'F.InterpolationMode.NEAREST', - F.InterpolationMode.BILINEAR: 'F.InterpolationMode.BILINEAR', - F.InterpolationMode.BICUBIC: 'F.InterpolationMode.BICUBIC', - F.InterpolationMode.LANCZOS: 'F.InterpolationMode.LANCZOS', - F.InterpolationMode.HAMMING: 'F.InterpolationMode.HAMMING', - F.InterpolationMode.BOX: 'F.InterpolationMode.BOX', -} - - -def _pil_interp(method): - if method == 'bicubic': - return F.InterpolationMode.BICUBIC - elif method == 'lanczos': - return F.InterpolationMode.LANCZOS - elif method == 'hamming': - return F.InterpolationMode.HAMMING - else: - # default bilinear, do we want to allow nearest? - return F.InterpolationMode.BILINEAR - - -class Compose: - """Composes several transforms together. This transform does not support torchscript. - Please, see the note below. - - Args: - transforms (list of ``Transform`` objects): list of transforms to compose. - - Example: - >>> transforms.Compose([ - >>> transforms.CenterCrop(10), - >>> transforms.PILToTensor(), - >>> transforms.ConvertImageDtype(torch.float), - >>> ]) - - .. note:: - In order to script the transformations, please use ``torch.nn.Sequential`` as below. - - >>> transforms = torch.nn.Sequential( - >>> transforms.CenterCrop(10), - >>> transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), - >>> ) - >>> scripted_transforms = torch.jit.script(transforms) - - Make sure to use only scriptable transformations, i.e. that work with ``torch.Tensor``, does not require - `lambda` functions or ``PIL.Image``. - - """ - - def __init__(self, transforms): - self.transforms = transforms - - def __call__(self, img, augmentation=False, box=None): - for t in self.transforms: - img = t(img, augmentation, box) - return img - - -class RandomResizedCropAndInterpolationWithTwoPic: - """Crop the given PIL Image to random size and aspect ratio with random interpolation. - A crop of random size (default: of 0.08 to 1.0) of the original size and a random - aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop - is finally resized to given size. - This is popularly used to train the Inception networks. - Args: - size: expected output size of each edge - scale: range of size of the origin size cropped - ratio: range of aspect ratio of the origin aspect ratio cropped - interpolation: Default: PIL.Image.BILINEAR - """ - - def __init__(self, size, second_size=None, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), - interpolation='bilinear', second_interpolation='lanczos'): - if isinstance(size, tuple): - self.size = size - else: - self.size = (size, size) - if second_size is not None: - if isinstance(second_size, tuple): - self.second_size = second_size - else: - self.second_size = (second_size, second_size) - else: - self.second_size = None - if (scale[0] > scale[1]) or (ratio[0] > ratio[1]): - warnings.warn("range should be of kind (min, max)") - - self.interpolation = _pil_interp(interpolation) - self.second_interpolation = _pil_interp(second_interpolation) - self.scale = scale - self.ratio = ratio - - @staticmethod - def get_params(img, scale, ratio): - """Get parameters for ``crop`` for a random sized crop. - Args: - img (PIL Image): Image to be cropped. - scale (tuple): range of size of the origin size cropped - ratio (tuple): range of aspect ratio of the origin aspect ratio cropped - Returns: - tuple: params (i, j, h, w) to be passed to ``crop`` for a random - sized crop. 
- """ - area = img.size[0] * img.size[1] - - for attempt in range(10): - target_area = random.uniform(*scale) * area - log_ratio = (math.log(ratio[0]), math.log(ratio[1])) - aspect_ratio = math.exp(random.uniform(*log_ratio)) - - w = int(round(math.sqrt(target_area * aspect_ratio))) - h = int(round(math.sqrt(target_area / aspect_ratio))) - - if w <= img.size[0] and h <= img.size[1]: - i = random.randint(0, img.size[1] - h) - j = random.randint(0, img.size[0] - w) - return i, j, h, w - - # Fallback to central crop - in_ratio = img.size[0] / img.size[1] - if in_ratio < min(ratio): - w = img.size[0] - h = int(round(w / min(ratio))) - elif in_ratio > max(ratio): - h = img.size[1] - w = int(round(h * max(ratio))) - else: # whole image - w = img.size[0] - h = img.size[1] - i = (img.size[1] - h) // 2 - j = (img.size[0] - w) // 2 - return i, j, h, w - - def __call__(self, img, augmentation=False, box=None): - """ - Args: - img (PIL Image): Image to be cropped and resized. - Returns: - PIL Image: Randomly cropped and resized image. - """ - if augmentation: - i, j, h, w = self.get_params(img, self.scale, self.ratio) - img = F.crop(img, i, j, h, w) - # img, box = crop(img, i, j, h, w, box) - img = F.resize(img, self.size, self.interpolation) - second_img = F.resize(img, self.second_size, self.second_interpolation) \ - if self.second_size is not None else None - return img, second_img - - def __repr__(self): - if isinstance(self.interpolation, (tuple, list)): - interpolate_str = ' '.join([_pil_interpolation_to_str[x] for x in self.interpolation]) - else: - interpolate_str = _pil_interpolation_to_str[self.interpolation] - format_string = self.__class__.__name__ + '(size={0}'.format(self.size) - format_string += ', scale={0}'.format(tuple(round(s, 4) for s in self.scale)) - format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio)) - format_string += ', interpolation={0}'.format(interpolate_str) - if self.second_size is not None: - format_string += ', second_size={0}'.format(self.second_size) - format_string += ', second_interpolation={0}'.format(_pil_interpolation_to_str[self.second_interpolation]) - format_string += ')' - return format_string - - -def pil_loader(path: str) -> Image.Image: - # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) - with open(path, 'rb') as f: - img = Image.open(f) - return img.convert('RGB') diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/xfund.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/xfund.py deleted file mode 100644 index 7749ba5dd1d59a4e0c5baf4f2c27cffaae3e4e12..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/data/xfund.py +++ /dev/null @@ -1,213 +0,0 @@ -import os -import json - -import torch -from torch.utils.data.dataset import Dataset -from torchvision import transforms -from PIL import Image - -from .image_utils import Compose, RandomResizedCropAndInterpolationWithTwoPic - -XFund_label2ids = { - "O":0, - 'B-HEADER':1, - 'I-HEADER':2, - 'B-QUESTION':3, - 'I-QUESTION':4, - 'B-ANSWER':5, - 'I-ANSWER':6, -} - -class xfund_dataset(Dataset): - def box_norm(self, box, width, height): - def clip(min_num, num, max_num): - return min(max(num, min_num), max_num) - - x0, y0, x1, y1 = box - x0 = clip(0, int((x0 / width) * 1000), 1000) - y0 = clip(0, int((y0 / height) * 1000), 1000) - x1 = clip(0, int((x1 / width) * 1000), 1000) - y1 = clip(0, int((y1 / height) * 1000), 1000) - assert x1 >= x0 - assert y1 >= 
y0 - return [x0, y0, x1, y1] - - def get_segment_ids(self, bboxs): - segment_ids = [] - for i in range(len(bboxs)): - if i == 0: - segment_ids.append(0) - else: - if bboxs[i - 1] == bboxs[i]: - segment_ids.append(segment_ids[-1]) - else: - segment_ids.append(segment_ids[-1] + 1) - return segment_ids - - def get_position_ids(self, segment_ids): - position_ids = [] - for i in range(len(segment_ids)): - if i == 0: - position_ids.append(2) - else: - if segment_ids[i] == segment_ids[i - 1]: - position_ids.append(position_ids[-1] + 1) - else: - position_ids.append(2) - return position_ids - - def load_data( - self, - data_file, - ): - # re-org data format - total_data = {"id": [], "lines": [], "bboxes": [], "ner_tags": [], "image_path": []} - for i in range(len(data_file['documents'])): - width, height = data_file['documents'][i]['img']['width'], data_file['documents'][i]['img'][ - 'height'] - - cur_doc_lines, cur_doc_bboxes, cur_doc_ner_tags, cur_doc_image_path = [], [], [], [] - for j in range(len(data_file['documents'][i]['document'])): - cur_item = data_file['documents'][i]['document'][j] - cur_doc_lines.append(cur_item['text']) - cur_doc_bboxes.append(self.box_norm(cur_item['box'], width=width, height=height)) - cur_doc_ner_tags.append(cur_item['label']) - total_data['id'] += [len(total_data['id'])] - total_data['lines'] += [cur_doc_lines] - total_data['bboxes'] += [cur_doc_bboxes] - total_data['ner_tags'] += [cur_doc_ner_tags] - total_data['image_path'] += [data_file['documents'][i]['img']['fname']] - - # tokenize text and get bbox/label - total_input_ids, total_bboxs, total_label_ids = [], [], [] - for i in range(len(total_data['lines'])): - cur_doc_input_ids, cur_doc_bboxs, cur_doc_labels = [], [], [] - for j in range(len(total_data['lines'][i])): - cur_input_ids = self.tokenizer(total_data['lines'][i][j], truncation=False, add_special_tokens=False, return_attention_mask=False)['input_ids'] - if len(cur_input_ids) == 0: continue - - cur_label = total_data['ner_tags'][i][j].upper() - if cur_label == 'OTHER': - cur_labels = ["O"] * len(cur_input_ids) - for k in range(len(cur_labels)): - cur_labels[k] = self.label2ids[cur_labels[k]] - else: - cur_labels = [cur_label] * len(cur_input_ids) - cur_labels[0] = self.label2ids['B-' + cur_labels[0]] - for k in range(1, len(cur_labels)): - cur_labels[k] = self.label2ids['I-' + cur_labels[k]] - assert len(cur_input_ids) == len([total_data['bboxes'][i][j]] * len(cur_input_ids)) == len(cur_labels) - cur_doc_input_ids += cur_input_ids - cur_doc_bboxs += [total_data['bboxes'][i][j]] * len(cur_input_ids) - cur_doc_labels += cur_labels - assert len(cur_doc_input_ids) == len(cur_doc_bboxs) == len(cur_doc_labels) - assert len(cur_doc_input_ids) > 0 - - total_input_ids.append(cur_doc_input_ids) - total_bboxs.append(cur_doc_bboxs) - total_label_ids.append(cur_doc_labels) - assert len(total_input_ids) == len(total_bboxs) == len(total_label_ids) - - # split text to several slices because of over-length - input_ids, bboxs, labels = [], [], [] - segment_ids, position_ids = [], [] - image_path = [] - for i in range(len(total_input_ids)): - start = 0 - cur_iter = 0 - while start < len(total_input_ids[i]): - end = min(start + 510, len(total_input_ids[i])) - - input_ids.append([self.tokenizer.cls_token_id] + total_input_ids[i][start: end] + [self.tokenizer.sep_token_id]) - bboxs.append([[0, 0, 0, 0]] + total_bboxs[i][start: end] + [[1000, 1000, 1000, 1000]]) - labels.append([-100] + total_label_ids[i][start: end] + [-100]) - - cur_segment_ids = 
self.get_segment_ids(bboxs[-1]) - cur_position_ids = self.get_position_ids(cur_segment_ids) - segment_ids.append(cur_segment_ids) - position_ids.append(cur_position_ids) - image_path.append(os.path.join(self.args.data_dir, "images", total_data['image_path'][i])) - - start = end - cur_iter += 1 - - assert len(input_ids) == len(bboxs) == len(labels) == len(segment_ids) == len(position_ids) - assert len(segment_ids) == len(image_path) - - res = { - 'input_ids': input_ids, - 'bbox': bboxs, - 'labels': labels, - 'segment_ids': segment_ids, - 'position_ids': position_ids, - 'image_path': image_path, - } - return res - - def __init__( - self, - args, - tokenizer, - mode - ): - self.args = args - self.mode = mode - self.cur_la = args.language - self.tokenizer = tokenizer - self.label2ids = XFund_label2ids - - - self.common_transform = Compose([ - RandomResizedCropAndInterpolationWithTwoPic( - size=args.input_size, interpolation=args.train_interpolation, - ), - ]) - - self.patch_transform = transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize( - mean=torch.tensor((0.5, 0.5, 0.5)), - std=torch.tensor((0.5, 0.5, 0.5))) - ]) - - data_file = json.load( - open(os.path.join(args.data_dir, "{}.{}.json".format(self.cur_la, 'train' if mode == 'train' else 'val')), - 'r')) - - self.feature = self.load_data(data_file) - - def __len__(self): - return len(self.feature['input_ids']) - - def __getitem__(self, index): - input_ids = self.feature["input_ids"][index] - - # attention_mask = self.feature["attention_mask"][index] - attention_mask = [1] * len(input_ids) - labels = self.feature["labels"][index] - bbox = self.feature["bbox"][index] - segment_ids = self.feature['segment_ids'][index] - position_ids = self.feature['position_ids'][index] - - img = pil_loader(self.feature['image_path'][index]) - for_patches, _ = self.common_transform(img, augmentation=False) - patch = self.patch_transform(for_patches) - - assert len(input_ids) == len(attention_mask) == len(labels) == len(bbox) == len(segment_ids) - - res = { - "input_ids": input_ids, - "attention_mask": attention_mask, - "labels": labels, - "bbox": bbox, - "segment_ids": segment_ids, - "position_ids": position_ids, - "images": patch, - } - return res - -def pil_loader(path: str) -> Image.Image: - # open path as file to avoid ResourceWarning (https://github.com/python-pillow/Pillow/issues/835) - with open(path, 'rb') as f: - img = Image.open(f) - return img.convert('RGB') \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/__init__.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/__init__.py deleted file mode 100644 index 0b3100effb34547bbaba7503288db34374cad9ca..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .layoutlmv3 import ( - LayoutLMv3Config, - LayoutLMv3ForTokenClassification, - LayoutLMv3ForQuestionAnswering, - LayoutLMv3ForSequenceClassification, - LayoutLMv3Tokenizer, -) diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py deleted file mode 100644 index e06a24b0ca9971cfe99dc9ef60ce8e495ff406bd..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -from transformers import AutoConfig, AutoModel, 
AutoModelForTokenClassification, \ - AutoModelForQuestionAnswering, AutoModelForSequenceClassification, AutoTokenizer -from transformers.convert_slow_tokenizer import SLOW_TO_FAST_CONVERTERS, RobertaConverter - -from .configuration_layoutlmv3 import LayoutLMv3Config -from .modeling_layoutlmv3 import ( - LayoutLMv3ForTokenClassification, - LayoutLMv3ForQuestionAnswering, - LayoutLMv3ForSequenceClassification, - LayoutLMv3Model, -) -from .tokenization_layoutlmv3 import LayoutLMv3Tokenizer -from .tokenization_layoutlmv3_fast import LayoutLMv3TokenizerFast - - -#AutoConfig.register("layoutlmv3", LayoutLMv3Config) -#AutoModel.register(LayoutLMv3Config, LayoutLMv3Model) -#AutoModelForTokenClassification.register(LayoutLMv3Config, LayoutLMv3ForTokenClassification) -#AutoModelForQuestionAnswering.register(LayoutLMv3Config, LayoutLMv3ForQuestionAnswering) -#AutoModelForSequenceClassification.register(LayoutLMv3Config, LayoutLMv3ForSequenceClassification) -#AutoTokenizer.register( -# LayoutLMv3Config, slow_tokenizer_class=LayoutLMv3Tokenizer, fast_tokenizer_class=LayoutLMv3TokenizerFast -#) -SLOW_TO_FAST_CONVERTERS.update({"LayoutLMv3Tokenizer": RobertaConverter}) diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py deleted file mode 100644 index d2c7b4d71b4d51504dee8bc10e50ea91bac00270..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/configuration_layoutlmv3.py +++ /dev/null @@ -1,60 +0,0 @@ -# coding=utf-8 -from transformers.models.bert.configuration_bert import BertConfig -from transformers.utils import logging - - -logger = logging.get_logger(__name__) - -LAYOUTLMV3_PRETRAINED_CONFIG_ARCHIVE_MAP = { - "layoutlmv3-base": "https://huggingface.co/microsoft/layoutlmv3-base/resolve/main/config.json", - "layoutlmv3-large": "https://huggingface.co/microsoft/layoutlmv3-large/resolve/main/config.json", - # See all LayoutLMv3 models at https://huggingface.co/models?filter=layoutlmv3 -} - - -class LayoutLMv3Config(BertConfig): - model_type = "layoutlmv3" - - def __init__( - self, - pad_token_id=1, - bos_token_id=0, - eos_token_id=2, - max_2d_position_embeddings=1024, - coordinate_size=None, - shape_size=None, - has_relative_attention_bias=False, - rel_pos_bins=32, - max_rel_pos=128, - has_spatial_attention_bias=False, - rel_2d_pos_bins=64, - max_rel_2d_pos=256, - visual_embed=True, - mim=False, - wpa_task=False, - discrete_vae_weight_path='', - discrete_vae_type='dall-e', - input_size=224, - second_input_size=112, - device='cuda', - **kwargs - ): - """Constructs RobertaConfig.""" - super().__init__(pad_token_id=pad_token_id, bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs) - self.max_2d_position_embeddings = max_2d_position_embeddings - self.coordinate_size = coordinate_size - self.shape_size = shape_size - self.has_relative_attention_bias = has_relative_attention_bias - self.rel_pos_bins = rel_pos_bins - self.max_rel_pos = max_rel_pos - self.has_spatial_attention_bias = has_spatial_attention_bias - self.rel_2d_pos_bins = rel_2d_pos_bins - self.max_rel_2d_pos = max_rel_2d_pos - self.visual_embed = visual_embed - self.mim = mim - self.wpa_task = wpa_task - self.discrete_vae_weight_path = discrete_vae_weight_path - self.discrete_vae_type = discrete_vae_type - self.input_size = input_size - self.second_input_size = second_input_size - self.device = device 
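[Editor's note] A minimal sketch of how the configuration above is meant to be used. The import path assumes the pre-deletion package layout shown in this diff, and the field values are the usual layoutlmv3-base settings rather than anything read from a checkpoint:

# Minimal sketch, assuming the configuration module above is still importable.
from magic_pdf.model.sub_modules.layout.layoutlmv3.layoutlmft.models.layoutlmv3.configuration_layoutlmv3 import LayoutLMv3Config

config = LayoutLMv3Config(
    coordinate_size=128,               # per-coordinate (x/y) embedding width
    shape_size=128,                    # box height/width embedding width
    has_relative_attention_bias=True,  # enable the 1D relative position bias
    has_spatial_attention_bias=True,   # enable the 2D (bbox) relative position bias
    visual_embed=True,                 # keep the ViT-style patch branch
    input_size=224,                    # (224 / 16) ** 2 + 1 = 197 visual tokens
)
# The six spatial embeddings are concatenated, so 4 * coordinate_size + 2 * shape_size
# must equal hidden_size (4 * 128 + 2 * 128 = 768 for the base model).
assert 4 * config.coordinate_size + 2 * config.shape_size == config.hidden_size
assert config.max_2d_position_embeddings == 1024  # bbox coordinates must stay in [0, 1023]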
diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py deleted file mode 100644 index 113eb8eb1d123a4985c1894e0caab561b19f64c2..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/modeling_layoutlmv3.py +++ /dev/null @@ -1,1282 +0,0 @@ -# coding=utf-8 -# Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc. team. -# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""PyTorch LayoutLMv3 model. """ -import math - -import torch -import torch.nn as nn -import torch.nn.functional as F -import torch.utils.checkpoint -from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss - -from transformers import apply_chunking_to_forward -from transformers.modeling_outputs import ( - BaseModelOutputWithPastAndCrossAttentions, - BaseModelOutputWithPoolingAndCrossAttentions, - MaskedLMOutput, - TokenClassifierOutput, - QuestionAnsweringModelOutput, - SequenceClassifierOutput, -) -from transformers.modeling_utils import PreTrainedModel, find_pruneable_heads_and_indices, prune_linear_layer -from transformers.models.roberta.modeling_roberta import ( - RobertaIntermediate, - RobertaLMHead, - RobertaOutput, - RobertaSelfOutput, -) -from transformers.utils import logging - -from .configuration_layoutlmv3 import LayoutLMv3Config -from timm.models.layers import to_2tuple - - -logger = logging.get_logger(__name__) - - -class PatchEmbed(nn.Module): - """ Image to Patch Embedding - """ - def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): - super().__init__() - img_size = to_2tuple(img_size) - patch_size = to_2tuple(patch_size) - self.patch_shape = (img_size[0] // patch_size[0], img_size[1] // patch_size[1]) - self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) - # The following variables are used in detection mycheckpointer.py - self.num_patches = (img_size[1] // patch_size[1]) * (img_size[0] // patch_size[0]) - self.num_patches_w = self.patch_shape[0] - self.num_patches_h = self.patch_shape[1] - - def forward(self, x, position_embedding=None): - x = self.proj(x) - - if position_embedding is not None: - # interpolate the position embedding to the corresponding size - position_embedding = position_embedding.view(1, self.patch_shape[0], self.patch_shape[1], -1).permute(0, 3, 1, 2) - Hp, Wp = x.shape[2], x.shape[3] - position_embedding = F.interpolate(position_embedding, size=(Hp, Wp), mode='bicubic') - x = x + position_embedding - - x = x.flatten(2).transpose(1, 2) - return x - -class LayoutLMv3Embeddings(nn.Module): - """ - Same as BertEmbeddings with a tiny tweak for positional embeddings indexing. 
- """ - - # Copied from transformers.models.bert.modeling_bert.BertEmbeddings.__init__ - def __init__(self, config): - super().__init__() - self.word_embeddings = nn.Embedding(config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id) - self.token_type_embeddings = nn.Embedding(config.type_vocab_size, config.hidden_size) - - self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps) - self.dropout = nn.Dropout(config.hidden_dropout_prob) - - # position_ids (1, len position emb) is contiguous in memory and exported when serialized - self.register_buffer("position_ids", torch.arange(config.max_position_embeddings).expand((1, -1))) - - # End copy - self.padding_idx = config.pad_token_id - self.position_embeddings = nn.Embedding( - config.max_position_embeddings, config.hidden_size, padding_idx=self.padding_idx - ) - - self.x_position_embeddings = nn.Embedding(config.max_2d_position_embeddings, config.coordinate_size) - self.y_position_embeddings = nn.Embedding(config.max_2d_position_embeddings, config.coordinate_size) - self.h_position_embeddings = nn.Embedding(config.max_2d_position_embeddings, config.shape_size) - self.w_position_embeddings = nn.Embedding(config.max_2d_position_embeddings, config.shape_size) - - def _calc_spatial_position_embeddings(self, bbox): - try: - assert torch.all(0 <= bbox) and torch.all(bbox <= 1023) - left_position_embeddings = self.x_position_embeddings(bbox[:, :, 0]) - upper_position_embeddings = self.y_position_embeddings(bbox[:, :, 1]) - right_position_embeddings = self.x_position_embeddings(bbox[:, :, 2]) - lower_position_embeddings = self.y_position_embeddings(bbox[:, :, 3]) - except IndexError as e: - raise IndexError("The :obj:`bbox` coordinate values should be within 0-1000 range.") from e - - h_position_embeddings = self.h_position_embeddings(torch.clip(bbox[:, :, 3] - bbox[:, :, 1], 0, 1023)) - w_position_embeddings = self.w_position_embeddings(torch.clip(bbox[:, :, 2] - bbox[:, :, 0], 0, 1023)) - - # below is the difference between LayoutLMEmbeddingsV2 (torch.cat) and LayoutLMEmbeddingsV1 (add) - spatial_position_embeddings = torch.cat( - [ - left_position_embeddings, - upper_position_embeddings, - right_position_embeddings, - lower_position_embeddings, - h_position_embeddings, - w_position_embeddings, - ], - dim=-1, - ) - return spatial_position_embeddings - - def create_position_ids_from_input_ids(self, input_ids, padding_idx, past_key_values_length=0): - """ - Replace non-padding symbols with their position numbers. Position numbers begin at padding_idx+1. Padding symbols - are ignored. This is modified from fairseq's `utils.make_positions`. - - Args: - x: torch.Tensor x: - - Returns: torch.Tensor - """ - # The series of casts and type-conversions here are carefully balanced to both work with ONNX export and XLA. - mask = input_ids.ne(padding_idx).int() - incremental_indices = (torch.cumsum(mask, dim=1).type_as(mask) + past_key_values_length) * mask - return incremental_indices.long() + padding_idx - - def forward( - self, - input_ids=None, - bbox=None, - token_type_ids=None, - position_ids=None, - inputs_embeds=None, - past_key_values_length=0, - ): - if position_ids is None: - if input_ids is not None: - # Create the position ids from the input token ids. Any padded tokens remain padded. 
- position_ids = self.create_position_ids_from_input_ids( - input_ids, self.padding_idx, past_key_values_length).to(input_ids.device) - else: - position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds) - - if input_ids is not None: - input_shape = input_ids.size() - else: - input_shape = inputs_embeds.size()[:-1] - - if token_type_ids is None: - token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=self.position_ids.device) - - if inputs_embeds is None: - inputs_embeds = self.word_embeddings(input_ids) - token_type_embeddings = self.token_type_embeddings(token_type_ids) - - embeddings = inputs_embeds + token_type_embeddings - position_embeddings = self.position_embeddings(position_ids) - embeddings += position_embeddings - - spatial_position_embeddings = self._calc_spatial_position_embeddings(bbox) - - embeddings = embeddings + spatial_position_embeddings - - embeddings = self.LayerNorm(embeddings) - embeddings = self.dropout(embeddings) - return embeddings - - def create_position_ids_from_inputs_embeds(self, inputs_embeds): - """ - We are provided embeddings directly. We cannot infer which are padded so just generate sequential position ids. - - Args: - inputs_embeds: torch.Tensor≈ - - Returns: torch.Tensor - """ - input_shape = inputs_embeds.size()[:-1] - sequence_length = input_shape[1] - - position_ids = torch.arange( - self.padding_idx + 1, sequence_length + self.padding_idx + 1, dtype=torch.long, device=inputs_embeds.device - ) - return position_ids.unsqueeze(0).expand(input_shape) - - -class LayoutLMv3PreTrainedModel(PreTrainedModel): - """ - An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained - models. - """ - - config_class = LayoutLMv3Config - base_model_prefix = "layoutlmv3" - - # Copied from transformers.models.bert.modeling_bert.BertPreTrainedModel._init_weights - def _init_weights(self, module): - """Initialize the weights""" - if isinstance(module, nn.Linear): - # Slightly different from the TF version which uses truncated_normal for initialization - # cf https://github.com/pytorch/pytorch/pull/5617 - module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) - if module.bias is not None: - module.bias.data.zero_() - elif isinstance(module, nn.Embedding): - module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) - if module.padding_idx is not None: - module.weight.data[module.padding_idx].zero_() - elif isinstance(module, nn.LayerNorm): - module.bias.data.zero_() - module.weight.data.fill_(1.0) - - -class LayoutLMv3SelfAttention(nn.Module): - def __init__(self, config): - super().__init__() - if config.hidden_size % config.num_attention_heads != 0 and not hasattr(config, "embedding_size"): - raise ValueError( - f"The hidden size ({config.hidden_size}) is not a multiple of the number of attention " - f"heads ({config.num_attention_heads})" - ) - - self.num_attention_heads = config.num_attention_heads - self.attention_head_size = int(config.hidden_size / config.num_attention_heads) - self.all_head_size = self.num_attention_heads * self.attention_head_size - - self.query = nn.Linear(config.hidden_size, self.all_head_size) - self.key = nn.Linear(config.hidden_size, self.all_head_size) - self.value = nn.Linear(config.hidden_size, self.all_head_size) - - self.dropout = nn.Dropout(config.attention_probs_dropout_prob) - self.has_relative_attention_bias = config.has_relative_attention_bias - self.has_spatial_attention_bias = config.has_spatial_attention_bias - - def 
transpose_for_scores(self, x): - new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) - x = x.view(*new_x_shape) - return x.permute(0, 2, 1, 3) - - def cogview_attn(self, attention_scores, alpha=32): - ''' - https://arxiv.org/pdf/2105.13290.pdf - Section 2.4 Stabilization of training: Precision Bottleneck Relaxation (PB-Relax). - A replacement of the original nn.Softmax(dim=-1)(attention_scores) - Seems the new attention_probs will result in a slower speed and a little bias - Can use torch.allclose(standard_attention_probs, cogview_attention_probs, atol=1e-08) for comparison - The smaller atol (e.g., 1e-08), the better. - ''' - scaled_attention_scores = attention_scores / alpha - max_value = scaled_attention_scores.amax(dim=(-1)).unsqueeze(-1) - # max_value = scaled_attention_scores.amax(dim=(-2, -1)).unsqueeze(-1).unsqueeze(-1) - new_attention_scores = (scaled_attention_scores - max_value) * alpha - return nn.Softmax(dim=-1)(new_attention_scores) - - def forward( - self, - hidden_states, - attention_mask=None, - head_mask=None, - encoder_hidden_states=None, - encoder_attention_mask=None, - past_key_value=None, - output_attentions=False, - rel_pos=None, - rel_2d_pos=None, - ): - mixed_query_layer = self.query(hidden_states) - - # If this is instantiated as a cross-attention module, the keys - # and values come from an encoder; the attention mask needs to be - # such that the encoder's padding tokens are not attended to. - is_cross_attention = encoder_hidden_states is not None - - if is_cross_attention and past_key_value is not None: - # reuse k,v, cross_attentions - key_layer = past_key_value[0] - value_layer = past_key_value[1] - attention_mask = encoder_attention_mask - elif is_cross_attention: - key_layer = self.transpose_for_scores(self.key(encoder_hidden_states)) - value_layer = self.transpose_for_scores(self.value(encoder_hidden_states)) - attention_mask = encoder_attention_mask - elif past_key_value is not None: - key_layer = self.transpose_for_scores(self.key(hidden_states)) - value_layer = self.transpose_for_scores(self.value(hidden_states)) - key_layer = torch.cat([past_key_value[0], key_layer], dim=2) - value_layer = torch.cat([past_key_value[1], value_layer], dim=2) - else: - key_layer = self.transpose_for_scores(self.key(hidden_states)) - value_layer = self.transpose_for_scores(self.value(hidden_states)) - - query_layer = self.transpose_for_scores(mixed_query_layer) - - # Take the dot product between "query" and "key" to get the raw attention scores. - # The attention scores QT K/√d could be significantly larger than input elements, and result in overflow. - # Changing the computational order into QT(K/√d) alleviates the problem. 
(https://arxiv.org/pdf/2105.13290.pdf) - attention_scores = torch.matmul(query_layer / math.sqrt(self.attention_head_size), key_layer.transpose(-1, -2)) - - if self.has_relative_attention_bias and self.has_spatial_attention_bias: - attention_scores += (rel_pos + rel_2d_pos) / math.sqrt(self.attention_head_size) - elif self.has_relative_attention_bias: - attention_scores += rel_pos / math.sqrt(self.attention_head_size) - - # if self.has_relative_attention_bias: - # attention_scores += rel_pos - # if self.has_spatial_attention_bias: - # attention_scores += rel_2d_pos - - # attention_scores = attention_scores / math.sqrt(self.attention_head_size) - if attention_mask is not None: - # Apply the attention mask is (precomputed for all layers in RobertaModel forward() function) - attention_scores = attention_scores + attention_mask - - # Normalize the attention scores to probabilities. - # attention_probs = nn.Softmax(dim=-1)(attention_scores) # comment the line below and use this line for speedup - attention_probs = self.cogview_attn(attention_scores) # to stablize training - # assert torch.allclose(attention_probs, nn.Softmax(dim=-1)(attention_scores), atol=1e-8) - - # This is actually dropping out entire tokens to attend to, which might - # seem a bit unusual, but is taken from the original Transformer paper. - attention_probs = self.dropout(attention_probs) - - # Mask heads if we want to - if head_mask is not None: - attention_probs = attention_probs * head_mask - - context_layer = torch.matmul(attention_probs, value_layer) - - context_layer = context_layer.permute(0, 2, 1, 3).contiguous() - new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) - context_layer = context_layer.view(*new_context_layer_shape) - - outputs = (context_layer, attention_probs) if output_attentions else (context_layer,) - - return outputs - - -class LayoutLMv3Attention(nn.Module): - def __init__(self, config): - super().__init__() - self.self = LayoutLMv3SelfAttention(config) - self.output = RobertaSelfOutput(config) - self.pruned_heads = set() - - def prune_heads(self, heads): - if len(heads) == 0: - return - heads, index = find_pruneable_heads_and_indices( - heads, self.self.num_attention_heads, self.self.attention_head_size, self.pruned_heads - ) - - # Prune linear layers - self.self.query = prune_linear_layer(self.self.query, index) - self.self.key = prune_linear_layer(self.self.key, index) - self.self.value = prune_linear_layer(self.self.value, index) - self.output.dense = prune_linear_layer(self.output.dense, index, dim=1) - - # Update hyper params and store pruned heads - self.self.num_attention_heads = self.self.num_attention_heads - len(heads) - self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads - self.pruned_heads = self.pruned_heads.union(heads) - - def forward( - self, - hidden_states, - attention_mask=None, - head_mask=None, - encoder_hidden_states=None, - encoder_attention_mask=None, - past_key_value=None, - output_attentions=False, - rel_pos=None, - rel_2d_pos=None, - ): - self_outputs = self.self( - hidden_states, - attention_mask, - head_mask, - encoder_hidden_states, - encoder_attention_mask, - past_key_value, - output_attentions, - rel_pos=rel_pos, - rel_2d_pos=rel_2d_pos, - ) - attention_output = self.output(self_outputs[0], hidden_states) - outputs = (attention_output,) + self_outputs[1:] # add attentions if we output them - return outputs - - -class LayoutLMv3Layer(nn.Module): - def __init__(self, config): - super().__init__() - 
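[Editor's note] As a numerical sanity check on the PB-Relax softmax used in the attention block above: dividing the scores by alpha, subtracting the per-row maximum, and scaling back before the softmax is mathematically identical to a plain softmax and only buys floating-point headroom. A small self-contained sketch; the tensor shape (batch 2, 12 heads, 197 tokens) and the score scale are illustrative:

import torch

def cogview_softmax(scores, alpha=32):
    # PB-Relax (CogView, Section 2.4): shrink, subtract the row max, re-scale.
    scaled = scores / alpha
    scaled = scaled - scaled.amax(dim=-1, keepdim=True)
    return torch.softmax(scaled * alpha, dim=-1)

scores = torch.randn(2, 12, 197, 197) * 50  # hypothetical large attention logits
assert torch.allclose(cogview_softmax(scores), torch.softmax(scores, dim=-1), atol=1e-6)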
self.chunk_size_feed_forward = config.chunk_size_feed_forward - self.seq_len_dim = 1 - self.attention = LayoutLMv3Attention(config) - assert not config.is_decoder and not config.add_cross_attention, \ - "This version do not support decoder. Please refer to RoBERTa for implementation of is_decoder." - self.intermediate = RobertaIntermediate(config) - self.output = RobertaOutput(config) - - def forward( - self, - hidden_states, - attention_mask=None, - head_mask=None, - encoder_hidden_states=None, - encoder_attention_mask=None, - past_key_value=None, - output_attentions=False, - rel_pos=None, - rel_2d_pos=None, - ): - # decoder uni-directional self-attention cached key/values tuple is at positions 1,2 - self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None - self_attention_outputs = self.attention( - hidden_states, - attention_mask, - head_mask, - output_attentions=output_attentions, - past_key_value=self_attn_past_key_value, - rel_pos=rel_pos, - rel_2d_pos=rel_2d_pos, - ) - attention_output = self_attention_outputs[0] - - outputs = self_attention_outputs[1:] # add self attentions if we output attention weights - - layer_output = apply_chunking_to_forward( - self.feed_forward_chunk, self.chunk_size_feed_forward, self.seq_len_dim, attention_output - ) - outputs = (layer_output,) + outputs - - return outputs - - def feed_forward_chunk(self, attention_output): - intermediate_output = self.intermediate(attention_output) - layer_output = self.output(intermediate_output, attention_output) - return layer_output - - -class LayoutLMv3Encoder(nn.Module): - def __init__(self, config, detection=False, out_features=None): - super().__init__() - self.config = config - self.detection = detection - self.layer = nn.ModuleList([LayoutLMv3Layer(config) for _ in range(config.num_hidden_layers)]) - self.gradient_checkpointing = False - - self.has_relative_attention_bias = config.has_relative_attention_bias - self.has_spatial_attention_bias = config.has_spatial_attention_bias - - if self.has_relative_attention_bias: - self.rel_pos_bins = config.rel_pos_bins - self.max_rel_pos = config.max_rel_pos - self.rel_pos_onehot_size = config.rel_pos_bins - self.rel_pos_bias = nn.Linear(self.rel_pos_onehot_size, config.num_attention_heads, bias=False) - - if self.has_spatial_attention_bias: - self.max_rel_2d_pos = config.max_rel_2d_pos - self.rel_2d_pos_bins = config.rel_2d_pos_bins - self.rel_2d_pos_onehot_size = config.rel_2d_pos_bins - self.rel_pos_x_bias = nn.Linear(self.rel_2d_pos_onehot_size, config.num_attention_heads, bias=False) - self.rel_pos_y_bias = nn.Linear(self.rel_2d_pos_onehot_size, config.num_attention_heads, bias=False) - - if self.detection: - self.gradient_checkpointing = True - embed_dim = self.config.hidden_size - self.out_features = out_features - self.out_indices = [int(name[5:]) for name in out_features] - self.fpn1 = nn.Sequential( - nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2), - # nn.SyncBatchNorm(embed_dim), - nn.BatchNorm2d(embed_dim), - nn.GELU(), - nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2), - ) - - self.fpn2 = nn.Sequential( - nn.ConvTranspose2d(embed_dim, embed_dim, kernel_size=2, stride=2), - ) - - self.fpn3 = nn.Identity() - - self.fpn4 = nn.MaxPool2d(kernel_size=2, stride=2) - self.ops = [self.fpn1, self.fpn2, self.fpn3, self.fpn4] - - def relative_position_bucket(self, relative_position, bidirectional=True, num_buckets=32, max_distance=128): - ret = 0 - if bidirectional: - num_buckets //= 2 - ret += 
(relative_position > 0).long() * num_buckets - n = torch.abs(relative_position) - else: - n = torch.max(-relative_position, torch.zeros_like(relative_position)) - # now n is in the range [0, inf) - - # half of the buckets are for exact increments in positions - max_exact = num_buckets // 2 - is_small = n < max_exact - - # The other half of the buckets are for logarithmically bigger bins in positions up to max_distance - val_if_large = max_exact + ( - torch.log(n.float() / max_exact) / math.log(max_distance / max_exact) * (num_buckets - max_exact) - ).to(torch.long) - val_if_large = torch.min(val_if_large, torch.full_like(val_if_large, num_buckets - 1)) - - ret += torch.where(is_small, n, val_if_large) - return ret - - def _cal_1d_pos_emb(self, hidden_states, position_ids, valid_span): - VISUAL_NUM = 196 + 1 - - rel_pos_mat = position_ids.unsqueeze(-2) - position_ids.unsqueeze(-1) - - if valid_span is not None: - # for the text part, if two words are not in the same line, - # set their distance to the max value (position_ids.shape[-1]) - rel_pos_mat[(rel_pos_mat > 0) & (valid_span == False)] = position_ids.shape[1] - rel_pos_mat[(rel_pos_mat < 0) & (valid_span == False)] = -position_ids.shape[1] - - # image-text, minimum distance - rel_pos_mat[:, -VISUAL_NUM:, :-VISUAL_NUM] = 0 - rel_pos_mat[:, :-VISUAL_NUM, -VISUAL_NUM:] = 0 - - rel_pos = self.relative_position_bucket( - rel_pos_mat, - num_buckets=self.rel_pos_bins, - max_distance=self.max_rel_pos, - ) - rel_pos = F.one_hot(rel_pos, num_classes=self.rel_pos_onehot_size).type_as(hidden_states) - rel_pos = self.rel_pos_bias(rel_pos).permute(0, 3, 1, 2) - rel_pos = rel_pos.contiguous() - return rel_pos - - def _cal_2d_pos_emb(self, hidden_states, bbox): - position_coord_x = bbox[:, :, 0] - position_coord_y = bbox[:, :, 3] - rel_pos_x_2d_mat = position_coord_x.unsqueeze(-2) - position_coord_x.unsqueeze(-1) - rel_pos_y_2d_mat = position_coord_y.unsqueeze(-2) - position_coord_y.unsqueeze(-1) - rel_pos_x = self.relative_position_bucket( - rel_pos_x_2d_mat, - num_buckets=self.rel_2d_pos_bins, - max_distance=self.max_rel_2d_pos, - ) - rel_pos_y = self.relative_position_bucket( - rel_pos_y_2d_mat, - num_buckets=self.rel_2d_pos_bins, - max_distance=self.max_rel_2d_pos, - ) - rel_pos_x = F.one_hot(rel_pos_x, num_classes=self.rel_2d_pos_onehot_size).type_as(hidden_states) - rel_pos_y = F.one_hot(rel_pos_y, num_classes=self.rel_2d_pos_onehot_size).type_as(hidden_states) - rel_pos_x = self.rel_pos_x_bias(rel_pos_x).permute(0, 3, 1, 2) - rel_pos_y = self.rel_pos_y_bias(rel_pos_y).permute(0, 3, 1, 2) - rel_pos_x = rel_pos_x.contiguous() - rel_pos_y = rel_pos_y.contiguous() - rel_2d_pos = rel_pos_x + rel_pos_y - return rel_2d_pos - - def forward( - self, - hidden_states, - bbox=None, - attention_mask=None, - head_mask=None, - encoder_hidden_states=None, - encoder_attention_mask=None, - past_key_values=None, - use_cache=None, - output_attentions=False, - output_hidden_states=False, - return_dict=True, - position_ids=None, - Hp=None, - Wp=None, - valid_span=None, - ): - all_hidden_states = () if output_hidden_states else None - all_self_attentions = () if output_attentions else None - all_cross_attentions = () if output_attentions and self.config.add_cross_attention else None - - next_decoder_cache = () if use_cache else None - - rel_pos = self._cal_1d_pos_emb(hidden_states, position_ids, valid_span) if self.has_relative_attention_bias else None - rel_2d_pos = self._cal_2d_pos_emb(hidden_states, bbox) if self.has_spatial_attention_bias else None - - if 
self.detection: - feat_out = {} - j = 0 - - for i, layer_module in enumerate(self.layer): - if output_hidden_states: - all_hidden_states = all_hidden_states + (hidden_states,) - - layer_head_mask = head_mask[i] if head_mask is not None else None - past_key_value = past_key_values[i] if past_key_values is not None else None - - if self.gradient_checkpointing and self.training: - - if use_cache: - logger.warning( - "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..." - ) - use_cache = False - - def create_custom_forward(module): - def custom_forward(*inputs): - return module(*inputs) - # return module(*inputs, past_key_value, output_attentions, rel_pos, rel_2d_pos) - # The above line will cause error: - # RuntimeError: Trying to backward through the graph a second time - # (or directly access saved tensors after they have already been freed). - return custom_forward - - layer_outputs = torch.utils.checkpoint.checkpoint( - create_custom_forward(layer_module), - hidden_states, - attention_mask, - layer_head_mask, - encoder_hidden_states, - encoder_attention_mask, - past_key_value, - output_attentions, - rel_pos, - rel_2d_pos - ) - else: - layer_outputs = layer_module( - hidden_states, - attention_mask, - layer_head_mask, - encoder_hidden_states, - encoder_attention_mask, - past_key_value, - output_attentions, - rel_pos=rel_pos, - rel_2d_pos=rel_2d_pos, - ) - - hidden_states = layer_outputs[0] - if use_cache: - next_decoder_cache += (layer_outputs[-1],) - if output_attentions: - all_self_attentions = all_self_attentions + (layer_outputs[1],) - if self.config.add_cross_attention: - all_cross_attentions = all_cross_attentions + (layer_outputs[2],) - - if self.detection and i in self.out_indices: - xp = hidden_states[:, -Hp*Wp:, :].permute(0, 2, 1).reshape(len(hidden_states), -1, Hp, Wp) - feat_out[self.out_features[j]] = self.ops[j](xp.contiguous()) - j += 1 - - if self.detection: - return feat_out - - if output_hidden_states: - all_hidden_states = all_hidden_states + (hidden_states,) - - if not return_dict: - return tuple( - v - for v in [ - hidden_states, - next_decoder_cache, - all_hidden_states, - all_self_attentions, - all_cross_attentions, - ] - if v is not None - ) - return BaseModelOutputWithPastAndCrossAttentions( - last_hidden_state=hidden_states, - past_key_values=next_decoder_cache, - hidden_states=all_hidden_states, - attentions=all_self_attentions, - cross_attentions=all_cross_attentions, - ) - - -class LayoutLMv3Model(LayoutLMv3PreTrainedModel): - """ - """ - - _keys_to_ignore_on_load_missing = [r"position_ids"] - - # Copied from transformers.models.bert.modeling_bert.BertModel.__init__ with Bert->Roberta - def __init__(self, config, detection=False, out_features=None, image_only=False): - super().__init__(config) - self.config = config - assert not config.is_decoder and not config.add_cross_attention, \ - "This version do not support decoder. Please refer to RoBERTa for implementation of is_decoder." 
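Note: the 1D and 2D attention biases produced by _cal_1d_pos_emb and _cal_2d_pos_emb above both pass their pairwise coordinate differences through relative_position_bucket, a T5-style scheme in which half of the buckets cover small exact offsets and the other half are log-spaced up to max_distance, before the one-hot result is projected to per-head biases. A minimal standalone sketch of that bucketing, assuming only torch and mirroring the encoder's defaults:

import math
import torch

def relative_position_bucket(relative_position, bidirectional=True, num_buckets=32, max_distance=128):
    # Half of the buckets encode the sign plus small exact offsets,
    # the other half are log-spaced out to max_distance.
    ret = 0
    if bidirectional:
        num_buckets //= 2
        ret += (relative_position > 0).long() * num_buckets
        n = torch.abs(relative_position)
    else:
        n = torch.max(-relative_position, torch.zeros_like(relative_position))
    max_exact = num_buckets // 2
    is_small = n < max_exact
    val_if_large = max_exact + (
        torch.log(n.float() / max_exact) / math.log(max_distance / max_exact) * (num_buckets - max_exact)
    ).to(torch.long)
    val_if_large = torch.min(val_if_large, torch.full_like(val_if_large, num_buckets - 1))
    ret += torch.where(is_small, n, val_if_large)
    return ret

# 1D usage as in _cal_1d_pos_emb: bucket the pairwise differences of position ids.
position_ids = torch.arange(6).unsqueeze(0)                       # (batch, seq)
rel_pos_mat = position_ids.unsqueeze(-2) - position_ids.unsqueeze(-1)
buckets = relative_position_bucket(rel_pos_mat)
print(buckets.shape)                                              # torch.Size([1, 6, 6])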
- self.detection = detection - if not self.detection: - self.image_only = False - else: - assert config.visual_embed - self.image_only = image_only - - if not self.image_only: - self.embeddings = LayoutLMv3Embeddings(config) - self.encoder = LayoutLMv3Encoder(config, detection=detection, out_features=out_features) - - if config.visual_embed: - embed_dim = self.config.hidden_size - # use the default pre-training parameters for fine-tuning (e.g., input_size) - # when the input_size is larger in fine-tuning, we will interpolate the position embedding in forward - self.patch_embed = PatchEmbed(embed_dim=embed_dim) - - patch_size = 16 - size = int(self.config.input_size / patch_size) - self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) - self.pos_embed = nn.Parameter(torch.zeros(1, size * size + 1, embed_dim)) - self.pos_drop = nn.Dropout(p=0.) - - self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps) - self.dropout = nn.Dropout(config.hidden_dropout_prob) - - if self.config.has_relative_attention_bias or self.config.has_spatial_attention_bias: - self._init_visual_bbox(img_size=(size, size)) - - from functools import partial - norm_layer = partial(nn.LayerNorm, eps=1e-6) - self.norm = norm_layer(embed_dim) - - self.init_weights() - - def get_input_embeddings(self): - return self.embeddings.word_embeddings - - def set_input_embeddings(self, value): - self.embeddings.word_embeddings = value - - def _prune_heads(self, heads_to_prune): - """ - Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base - class PreTrainedModel - """ - for layer, heads in heads_to_prune.items(): - self.encoder.layer[layer].attention.prune_heads(heads) - - def _init_visual_bbox(self, img_size=(14, 14), max_len=1000): - visual_bbox_x = torch.div(torch.arange(0, max_len * (img_size[1] + 1), max_len), - img_size[1], rounding_mode='trunc') - visual_bbox_y = torch.div(torch.arange(0, max_len * (img_size[0] + 1), max_len), - img_size[0], rounding_mode='trunc') - visual_bbox = torch.stack( - [ - visual_bbox_x[:-1].repeat(img_size[0], 1), - visual_bbox_y[:-1].repeat(img_size[1], 1).transpose(0, 1), - visual_bbox_x[1:].repeat(img_size[0], 1), - visual_bbox_y[1:].repeat(img_size[1], 1).transpose(0, 1), - ], - dim=-1, - ).view(-1, 4) - - cls_token_box = torch.tensor([[0 + 1, 0 + 1, max_len - 1, max_len - 1]]) - self.visual_bbox = torch.cat([cls_token_box, visual_bbox], dim=0) - - def _calc_visual_bbox(self, device, dtype, bsz): # , img_size=(14, 14), max_len=1000): - visual_bbox = self.visual_bbox.repeat(bsz, 1, 1) - visual_bbox = visual_bbox.to(device).type(dtype) - return visual_bbox - - def forward_image(self, x): - if self.detection: - x = self.patch_embed(x, self.pos_embed[:, 1:, :] if self.pos_embed is not None else None) - else: - x = self.patch_embed(x) - batch_size, seq_len, _ = x.size() - - cls_tokens = self.cls_token.expand(batch_size, -1, -1) # stole cls_tokens impl from Phil Wang, thanks - if self.pos_embed is not None and self.detection: - cls_tokens = cls_tokens + self.pos_embed[:, :1, :] - - x = torch.cat((cls_tokens, x), dim=1) - if self.pos_embed is not None and not self.detection: - x = x + self.pos_embed - x = self.pos_drop(x) - - x = self.norm(x) - return x - - # Copied from transformers.models.bert.modeling_bert.BertModel.forward - def forward( - self, - input_ids=None, - bbox=None, - attention_mask=None, - token_type_ids=None, - valid_span=None, - position_ids=None, - head_mask=None, - inputs_embeds=None, - 
encoder_hidden_states=None, - encoder_attention_mask=None, - past_key_values=None, - use_cache=None, - output_attentions=None, - output_hidden_states=None, - return_dict=None, - images=None, - ): - r""" - encoder_hidden_states (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length, hidden_size)`, `optional`): - Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention if - the model is configured as a decoder. - encoder_attention_mask (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): - Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used in - the cross-attention if the model is configured as a decoder. Mask values selected in ``[0, 1]``: - - - 1 for tokens that are **not masked**, - - 0 for tokens that are **masked**. - past_key_values (:obj:`tuple(tuple(torch.FloatTensor))` of length :obj:`config.n_layers` with each tuple having 4 tensors of shape :obj:`(batch_size, num_heads, sequence_length - 1, embed_size_per_head)`): - Contains precomputed key and value hidden states of the attention blocks. Can be used to speed up decoding. - - If :obj:`past_key_values` are used, the user can optionally input only the last :obj:`decoder_input_ids` - (those that don't have their past key value states given to this model) of shape :obj:`(batch_size, 1)` - instead of all :obj:`decoder_input_ids` of shape :obj:`(batch_size, sequence_length)`. - use_cache (:obj:`bool`, `optional`): - If set to :obj:`True`, :obj:`past_key_values` key value states are returned and can be used to speed up - decoding (see :obj:`past_key_values`). - """ - output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions - output_hidden_states = ( - output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states - ) - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - use_cache = False - - # if input_ids is not None and inputs_embeds is not None: - # raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") - if input_ids is not None: - input_shape = input_ids.size() - batch_size, seq_length = input_shape - device = input_ids.device - elif inputs_embeds is not None: - input_shape = inputs_embeds.size()[:-1] - batch_size, seq_length = input_shape - device = inputs_embeds.device - elif images is not None: - batch_size = len(images) - device = images.device - else: - raise ValueError("You have to specify either input_ids or inputs_embeds or images") - - if not self.image_only: - # past_key_values_length - past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0 - - if attention_mask is None: - attention_mask = torch.ones(((batch_size, seq_length + past_key_values_length)), device=device) - if token_type_ids is None: - token_type_ids = torch.zeros(input_shape, dtype=torch.long, device=device) - - # We can provide a self-attention mask of dimensions [batch_size, from_seq_length, to_seq_length] - # ourselves in which case we just need to make it broadcastable to all heads. 
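The comment above refers to get_extended_attention_mask from the Hugging Face base class, which reshapes the (batch_size, seq_length) 0/1 padding mask into an additive (batch_size, 1, 1, seq_length) bias that broadcasts over heads and query positions. A rough sketch of that conversion (the helper name here is illustrative, not part of this module):

import torch

def make_extended_attention_mask(attention_mask, dtype=torch.float32):
    # 1 -> keep (bias 0); 0 -> mask (large negative bias, so softmax gives ~0 weight)
    extended = attention_mask[:, None, None, :].to(dtype)
    return (1.0 - extended) * torch.finfo(dtype).min

mask = torch.tensor([[1, 1, 1, 0, 0]])            # one sequence with two padding tokens
print(make_extended_attention_mask(mask).shape)   # torch.Size([1, 1, 1, 5])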
- # extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(attention_mask, input_shape, device) - - encoder_extended_attention_mask = None - - # Prepare head mask if needed - # 1.0 in head_mask indicate we keep the head - # attention_probs has shape bsz x n_heads x N x N - # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] - # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length] - head_mask = self.get_head_mask(head_mask, self.config.num_hidden_layers) - - if not self.image_only: - if bbox is None: - bbox = torch.zeros(tuple(list(input_shape) + [4]), dtype=torch.long, device=device) - - embedding_output = self.embeddings( - input_ids=input_ids, - bbox=bbox, - position_ids=position_ids, - token_type_ids=token_type_ids, - inputs_embeds=inputs_embeds, - past_key_values_length=past_key_values_length, - ) - - final_bbox = final_position_ids = None - Hp = Wp = None - if images is not None: - patch_size = 16 - Hp, Wp = int(images.shape[2] / patch_size), int(images.shape[3] / patch_size) - visual_emb = self.forward_image(images) - if self.detection: - visual_attention_mask = torch.ones((batch_size, visual_emb.shape[1]), dtype=torch.long, device=device) - if self.image_only: - attention_mask = visual_attention_mask - else: - attention_mask = torch.cat([attention_mask, visual_attention_mask], dim=1) - elif self.image_only: - attention_mask = torch.ones((batch_size, visual_emb.shape[1]), dtype=torch.long, device=device) - - if self.config.has_relative_attention_bias or self.config.has_spatial_attention_bias: - if self.config.has_spatial_attention_bias: - visual_bbox = self._calc_visual_bbox(device, dtype=torch.long, bsz=batch_size) - if self.image_only: - final_bbox = visual_bbox - else: - final_bbox = torch.cat([bbox, visual_bbox], dim=1) - - visual_position_ids = torch.arange(0, visual_emb.shape[1], dtype=torch.long, device=device).repeat( - batch_size, 1) - if self.image_only: - final_position_ids = visual_position_ids - else: - position_ids = torch.arange(0, input_shape[1], device=device).unsqueeze(0) - position_ids = position_ids.expand_as(input_ids) - final_position_ids = torch.cat([position_ids, visual_position_ids], dim=1) - - if self.image_only: - embedding_output = visual_emb - else: - embedding_output = torch.cat([embedding_output, visual_emb], dim=1) - embedding_output = self.LayerNorm(embedding_output) - embedding_output = self.dropout(embedding_output) - elif self.config.has_relative_attention_bias or self.config.has_spatial_attention_bias: - if self.config.has_spatial_attention_bias: - final_bbox = bbox - if self.config.has_relative_attention_bias: - position_ids = self.embeddings.position_ids[:, :input_shape[1]] - position_ids = position_ids.expand_as(input_ids) - final_position_ids = position_ids - - extended_attention_mask: torch.Tensor = self.get_extended_attention_mask(attention_mask, None, device) - - encoder_outputs = self.encoder( - embedding_output, - bbox=final_bbox, - position_ids=final_position_ids, - attention_mask=extended_attention_mask, - head_mask=head_mask, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_extended_attention_mask, - past_key_values=past_key_values, - use_cache=use_cache, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - Hp=Hp, - Wp=Wp, - valid_span=valid_span, - ) - - if self.detection: - return encoder_outputs - - sequence_output = encoder_outputs[0] - pooled_output = 
None - - if not return_dict: - return (sequence_output, pooled_output) + encoder_outputs[1:] - - return BaseModelOutputWithPoolingAndCrossAttentions( - last_hidden_state=sequence_output, - pooler_output=pooled_output, - past_key_values=encoder_outputs.past_key_values, - hidden_states=encoder_outputs.hidden_states, - attentions=encoder_outputs.attentions, - cross_attentions=encoder_outputs.cross_attentions, - ) - - -class LayoutLMv3ClassificationHead(nn.Module): - """ - Head for sentence-level classification tasks. - Reference: RobertaClassificationHead - """ - - def __init__(self, config, pool_feature=False): - super().__init__() - self.pool_feature = pool_feature - if pool_feature: - self.dense = nn.Linear(config.hidden_size*3, config.hidden_size) - else: - self.dense = nn.Linear(config.hidden_size, config.hidden_size) - classifier_dropout = ( - config.classifier_dropout if config.classifier_dropout is not None else config.hidden_dropout_prob - ) - self.dropout = nn.Dropout(classifier_dropout) - self.out_proj = nn.Linear(config.hidden_size, config.num_labels) - - def forward(self, x): - # x = features[:, 0, :] # take token (equiv. to [CLS]) - x = self.dropout(x) - x = self.dense(x) - x = torch.tanh(x) - x = self.dropout(x) - x = self.out_proj(x) - return x - - -class LayoutLMv3ForTokenClassification(LayoutLMv3PreTrainedModel): - _keys_to_ignore_on_load_unexpected = [r"pooler"] - _keys_to_ignore_on_load_missing = [r"position_ids"] - - def __init__(self, config): - super().__init__(config) - self.num_labels = config.num_labels - - self.layoutlmv3 = LayoutLMv3Model(config) - self.dropout = nn.Dropout(config.hidden_dropout_prob) - if config.num_labels < 10: - self.classifier = nn.Linear(config.hidden_size, config.num_labels) - else: - self.classifier = LayoutLMv3ClassificationHead(config, pool_feature=False) - - self.init_weights() - - def forward( - self, - input_ids=None, - bbox=None, - attention_mask=None, - token_type_ids=None, - position_ids=None, - valid_span=None, - head_mask=None, - inputs_embeds=None, - labels=None, - output_attentions=None, - output_hidden_states=None, - return_dict=None, - images=None, - ): - r""" - labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size, sequence_length)`, `optional`): - Labels for computing the token classification loss. Indices should be in ``[0, ..., config.num_labels - - 1]``. 
- """ - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - outputs = self.layoutlmv3( - input_ids, - bbox=bbox, - attention_mask=attention_mask, - token_type_ids=token_type_ids, - position_ids=position_ids, - head_mask=head_mask, - inputs_embeds=inputs_embeds, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - images=images, - valid_span=valid_span, - ) - - sequence_output = outputs[0] - - sequence_output = self.dropout(sequence_output) - logits = self.classifier(sequence_output) - - loss = None - if labels is not None: - loss_fct = CrossEntropyLoss() - # Only keep active parts of the loss - if attention_mask is not None: - active_loss = attention_mask.view(-1) == 1 - active_logits = logits.view(-1, self.num_labels) - active_labels = torch.where( - active_loss, labels.view(-1), torch.tensor(loss_fct.ignore_index).type_as(labels) - ) - loss = loss_fct(active_logits, active_labels) - else: - loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) - - if not return_dict: - output = (logits,) + outputs[2:] - return ((loss,) + output) if loss is not None else output - - return TokenClassifierOutput( - loss=loss, - logits=logits, - hidden_states=outputs.hidden_states, - attentions=outputs.attentions, - ) - - -class LayoutLMv3ForQuestionAnswering(LayoutLMv3PreTrainedModel): - _keys_to_ignore_on_load_unexpected = [r"pooler"] - _keys_to_ignore_on_load_missing = [r"position_ids"] - - def __init__(self, config): - super().__init__(config) - self.num_labels = config.num_labels - - self.layoutlmv3 = LayoutLMv3Model(config) - # self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels) - self.qa_outputs = LayoutLMv3ClassificationHead(config, pool_feature=False) - - self.init_weights() - - def forward( - self, - input_ids=None, - attention_mask=None, - token_type_ids=None, - position_ids=None, - valid_span=None, - head_mask=None, - inputs_embeds=None, - start_positions=None, - end_positions=None, - output_attentions=None, - output_hidden_states=None, - return_dict=None, - bbox=None, - images=None, - ): - r""" - start_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`): - Labels for position (index) of the start of the labelled span for computing the token classification loss. - Positions are clamped to the length of the sequence (:obj:`sequence_length`). Position outside of the - sequence are not taken into account for computing the loss. - end_positions (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`): - Labels for position (index) of the end of the labelled span for computing the token classification loss. - Positions are clamped to the length of the sequence (:obj:`sequence_length`). Position outside of the - sequence are not taken into account for computing the loss. 
- """ - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - outputs = self.layoutlmv3( - input_ids, - attention_mask=attention_mask, - token_type_ids=token_type_ids, - position_ids=position_ids, - head_mask=head_mask, - inputs_embeds=inputs_embeds, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - bbox=bbox, - images=images, - valid_span=valid_span, - ) - - sequence_output = outputs[0] - - logits = self.qa_outputs(sequence_output) - start_logits, end_logits = logits.split(1, dim=-1) - start_logits = start_logits.squeeze(-1).contiguous() - end_logits = end_logits.squeeze(-1).contiguous() - - total_loss = None - if start_positions is not None and end_positions is not None: - # If we are on multi-GPU, split add a dimension - if len(start_positions.size()) > 1: - start_positions = start_positions.squeeze(-1) - if len(end_positions.size()) > 1: - end_positions = end_positions.squeeze(-1) - # sometimes the start/end positions are outside our model inputs, we ignore these terms - ignored_index = start_logits.size(1) - start_positions = start_positions.clamp(0, ignored_index) - end_positions = end_positions.clamp(0, ignored_index) - - loss_fct = CrossEntropyLoss(ignore_index=ignored_index) - start_loss = loss_fct(start_logits, start_positions) - end_loss = loss_fct(end_logits, end_positions) - total_loss = (start_loss + end_loss) / 2 - - if not return_dict: - output = (start_logits, end_logits) + outputs[2:] - return ((total_loss,) + output) if total_loss is not None else output - - return QuestionAnsweringModelOutput( - loss=total_loss, - start_logits=start_logits, - end_logits=end_logits, - hidden_states=outputs.hidden_states, - attentions=outputs.attentions, - ) - - -class LayoutLMv3ForSequenceClassification(LayoutLMv3PreTrainedModel): - _keys_to_ignore_on_load_missing = [r"position_ids"] - - def __init__(self, config): - super().__init__(config) - self.num_labels = config.num_labels - self.config = config - self.layoutlmv3 = LayoutLMv3Model(config) - self.classifier = LayoutLMv3ClassificationHead(config, pool_feature=False) - - self.init_weights() - - def forward( - self, - input_ids=None, - attention_mask=None, - token_type_ids=None, - position_ids=None, - valid_span=None, - head_mask=None, - inputs_embeds=None, - labels=None, - output_attentions=None, - output_hidden_states=None, - return_dict=None, - bbox=None, - images=None, - ): - r""" - labels (:obj:`torch.LongTensor` of shape :obj:`(batch_size,)`, `optional`): - Labels for computing the sequence classification/regression loss. Indices should be in :obj:`[0, ..., - config.num_labels - 1]`. If :obj:`config.num_labels == 1` a regression loss is computed (Mean-Square loss), - If :obj:`config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
- """ - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - outputs = self.layoutlmv3( - input_ids, - attention_mask=attention_mask, - token_type_ids=token_type_ids, - position_ids=position_ids, - head_mask=head_mask, - inputs_embeds=inputs_embeds, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - bbox=bbox, - images=images, - valid_span=valid_span, - ) - - sequence_output = outputs[0][:, 0, :] - logits = self.classifier(sequence_output) - - loss = None - if labels is not None: - if self.config.problem_type is None: - if self.num_labels == 1: - self.config.problem_type = "regression" - elif self.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int): - self.config.problem_type = "single_label_classification" - else: - self.config.problem_type = "multi_label_classification" - - if self.config.problem_type == "regression": - loss_fct = MSELoss() - if self.num_labels == 1: - loss = loss_fct(logits.squeeze(), labels.squeeze()) - else: - loss = loss_fct(logits, labels) - elif self.config.problem_type == "single_label_classification": - loss_fct = CrossEntropyLoss() - loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1)) - elif self.config.problem_type == "multi_label_classification": - loss_fct = BCEWithLogitsLoss() - loss = loss_fct(logits, labels) - - if not return_dict: - output = (logits,) + outputs[2:] - return ((loss,) + output) if loss is not None else output - - return SequenceClassifierOutput( - loss=loss, - logits=logits, - hidden_states=outputs.hidden_states, - attentions=outputs.attentions, - ) diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py deleted file mode 100644 index f340d3c6aca04b6567614e6aa221f7c542239305..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3.py +++ /dev/null @@ -1,32 +0,0 @@ -# coding=utf-8 -# Copyright 2018 The Open AI Team Authors and The HuggingFace Inc. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-"""Tokenization classes for LayoutLMv3, refer to RoBERTa.""" - -from transformers.models.roberta import RobertaTokenizer -from transformers.utils import logging - - -logger = logging.get_logger(__name__) - -VOCAB_FILES_NAMES = { - "vocab_file": "vocab.json", - "merges_file": "merges.txt", -} - -class LayoutLMv3Tokenizer(RobertaTokenizer): - vocab_files_names = VOCAB_FILES_NAMES - # pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP - # max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES - model_input_names = ["input_ids", "attention_mask"] diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py deleted file mode 100644 index 9fd75ff1d3bd7725025114e99320afd80823e9d0..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/layoutlmft/models/layoutlmv3/tokenization_layoutlmv3_fast.py +++ /dev/null @@ -1,34 +0,0 @@ -# coding=utf-8 -# Copyright 2018 The Open AI Team Authors and The HuggingFace Inc. team. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Fast Tokenization classes for LayoutLMv3, refer to RoBERTa.""" - - -from transformers.models.roberta.tokenization_roberta_fast import RobertaTokenizerFast -from transformers.utils import logging - -from .tokenization_layoutlmv3 import LayoutLMv3Tokenizer - - -logger = logging.get_logger(__name__) - -VOCAB_FILES_NAMES = {"vocab_file": "vocab.json", "merges_file": "merges.txt", "tokenizer_file": "tokenizer.json"} - - -class LayoutLMv3TokenizerFast(RobertaTokenizerFast): - vocab_files_names = VOCAB_FILES_NAMES - # pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP - # max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES - model_input_names = ["input_ids", "attention_mask"] - slow_tokenizer_class = LayoutLMv3Tokenizer diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/model_init.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/model_init.py deleted file mode 100644 index a624d60d5a75902f3c44d3dfbe1ef350cddf7427..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/model_init.py +++ /dev/null @@ -1,151 +0,0 @@ -from .visualizer import Visualizer -from .rcnn_vl import * -from .backbone import * - -from detectron2.config import get_cfg -from detectron2.config import CfgNode as CN -from detectron2.data import MetadataCatalog, DatasetCatalog -from detectron2.data.datasets import register_coco_instances -from detectron2.engine import DefaultTrainer, default_argument_parser, default_setup, launch, DefaultPredictor - - -def add_vit_config(cfg): - """ - Add config for VIT. - """ - _C = cfg - - _C.MODEL.VIT = CN() - - # CoaT model name. - _C.MODEL.VIT.NAME = "" - - # Output features from CoaT backbone. 
- _C.MODEL.VIT.OUT_FEATURES = ["layer3", "layer5", "layer7", "layer11"] - - _C.MODEL.VIT.IMG_SIZE = [224, 224] - - _C.MODEL.VIT.POS_TYPE = "shared_rel" - - _C.MODEL.VIT.DROP_PATH = 0. - - _C.MODEL.VIT.MODEL_KWARGS = "{}" - - _C.SOLVER.OPTIMIZER = "ADAMW" - - _C.SOLVER.BACKBONE_MULTIPLIER = 1.0 - - _C.AUG = CN() - - _C.AUG.DETR = False - - _C.MODEL.IMAGE_ONLY = True - _C.PUBLAYNET_DATA_DIR_TRAIN = "" - _C.PUBLAYNET_DATA_DIR_TEST = "" - _C.FOOTNOTE_DATA_DIR_TRAIN = "" - _C.FOOTNOTE_DATA_DIR_VAL = "" - _C.SCIHUB_DATA_DIR_TRAIN = "" - _C.SCIHUB_DATA_DIR_TEST = "" - _C.JIAOCAI_DATA_DIR_TRAIN = "" - _C.JIAOCAI_DATA_DIR_TEST = "" - _C.ICDAR_DATA_DIR_TRAIN = "" - _C.ICDAR_DATA_DIR_TEST = "" - _C.M6DOC_DATA_DIR_TEST = "" - _C.DOCSTRUCTBENCH_DATA_DIR_TEST = "" - _C.DOCSTRUCTBENCHv2_DATA_DIR_TEST = "" - _C.CACHE_DIR = "" - _C.MODEL.CONFIG_PATH = "" - - # effective update steps would be MAX_ITER/GRADIENT_ACCUMULATION_STEPS - # maybe need to set MAX_ITER *= GRADIENT_ACCUMULATION_STEPS - _C.SOLVER.GRADIENT_ACCUMULATION_STEPS = 1 - - -def setup(args, device): - """ - Create configs and perform basic setups. - """ - cfg = get_cfg() - - # add_coat_config(cfg) - add_vit_config(cfg) - cfg.merge_from_file(args.config_file) - cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.2 # set threshold for this model - cfg.merge_from_list(args.opts) - - # 使用统一的device配置 - cfg.MODEL.DEVICE = device - - cfg.freeze() - default_setup(cfg, args) - - #@todo 可以删掉这块? - # register_coco_instances( - # "scihub_train", - # {}, - # cfg.SCIHUB_DATA_DIR_TRAIN + ".json", - # cfg.SCIHUB_DATA_DIR_TRAIN - # ) - - return cfg - - -class DotDict(dict): - def __init__(self, *args, **kwargs): - super(DotDict, self).__init__(*args, **kwargs) - - def __getattr__(self, key): - if key not in self.keys(): - return None - value = self[key] - if isinstance(value, dict): - value = DotDict(value) - return value - - def __setattr__(self, key, value): - self[key] = value - - -class Layoutlmv3_Predictor(object): - def __init__(self, weights, config_file, device): - layout_args = { - "config_file": config_file, - "resume": False, - "eval_only": False, - "num_gpus": 1, - "num_machines": 1, - "machine_rank": 0, - "dist_url": "tcp://127.0.0.1:57823", - "opts": ["MODEL.WEIGHTS", weights], - } - layout_args = DotDict(layout_args) - - cfg = setup(layout_args, device) - self.mapping = ["title", "plain text", "abandon", "figure", "figure_caption", "table", "table_caption", - "table_footnote", "isolate_formula", "formula_caption"] - MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_classes = self.mapping - self.predictor = DefaultPredictor(cfg) - - def __call__(self, image, ignore_catids=[]): - # page_layout_result = { - # "layout_dets": [] - # } - layout_dets = [] - outputs = self.predictor(image) - boxes = outputs["instances"].to("cpu")._fields["pred_boxes"].tensor.tolist() - labels = outputs["instances"].to("cpu")._fields["pred_classes"].tolist() - scores = outputs["instances"].to("cpu")._fields["scores"].tolist() - for bbox_idx in range(len(boxes)): - if labels[bbox_idx] in ignore_catids: - continue - layout_dets.append({ - "category_id": labels[bbox_idx], - "poly": [ - boxes[bbox_idx][0], boxes[bbox_idx][1], - boxes[bbox_idx][2], boxes[bbox_idx][1], - boxes[bbox_idx][2], boxes[bbox_idx][3], - boxes[bbox_idx][0], boxes[bbox_idx][3], - ], - "score": scores[bbox_idx] - }) - return layout_dets diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/rcnn_vl.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/rcnn_vl.py deleted file mode 100644 index 
46b2e16102e8782eb675b518b7d870dc8d007ba8..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/rcnn_vl.py +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. -import logging -import numpy as np -from typing import Dict, List, Optional, Tuple -import torch -from torch import nn - -from detectron2.config import configurable -from detectron2.structures import ImageList, Instances -from detectron2.utils.events import get_event_storage - -from detectron2.modeling.backbone import Backbone, build_backbone -from detectron2.modeling.meta_arch.build import META_ARCH_REGISTRY - -from detectron2.modeling.meta_arch import GeneralizedRCNN - -from detectron2.modeling.postprocessing import detector_postprocess -from detectron2.modeling.roi_heads.fast_rcnn import fast_rcnn_inference_single_image -from contextlib import contextmanager -from itertools import count - -@META_ARCH_REGISTRY.register() -class VLGeneralizedRCNN(GeneralizedRCNN): - """ - Generalized R-CNN. Any models that contains the following three components: - 1. Per-image feature extraction (aka backbone) - 2. Region proposal generation - 3. Per-region feature extraction and prediction - """ - - def forward(self, batched_inputs: List[Dict[str, torch.Tensor]]): - """ - Args: - batched_inputs: a list, batched outputs of :class:`DatasetMapper` . - Each item in the list contains the inputs for one image. - For now, each item in the list is a dict that contains: - - * image: Tensor, image in (C, H, W) format. - * instances (optional): groundtruth :class:`Instances` - * proposals (optional): :class:`Instances`, precomputed proposals. - - Other information that's included in the original dicts, such as: - - * "height", "width" (int): the output resolution of the model, used in inference. - See :meth:`postprocess` for details. - - Returns: - list[dict]: - Each dict is the output for one input image. - The dict contains one key "instances" whose value is a :class:`Instances`. - The :class:`Instances` object has the following keys: - "pred_boxes", "pred_classes", "scores", "pred_masks", "pred_keypoints" - """ - if not self.training: - return self.inference(batched_inputs) - - images = self.preprocess_image(batched_inputs) - if "instances" in batched_inputs[0]: - gt_instances = [x["instances"].to(self.device) for x in batched_inputs] - else: - gt_instances = None - - # features = self.backbone(images.tensor) - input = self.get_batch(batched_inputs, images) - features = self.backbone(input) - - if self.proposal_generator is not None: - proposals, proposal_losses = self.proposal_generator(images, features, gt_instances) - else: - assert "proposals" in batched_inputs[0] - proposals = [x["proposals"].to(self.device) for x in batched_inputs] - proposal_losses = {} - - _, detector_losses = self.roi_heads(images, features, proposals, gt_instances) - if self.vis_period > 0: - storage = get_event_storage() - if storage.iter % self.vis_period == 0: - self.visualize_training(batched_inputs, proposals) - - losses = {} - losses.update(detector_losses) - losses.update(proposal_losses) - return losses - - def inference( - self, - batched_inputs: List[Dict[str, torch.Tensor]], - detected_instances: Optional[List[Instances]] = None, - do_postprocess: bool = True, - ): - """ - Run inference on the given inputs. - - Args: - batched_inputs (list[dict]): same as in :meth:`forward` - detected_instances (None or list[Instances]): if not None, it - contains an `Instances` object per image. 
The `Instances` - object contains "pred_boxes" and "pred_classes" which are - known boxes in the image. - The inference will then skip the detection of bounding boxes, - and only predict other per-ROI outputs. - do_postprocess (bool): whether to apply post-processing on the outputs. - - Returns: - When do_postprocess=True, same as in :meth:`forward`. - Otherwise, a list[Instances] containing raw network outputs. - """ - assert not self.training - - images = self.preprocess_image(batched_inputs) - # features = self.backbone(images.tensor) - input = self.get_batch(batched_inputs, images) - features = self.backbone(input) - - if detected_instances is None: - if self.proposal_generator is not None: - proposals, _ = self.proposal_generator(images, features, None) - else: - assert "proposals" in batched_inputs[0] - proposals = [x["proposals"].to(self.device) for x in batched_inputs] - - results, _ = self.roi_heads(images, features, proposals, None) - else: - detected_instances = [x.to(self.device) for x in detected_instances] - results = self.roi_heads.forward_with_given_boxes(features, detected_instances) - - if do_postprocess: - assert not torch.jit.is_scripting(), "Scripting is not supported for postprocess." - return GeneralizedRCNN._postprocess(results, batched_inputs, images.image_sizes) - else: - return results - - def get_batch(self, examples, images): - if len(examples) >= 1 and "bbox" not in examples[0]: # image_only - return {"images": images.tensor} - - return input - - def _batch_inference(self, batched_inputs, detected_instances=None): - """ - Execute inference on a list of inputs, - using batch size = self.batch_size (e.g., 2), instead of the length of the list. - - Inputs & outputs have the same format as :meth:`GeneralizedRCNN.inference` - """ - if detected_instances is None: - detected_instances = [None] * len(batched_inputs) - - outputs = [] - inputs, instances = [], [] - for idx, input, instance in zip(count(), batched_inputs, detected_instances): - inputs.append(input) - instances.append(instance) - if len(inputs) == 2 or idx == len(batched_inputs) - 1: - outputs.extend( - self.inference( - inputs, - instances if instances[0] is not None else None, - do_postprocess=True, # False - ) - ) - inputs, instances = [], [] - return outputs diff --git a/magic_pdf/model/sub_modules/layout/layoutlmv3/visualizer.py b/magic_pdf/model/sub_modules/layout/layoutlmv3/visualizer.py deleted file mode 100644 index 8185984e66f0267be6368317c60dc543dcb69e87..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/layout/layoutlmv3/visualizer.py +++ /dev/null @@ -1,1236 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. 
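One detail worth noting from rcnn_vl.py above: _batch_inference feeds the predictor at most two inputs at a time instead of the whole list, then concatenates the per-image outputs. A stripped-down sketch of that chunking (illustrative helper name; the detected_instances handling is omitted):

from itertools import count

def batch_in_pairs(run_inference, batched_inputs, batch_size=2):
    # Accumulate inputs and flush a chunk whenever it reaches batch_size
    # or the list is exhausted, extending the combined output list.
    outputs, chunk = [], []
    for idx, item in zip(count(), batched_inputs):
        chunk.append(item)
        if len(chunk) == batch_size or idx == len(batched_inputs) - 1:
            outputs.extend(run_inference(chunk))
            chunk = []
    return outputs

# toy usage: an "inference" call that just echoes its inputs
print(batch_in_pairs(lambda xs: [f"processed {x}" for x in xs], ["a", "b", "c"]))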
-import colorsys -import logging -import math -import numpy as np -from enum import Enum, unique -import cv2 -import matplotlib as mpl -import matplotlib.colors as mplc -import matplotlib.figure as mplfigure -import pycocotools.mask as mask_util -import torch -from matplotlib.backends.backend_agg import FigureCanvasAgg -from PIL import Image - -from detectron2.data import MetadataCatalog -from detectron2.structures import BitMasks, Boxes, BoxMode, Keypoints, PolygonMasks, RotatedBoxes -from detectron2.utils.file_io import PathManager - -from detectron2.utils.colormap import random_color - -import pdb - -logger = logging.getLogger(__name__) - -__all__ = ["ColorMode", "VisImage", "Visualizer"] - - -_SMALL_OBJECT_AREA_THRESH = 1000 -_LARGE_MASK_AREA_THRESH = 120000 -_OFF_WHITE = (1.0, 1.0, 240.0 / 255) -_BLACK = (0, 0, 0) -_RED = (1.0, 0, 0) - -_KEYPOINT_THRESHOLD = 0.05 - -#CLASS_NAMES = ["footnote", "footer", "header"] - -@unique -class ColorMode(Enum): - """ - Enum of different color modes to use for instance visualizations. - """ - - IMAGE = 0 - """ - Picks a random color for every instance and overlay segmentations with low opacity. - """ - SEGMENTATION = 1 - """ - Let instances of the same category have similar colors - (from metadata.thing_colors), and overlay them with - high opacity. This provides more attention on the quality of segmentation. - """ - IMAGE_BW = 2 - """ - Same as IMAGE, but convert all areas without masks to gray-scale. - Only available for drawing per-instance mask predictions. - """ - - -class GenericMask: - """ - Attribute: - polygons (list[ndarray]): list[ndarray]: polygons for this mask. - Each ndarray has format [x, y, x, y, ...] - mask (ndarray): a binary mask - """ - - def __init__(self, mask_or_polygons, height, width): - self._mask = self._polygons = self._has_holes = None - self.height = height - self.width = width - - m = mask_or_polygons - if isinstance(m, dict): - # RLEs - assert "counts" in m and "size" in m - if isinstance(m["counts"], list): # uncompressed RLEs - h, w = m["size"] - assert h == height and w == width - m = mask_util.frPyObjects(m, h, w) - self._mask = mask_util.decode(m)[:, :] - return - - if isinstance(m, list): # list[ndarray] - self._polygons = [np.asarray(x).reshape(-1) for x in m] - return - - if isinstance(m, np.ndarray): # assumed to be a binary mask - assert m.shape[1] != 2, m.shape - assert m.shape == ( - height, - width, - ), f"mask shape: {m.shape}, target dims: {height}, {width}" - self._mask = m.astype("uint8") - return - - raise ValueError("GenericMask cannot handle object {} of type '{}'".format(m, type(m))) - - @property - def mask(self): - if self._mask is None: - self._mask = self.polygons_to_mask(self._polygons) - return self._mask - - @property - def polygons(self): - if self._polygons is None: - self._polygons, self._has_holes = self.mask_to_polygons(self._mask) - return self._polygons - - @property - def has_holes(self): - if self._has_holes is None: - if self._mask is not None: - self._polygons, self._has_holes = self.mask_to_polygons(self._mask) - else: - self._has_holes = False # if original format is polygon, does not have holes - return self._has_holes - - def mask_to_polygons(self, mask): - # cv2.RETR_CCOMP flag retrieves all the contours and arranges them to a 2-level - # hierarchy. External contours (boundary) of the object are placed in hierarchy-1. - # Internal contours (holes) are placed in hierarchy-2. - # cv2.CHAIN_APPROX_NONE flag gets vertices of polygons from contours. 
- mask = np.ascontiguousarray(mask) # some versions of cv2 does not support incontiguous arr - res = cv2.findContours(mask.astype("uint8"), cv2.RETR_CCOMP, cv2.CHAIN_APPROX_NONE) - hierarchy = res[-1] - if hierarchy is None: # empty mask - return [], False - has_holes = (hierarchy.reshape(-1, 4)[:, 3] >= 0).sum() > 0 - res = res[-2] - res = [x.flatten() for x in res] - # These coordinates from OpenCV are integers in range [0, W-1 or H-1]. - # We add 0.5 to turn them into real-value coordinate space. A better solution - # would be to first +0.5 and then dilate the returned polygon by 0.5. - res = [x + 0.5 for x in res if len(x) >= 6] - return res, has_holes - - def polygons_to_mask(self, polygons): - rle = mask_util.frPyObjects(polygons, self.height, self.width) - rle = mask_util.merge(rle) - return mask_util.decode(rle)[:, :] - - def area(self): - return self.mask.sum() - - def bbox(self): - p = mask_util.frPyObjects(self.polygons, self.height, self.width) - p = mask_util.merge(p) - bbox = mask_util.toBbox(p) - bbox[2] += bbox[0] - bbox[3] += bbox[1] - return bbox - - -class _PanopticPrediction: - """ - Unify different panoptic annotation/prediction formats - """ - - def __init__(self, panoptic_seg, segments_info, metadata=None): - if segments_info is None: - assert metadata is not None - # If "segments_info" is None, we assume "panoptic_img" is a - # H*W int32 image storing the panoptic_id in the format of - # category_id * label_divisor + instance_id. We reserve -1 for - # VOID label. - label_divisor = metadata.label_divisor - segments_info = [] - for panoptic_label in np.unique(panoptic_seg.numpy()): - if panoptic_label == -1: - # VOID region. - continue - pred_class = panoptic_label // label_divisor - isthing = pred_class in metadata.thing_dataset_id_to_contiguous_id.values() - segments_info.append( - { - "id": int(panoptic_label), - "category_id": int(pred_class), - "isthing": bool(isthing), - } - ) - del metadata - - self._seg = panoptic_seg - - self._sinfo = {s["id"]: s for s in segments_info} # seg id -> seg info - segment_ids, areas = torch.unique(panoptic_seg, sorted=True, return_counts=True) - areas = areas.numpy() - sorted_idxs = np.argsort(-areas) - self._seg_ids, self._seg_areas = segment_ids[sorted_idxs], areas[sorted_idxs] - self._seg_ids = self._seg_ids.tolist() - for sid, area in zip(self._seg_ids, self._seg_areas): - if sid in self._sinfo: - self._sinfo[sid]["area"] = float(area) - - def non_empty_mask(self): - """ - Returns: - (H, W) array, a mask for all pixels that have a prediction - """ - empty_ids = [] - for id in self._seg_ids: - if id not in self._sinfo: - empty_ids.append(id) - if len(empty_ids) == 0: - return np.zeros(self._seg.shape, dtype=np.uint8) - assert ( - len(empty_ids) == 1 - ), ">1 ids corresponds to no labels. This is currently not supported" - return (self._seg != empty_ids[0]).numpy().astype(np.bool) - - def semantic_masks(self): - for sid in self._seg_ids: - sinfo = self._sinfo.get(sid) - if sinfo is None or sinfo["isthing"]: - # Some pixels (e.g. id 0 in PanopticFPN) have no instance or semantic predictions. 
- continue - yield (self._seg == sid).numpy().astype(np.bool), sinfo - - def instance_masks(self): - for sid in self._seg_ids: - sinfo = self._sinfo.get(sid) - if sinfo is None or not sinfo["isthing"]: - continue - mask = (self._seg == sid).numpy().astype(np.bool) - if mask.sum() > 0: - yield mask, sinfo - - -def _create_text_labels(classes, scores, class_names, is_crowd=None): - """ - Args: - classes (list[int] or None): - scores (list[float] or None): - class_names (list[str] or None): - is_crowd (list[bool] or None): - - Returns: - list[str] or None - """ - #class_names = CLASS_NAMES - labels = None - if classes is not None: - if class_names is not None and len(class_names) > 0: - labels = [class_names[i] for i in classes] - else: - labels = [str(i) for i in classes] - - if scores is not None: - if labels is None: - labels = ["{:.0f}%".format(s * 100) for s in scores] - else: - labels = ["{} {:.0f}%".format(l, s * 100) for l, s in zip(labels, scores)] - if labels is not None and is_crowd is not None: - labels = [l + ("|crowd" if crowd else "") for l, crowd in zip(labels, is_crowd)] - return labels - - -class VisImage: - def __init__(self, img, scale=1.0): - """ - Args: - img (ndarray): an RGB image of shape (H, W, 3) in range [0, 255]. - scale (float): scale the input image - """ - self.img = img - self.scale = scale - self.width, self.height = img.shape[1], img.shape[0] - self._setup_figure(img) - - def _setup_figure(self, img): - """ - Args: - Same as in :meth:`__init__()`. - - Returns: - fig (matplotlib.pyplot.figure): top level container for all the image plot elements. - ax (matplotlib.pyplot.Axes): contains figure elements and sets the coordinate system. - """ - fig = mplfigure.Figure(frameon=False) - self.dpi = fig.get_dpi() - # add a small 1e-2 to avoid precision lost due to matplotlib's truncation - # (https://github.com/matplotlib/matplotlib/issues/15363) - fig.set_size_inches( - (self.width * self.scale + 1e-2) / self.dpi, - (self.height * self.scale + 1e-2) / self.dpi, - ) - self.canvas = FigureCanvasAgg(fig) - # self.canvas = mpl.backends.backend_cairo.FigureCanvasCairo(fig) - ax = fig.add_axes([0.0, 0.0, 1.0, 1.0]) - ax.axis("off") - self.fig = fig - self.ax = ax - self.reset_image(img) - - def reset_image(self, img): - """ - Args: - img: same as in __init__ - """ - img = img.astype("uint8") - self.ax.imshow(img, extent=(0, self.width, self.height, 0), interpolation="nearest") - - def save(self, filepath): - """ - Args: - filepath (str): a string that contains the absolute path, including the file name, where - the visualized image will be saved. - """ - self.fig.savefig(filepath) - - def get_image(self): - """ - Returns: - ndarray: - the visualized image of shape (H, W, 3) (RGB) in uint8 type. - The shape is scaled w.r.t the input image using the given `scale` argument. - """ - canvas = self.canvas - s, (width, height) = canvas.print_to_buffer() - # buf = io.BytesIO() # works for cairo backend - # canvas.print_rgba(buf) - # width, height = self.width, self.height - # s = buf.getvalue() - - buffer = np.frombuffer(s, dtype="uint8") - - img_rgba = buffer.reshape(height, width, 4) - rgb, alpha = np.split(img_rgba, [3], axis=2) - return rgb.astype("uint8") - - -class Visualizer: - """ - Visualizer that draws data about detection/segmentation on images. 
- - It contains methods like `draw_{text,box,circle,line,binary_mask,polygon}` - that draw primitive objects to images, as well as high-level wrappers like - `draw_{instance_predictions,sem_seg,panoptic_seg_predictions,dataset_dict}` - that draw composite data in some pre-defined style. - - Note that the exact visualization style for the high-level wrappers are subject to change. - Style such as color, opacity, label contents, visibility of labels, or even the visibility - of objects themselves (e.g. when the object is too small) may change according - to different heuristics, as long as the results still look visually reasonable. - - To obtain a consistent style, you can implement custom drawing functions with the - abovementioned primitive methods instead. If you need more customized visualization - styles, you can process the data yourself following their format documented in - tutorials (:doc:`/tutorials/models`, :doc:`/tutorials/datasets`). This class does not - intend to satisfy everyone's preference on drawing styles. - - This visualizer focuses on high rendering quality rather than performance. It is not - designed to be used for real-time applications. - """ - - # TODO implement a fast, rasterized version using OpenCV - - def __init__(self, img_rgb, metadata=None, scale=1.0, instance_mode=ColorMode.IMAGE): - """ - Args: - img_rgb: a numpy array of shape (H, W, C), where H and W correspond to - the height and width of the image respectively. C is the number of - color channels. The image is required to be in RGB format since that - is a requirement of the Matplotlib library. The image is also expected - to be in the range [0, 255]. - metadata (Metadata): dataset metadata (e.g. class names and colors) - instance_mode (ColorMode): defines one of the pre-defined style for drawing - instances on an image. - """ - self.img = np.asarray(img_rgb).clip(0, 255).astype(np.uint8) - if metadata is None: - metadata = MetadataCatalog.get("__nonexist__") - self.metadata = metadata - self.output = VisImage(self.img, scale=scale) - self.cpu_device = torch.device("cpu") - - # too small texts are useless, therefore clamp to 9 - self._default_font_size = max( - np.sqrt(self.output.height * self.output.width) // 90, 10 // scale - ) - self._instance_mode = instance_mode - self.keypoint_threshold = _KEYPOINT_THRESHOLD - - def draw_instance_predictions(self, predictions): - """ - Draw instance-level prediction results on an image. - - Args: - predictions (Instances): the output of an instance detection/segmentation - model. Following fields will be used to draw: - "pred_boxes", "pred_classes", "scores", "pred_masks" (or "pred_masks_rle"). - - Returns: - output (VisImage): image object with visualizations. 
- """ - boxes = predictions.pred_boxes if predictions.has("pred_boxes") else None - scores = predictions.scores if predictions.has("scores") else None - classes = predictions.pred_classes.tolist() if predictions.has("pred_classes") else None - labels = _create_text_labels(classes, scores, self.metadata.get("thing_classes", None)) - keypoints = predictions.pred_keypoints if predictions.has("pred_keypoints") else None - - if predictions.has("pred_masks"): - masks = np.asarray(predictions.pred_masks) - masks = [GenericMask(x, self.output.height, self.output.width) for x in masks] - else: - masks = None - - if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): - colors = [ - self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in classes - ] - alpha = 0.8 - else: - colors = None - alpha = 0.5 - - if self._instance_mode == ColorMode.IMAGE_BW: - self.output.reset_image( - self._create_grayscale_image( - (predictions.pred_masks.any(dim=0) > 0).numpy() - if predictions.has("pred_masks") - else None - ) - ) - alpha = 0.3 - - self.overlay_instances( - masks=masks, - boxes=boxes, - labels=labels, - keypoints=keypoints, - assigned_colors=colors, - alpha=alpha, - ) - return self.output - - def draw_sem_seg(self, sem_seg, area_threshold=None, alpha=0.8): - """ - Draw semantic segmentation predictions/labels. - - Args: - sem_seg (Tensor or ndarray): the segmentation of shape (H, W). - Each value is the integer label of the pixel. - area_threshold (int): segments with less than `area_threshold` are not drawn. - alpha (float): the larger it is, the more opaque the segmentations are. - - Returns: - output (VisImage): image object with visualizations. - """ - if isinstance(sem_seg, torch.Tensor): - sem_seg = sem_seg.numpy() - labels, areas = np.unique(sem_seg, return_counts=True) - sorted_idxs = np.argsort(-areas).tolist() - labels = labels[sorted_idxs] - for label in filter(lambda l: l < len(self.metadata.stuff_classes), labels): - try: - mask_color = [x / 255 for x in self.metadata.stuff_colors[label]] - except (AttributeError, IndexError): - mask_color = None - - binary_mask = (sem_seg == label).astype(np.uint8) - text = self.metadata.stuff_classes[label] - self.draw_binary_mask( - binary_mask, - color=mask_color, - edge_color=_OFF_WHITE, - text=text, - alpha=alpha, - area_threshold=area_threshold, - ) - return self.output - - def draw_panoptic_seg(self, panoptic_seg, segments_info, area_threshold=None, alpha=0.7): - """ - Draw panoptic prediction annotations or results. - - Args: - panoptic_seg (Tensor): of shape (height, width) where the values are ids for each - segment. - segments_info (list[dict] or None): Describe each segment in `panoptic_seg`. - If it is a ``list[dict]``, each dict contains keys "id", "category_id". - If None, category id of each pixel is computed by - ``pixel // metadata.label_divisor``. - area_threshold (int): stuff segments with less than `area_threshold` are not drawn. - - Returns: - output (VisImage): image object with visualizations. - """ - pred = _PanopticPrediction(panoptic_seg, segments_info, self.metadata) - - if self._instance_mode == ColorMode.IMAGE_BW: - self.output.reset_image(self._create_grayscale_image(pred.non_empty_mask())) - - # draw mask for all semantic segments first i.e. 
"stuff" - for mask, sinfo in pred.semantic_masks(): - category_idx = sinfo["category_id"] - try: - mask_color = [x / 255 for x in self.metadata.stuff_colors[category_idx]] - except AttributeError: - mask_color = None - - text = self.metadata.stuff_classes[category_idx] - self.draw_binary_mask( - mask, - color=mask_color, - edge_color=_OFF_WHITE, - text=text, - alpha=alpha, - area_threshold=area_threshold, - ) - - # draw mask for all instances second - all_instances = list(pred.instance_masks()) - if len(all_instances) == 0: - return self.output - masks, sinfo = list(zip(*all_instances)) - category_ids = [x["category_id"] for x in sinfo] - - try: - scores = [x["score"] for x in sinfo] - except KeyError: - scores = None - labels = _create_text_labels( - category_ids, scores, self.metadata.thing_classes, [x.get("iscrowd", 0) for x in sinfo] - ) - - try: - colors = [ - self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) for c in category_ids - ] - except AttributeError: - colors = None - self.overlay_instances(masks=masks, labels=labels, assigned_colors=colors, alpha=alpha) - - return self.output - - draw_panoptic_seg_predictions = draw_panoptic_seg # backward compatibility - - def draw_dataset_dict(self, dic): - """ - Draw annotations/segmentaions in Detectron2 Dataset format. - - Args: - dic (dict): annotation/segmentation data of one image, in Detectron2 Dataset format. - - Returns: - output (VisImage): image object with visualizations. - """ - annos = dic.get("annotations", None) - if annos: - if "segmentation" in annos[0]: - masks = [x["segmentation"] for x in annos] - else: - masks = None - if "keypoints" in annos[0]: - keypts = [x["keypoints"] for x in annos] - keypts = np.array(keypts).reshape(len(annos), -1, 3) - else: - keypts = None - - boxes = [ - BoxMode.convert(x["bbox"], x["bbox_mode"], BoxMode.XYXY_ABS) - if len(x["bbox"]) == 4 - else x["bbox"] - for x in annos - ] - - colors = None - category_ids = [x["category_id"] for x in annos] - if self._instance_mode == ColorMode.SEGMENTATION and self.metadata.get("thing_colors"): - colors = [ - self._jitter([x / 255 for x in self.metadata.thing_colors[c]]) - for c in category_ids - ] - names = self.metadata.get("thing_classes", None) - labels = _create_text_labels( - category_ids, - scores=None, - class_names=names, - is_crowd=[x.get("iscrowd", 0) for x in annos], - ) - self.overlay_instances( - labels=labels, boxes=boxes, masks=masks, keypoints=keypts, assigned_colors=colors - ) - - sem_seg = dic.get("sem_seg", None) - if sem_seg is None and "sem_seg_file_name" in dic: - with PathManager.open(dic["sem_seg_file_name"], "rb") as f: - sem_seg = Image.open(f) - sem_seg = np.asarray(sem_seg, dtype="uint8") - if sem_seg is not None: - self.draw_sem_seg(sem_seg, area_threshold=0, alpha=0.5) - - pan_seg = dic.get("pan_seg", None) - if pan_seg is None and "pan_seg_file_name" in dic: - with PathManager.open(dic["pan_seg_file_name"], "rb") as f: - pan_seg = Image.open(f) - pan_seg = np.asarray(pan_seg) - from panopticapi.utils import rgb2id - - pan_seg = rgb2id(pan_seg) - if pan_seg is not None: - segments_info = dic["segments_info"] - pan_seg = torch.tensor(pan_seg) - self.draw_panoptic_seg(pan_seg, segments_info, area_threshold=0, alpha=0.5) - return self.output - - def overlay_instances( - self, - *, - boxes=None, - labels=None, - masks=None, - keypoints=None, - assigned_colors=None, - alpha=0.5, - ): - """ - Args: - boxes (Boxes, RotatedBoxes or ndarray): either a :class:`Boxes`, - or an Nx4 numpy array of XYXY_ABS format for the N 
objects in a single image, - or a :class:`RotatedBoxes`, - or an Nx5 numpy array of (x_center, y_center, width, height, angle_degrees) format - for the N objects in a single image, - labels (list[str]): the text to be displayed for each instance. - masks (masks-like object): Supported types are: - - * :class:`detectron2.structures.PolygonMasks`, - :class:`detectron2.structures.BitMasks`. - * list[list[ndarray]]: contains the segmentation masks for all objects in one image. - The first level of the list corresponds to individual instances. The second - level to all the polygon that compose the instance, and the third level - to the polygon coordinates. The third level should have the format of - [x0, y0, x1, y1, ..., xn, yn] (n >= 3). - * list[ndarray]: each ndarray is a binary mask of shape (H, W). - * list[dict]: each dict is a COCO-style RLE. - keypoints (Keypoint or array like): an array-like object of shape (N, K, 3), - where the N is the number of instances and K is the number of keypoints. - The last dimension corresponds to (x, y, visibility or score). - assigned_colors (list[matplotlib.colors]): a list of colors, where each color - corresponds to each mask or box in the image. Refer to 'matplotlib.colors' - for full list of formats that the colors are accepted in. - - Returns: - output (VisImage): image object with visualizations. - """ - num_instances = 0 - if boxes is not None: - boxes = self._convert_boxes(boxes) - num_instances = len(boxes) - if masks is not None: - masks = self._convert_masks(masks) - if num_instances: - assert len(masks) == num_instances - else: - num_instances = len(masks) - if keypoints is not None: - if num_instances: - assert len(keypoints) == num_instances - else: - num_instances = len(keypoints) - keypoints = self._convert_keypoints(keypoints) - if labels is not None: - assert len(labels) == num_instances - if assigned_colors is None: - assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] - if num_instances == 0: - return self.output - if boxes is not None and boxes.shape[1] == 5: - return self.overlay_rotated_instances( - boxes=boxes, labels=labels, assigned_colors=assigned_colors - ) - - # Display in largest to smallest order to reduce occlusion. - areas = None - if boxes is not None: - areas = np.prod(boxes[:, 2:] - boxes[:, :2], axis=1) - elif masks is not None: - areas = np.asarray([x.area() for x in masks]) - - if areas is not None: - sorted_idxs = np.argsort(-areas).tolist() - # Re-order overlapped instances in descending order. - boxes = boxes[sorted_idxs] if boxes is not None else None - labels = [labels[k] for k in sorted_idxs] if labels is not None else None - masks = [masks[idx] for idx in sorted_idxs] if masks is not None else None - assigned_colors = [assigned_colors[idx] for idx in sorted_idxs] - keypoints = keypoints[sorted_idxs] if keypoints is not None else None - - for i in range(num_instances): - color = assigned_colors[i] - if boxes is not None: - self.draw_box(boxes[i], edge_color=color) - - if masks is not None: - for segment in masks[i].polygons: - self.draw_polygon(segment.reshape(-1, 2), color, alpha=alpha) - - if labels is not None: - # first get a box - if boxes is not None: - x0, y0, x1, y1 = boxes[i] - text_pos = (x0, y0) # if drawing boxes, put text on the box corner. 
- horiz_align = "left" - elif masks is not None: - # skip small mask without polygon - if len(masks[i].polygons) == 0: - continue - - x0, y0, x1, y1 = masks[i].bbox() - - # draw text in the center (defined by median) when box is not drawn - # median is less sensitive to outliers. - text_pos = np.median(masks[i].mask.nonzero(), axis=1)[::-1] - horiz_align = "center" - else: - continue # drawing the box confidence for keypoints isn't very useful. - # for small objects, draw text at the side to avoid occlusion - instance_area = (y1 - y0) * (x1 - x0) - if ( - instance_area < _SMALL_OBJECT_AREA_THRESH * self.output.scale - or y1 - y0 < 40 * self.output.scale - ): - if y1 >= self.output.height - 5: - text_pos = (x1, y0) - else: - text_pos = (x0, y1) - - height_ratio = (y1 - y0) / np.sqrt(self.output.height * self.output.width) - lighter_color = self._change_color_brightness(color, brightness_factor=0.7) - font_size = ( - np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) - * 0.5 - * self._default_font_size - ) - self.draw_text( - labels[i], - text_pos, - color=lighter_color, - horizontal_alignment=horiz_align, - font_size=font_size, - ) - - # draw keypoints - if keypoints is not None: - for keypoints_per_instance in keypoints: - self.draw_and_connect_keypoints(keypoints_per_instance) - - return self.output - - def overlay_rotated_instances(self, boxes=None, labels=None, assigned_colors=None): - """ - Args: - boxes (ndarray): an Nx5 numpy array of - (x_center, y_center, width, height, angle_degrees) format - for the N objects in a single image. - labels (list[str]): the text to be displayed for each instance. - assigned_colors (list[matplotlib.colors]): a list of colors, where each color - corresponds to each mask or box in the image. Refer to 'matplotlib.colors' - for full list of formats that the colors are accepted in. - - Returns: - output (VisImage): image object with visualizations. - """ - num_instances = len(boxes) - - if assigned_colors is None: - assigned_colors = [random_color(rgb=True, maximum=1) for _ in range(num_instances)] - if num_instances == 0: - return self.output - - # Display in largest to smallest order to reduce occlusion. - if boxes is not None: - areas = boxes[:, 2] * boxes[:, 3] - - sorted_idxs = np.argsort(-areas).tolist() - # Re-order overlapped instances in descending order. - boxes = boxes[sorted_idxs] - labels = [labels[k] for k in sorted_idxs] if labels is not None else None - colors = [assigned_colors[idx] for idx in sorted_idxs] - - for i in range(num_instances): - self.draw_rotated_box_with_label( - boxes[i], edge_color=colors[i], label=labels[i] if labels is not None else None - ) - - return self.output - - def draw_and_connect_keypoints(self, keypoints): - """ - Draws keypoints of an instance and follows the rules for keypoint connections - to draw lines between appropriate keypoints. This follows color heuristics for - line color. - - Args: - keypoints (Tensor): a tensor of shape (K, 3), where K is the number of keypoints - and the last dimension corresponds to (x, y, probability). - - Returns: - output (VisImage): image object with visualizations. 
- """ - visible = {} - keypoint_names = self.metadata.get("keypoint_names") - for idx, keypoint in enumerate(keypoints): - # draw keypoint - x, y, prob = keypoint - if prob > self.keypoint_threshold: - self.draw_circle((x, y), color=_RED) - if keypoint_names: - keypoint_name = keypoint_names[idx] - visible[keypoint_name] = (x, y) - - if self.metadata.get("keypoint_connection_rules"): - for kp0, kp1, color in self.metadata.keypoint_connection_rules: - if kp0 in visible and kp1 in visible: - x0, y0 = visible[kp0] - x1, y1 = visible[kp1] - color = tuple(x / 255.0 for x in color) - self.draw_line([x0, x1], [y0, y1], color=color) - - # draw lines from nose to mid-shoulder and mid-shoulder to mid-hip - # Note that this strategy is specific to person keypoints. - # For other keypoints, it should just do nothing - try: - ls_x, ls_y = visible["left_shoulder"] - rs_x, rs_y = visible["right_shoulder"] - mid_shoulder_x, mid_shoulder_y = (ls_x + rs_x) / 2, (ls_y + rs_y) / 2 - except KeyError: - pass - else: - # draw line from nose to mid-shoulder - nose_x, nose_y = visible.get("nose", (None, None)) - if nose_x is not None: - self.draw_line([nose_x, mid_shoulder_x], [nose_y, mid_shoulder_y], color=_RED) - - try: - # draw line from mid-shoulder to mid-hip - lh_x, lh_y = visible["left_hip"] - rh_x, rh_y = visible["right_hip"] - except KeyError: - pass - else: - mid_hip_x, mid_hip_y = (lh_x + rh_x) / 2, (lh_y + rh_y) / 2 - self.draw_line([mid_hip_x, mid_shoulder_x], [mid_hip_y, mid_shoulder_y], color=_RED) - return self.output - - """ - Primitive drawing functions: - """ - - def draw_text( - self, - text, - position, - *, - font_size=None, - color="g", - horizontal_alignment="center", - rotation=0, - ): - """ - Args: - text (str): class label - position (tuple): a tuple of the x and y coordinates to place text on image. - font_size (int, optional): font of the text. If not provided, a font size - proportional to the image width is calculated and used. - color: color of the text. Refer to `matplotlib.colors` for full list - of formats that are accepted. - horizontal_alignment (str): see `matplotlib.text.Text` - rotation: rotation angle in degrees CCW - - Returns: - output (VisImage): image object with text drawn. - """ - if not font_size: - font_size = self._default_font_size - - # since the text background is dark, we don't want the text to be dark - color = np.maximum(list(mplc.to_rgb(color)), 0.2) - color[np.argmax(color)] = max(0.8, np.max(color)) - - x, y = position - self.output.ax.text( - x, - y, - text, - size=font_size * self.output.scale, - family="sans-serif", - bbox={"facecolor": "black", "alpha": 0.8, "pad": 0.7, "edgecolor": "none"}, - verticalalignment="top", - horizontalalignment=horizontal_alignment, - color=color, - zorder=10, - rotation=rotation, - ) - return self.output - - def draw_box(self, box_coord, alpha=0.5, edge_color="g", line_style="-"): - """ - Args: - box_coord (tuple): a tuple containing x0, y0, x1, y1 coordinates, where x0 and y0 - are the coordinates of the image's top left corner. x1 and y1 are the - coordinates of the image's bottom right corner. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - edge_color: color of the outline of the box. Refer to `matplotlib.colors` - for full list of formats that are accepted. - line_style (string): the string to use to create the outline of the boxes. - - Returns: - output (VisImage): image object with box drawn. 
- """ - x0, y0, x1, y1 = box_coord - width = x1 - x0 - height = y1 - y0 - - linewidth = max(self._default_font_size / 4, 1) - - self.output.ax.add_patch( - mpl.patches.Rectangle( - (x0, y0), - width, - height, - fill=False, - edgecolor=edge_color, - linewidth=linewidth * self.output.scale, - alpha=alpha, - linestyle=line_style, - ) - ) - return self.output - - def draw_rotated_box_with_label( - self, rotated_box, alpha=0.5, edge_color="g", line_style="-", label=None - ): - """ - Draw a rotated box with label on its top-left corner. - - Args: - rotated_box (tuple): a tuple containing (cnt_x, cnt_y, w, h, angle), - where cnt_x and cnt_y are the center coordinates of the box. - w and h are the width and height of the box. angle represents how - many degrees the box is rotated CCW with regard to the 0-degree box. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - edge_color: color of the outline of the box. Refer to `matplotlib.colors` - for full list of formats that are accepted. - line_style (string): the string to use to create the outline of the boxes. - label (string): label for rotated box. It will not be rendered when set to None. - - Returns: - output (VisImage): image object with box drawn. - """ - cnt_x, cnt_y, w, h, angle = rotated_box - area = w * h - # use thinner lines when the box is small - linewidth = self._default_font_size / ( - 6 if area < _SMALL_OBJECT_AREA_THRESH * self.output.scale else 3 - ) - - theta = angle * math.pi / 180.0 - c = math.cos(theta) - s = math.sin(theta) - rect = [(-w / 2, h / 2), (-w / 2, -h / 2), (w / 2, -h / 2), (w / 2, h / 2)] - # x: left->right ; y: top->down - rotated_rect = [(s * yy + c * xx + cnt_x, c * yy - s * xx + cnt_y) for (xx, yy) in rect] - for k in range(4): - j = (k + 1) % 4 - self.draw_line( - [rotated_rect[k][0], rotated_rect[j][0]], - [rotated_rect[k][1], rotated_rect[j][1]], - color=edge_color, - linestyle="--" if k == 1 else line_style, - linewidth=linewidth, - ) - - if label is not None: - text_pos = rotated_rect[1] # topleft corner - - height_ratio = h / np.sqrt(self.output.height * self.output.width) - label_color = self._change_color_brightness(edge_color, brightness_factor=0.7) - font_size = ( - np.clip((height_ratio - 0.02) / 0.08 + 1, 1.2, 2) * 0.5 * self._default_font_size - ) - self.draw_text(label, text_pos, color=label_color, font_size=font_size, rotation=angle) - - return self.output - - def draw_circle(self, circle_coord, color, radius=3): - """ - Args: - circle_coord (list(int) or tuple(int)): contains the x and y coordinates - of the center of the circle. - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - radius (int): radius of the circle. - - Returns: - output (VisImage): image object with box drawn. - """ - x, y = circle_coord - self.output.ax.add_patch( - mpl.patches.Circle(circle_coord, radius=radius, fill=True, color=color) - ) - return self.output - - def draw_line(self, x_data, y_data, color, linestyle="-", linewidth=None): - """ - Args: - x_data (list[int]): a list containing x values of all the points being drawn. - Length of list should match the length of y_data. - y_data (list[int]): a list containing y values of all the points being drawn. - Length of list should match the length of x_data. - color: color of the line. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - linestyle: style of the line. Refer to `matplotlib.lines.Line2D` - for a full list of formats that are accepted. 
- linewidth (float or None): width of the line. When it's None, - a default value will be computed and used. - - Returns: - output (VisImage): image object with line drawn. - """ - if linewidth is None: - linewidth = self._default_font_size / 3 - linewidth = max(linewidth, 1) - self.output.ax.add_line( - mpl.lines.Line2D( - x_data, - y_data, - linewidth=linewidth * self.output.scale, - color=color, - linestyle=linestyle, - ) - ) - return self.output - - def draw_binary_mask( - self, binary_mask, color=None, *, edge_color=None, text=None, alpha=0.5, area_threshold=0 - ): - """ - Args: - binary_mask (ndarray): numpy array of shape (H, W), where H is the image height and - W is the image width. Each value in the array is either a 0 or 1 value of uint8 - type. - color: color of the mask. Refer to `matplotlib.colors` for a full list of - formats that are accepted. If None, will pick a random color. - edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a - full list of formats that are accepted. - text (str): if None, will be drawn in the object's center of mass. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - area_threshold (float): a connected component small than this will not be shown. - - Returns: - output (VisImage): image object with mask drawn. - """ - if color is None: - color = random_color(rgb=True, maximum=1) - color = mplc.to_rgb(color) - - has_valid_segment = False - binary_mask = binary_mask.astype("uint8") # opencv needs uint8 - mask = GenericMask(binary_mask, self.output.height, self.output.width) - shape2d = (binary_mask.shape[0], binary_mask.shape[1]) - - if not mask.has_holes: - # draw polygons for regular masks - for segment in mask.polygons: - area = mask_util.area(mask_util.frPyObjects([segment], shape2d[0], shape2d[1])) - if area < (area_threshold or 0): - continue - has_valid_segment = True - segment = segment.reshape(-1, 2) - self.draw_polygon(segment, color=color, edge_color=edge_color, alpha=alpha) - else: - # TODO: Use Path/PathPatch to draw vector graphics: - # https://stackoverflow.com/questions/8919719/how-to-plot-a-complex-polygon - rgba = np.zeros(shape2d + (4,), dtype="float32") - rgba[:, :, :3] = color - rgba[:, :, 3] = (mask.mask == 1).astype("float32") * alpha - has_valid_segment = True - self.output.ax.imshow(rgba, extent=(0, self.output.width, self.output.height, 0)) - - if text is not None and has_valid_segment: - # TODO sometimes drawn on wrong objects. the heuristics here can improve. - lighter_color = self._change_color_brightness(color, brightness_factor=0.7) - _num_cc, cc_labels, stats, centroids = cv2.connectedComponentsWithStats(binary_mask, 8) - largest_component_id = np.argmax(stats[1:, -1]) + 1 - - # draw text on the largest component, as well as other very large components. - for cid in range(1, _num_cc): - if cid == largest_component_id or stats[cid, -1] > _LARGE_MASK_AREA_THRESH: - # median is more stable than centroid - # center = centroids[largest_component_id] - center = np.median((cc_labels == cid).nonzero(), axis=1)[::-1] - self.draw_text(text, center, color=lighter_color) - return self.output - - def draw_polygon(self, segment, color, edge_color=None, alpha=0.5): - """ - Args: - segment: numpy array of shape Nx2, containing all the points in the polygon. - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - edge_color: color of the polygon edges. Refer to `matplotlib.colors` for a - full list of formats that are accepted. 
If not provided, a darker shade - of the polygon color will be used instead. - alpha (float): blending efficient. Smaller values lead to more transparent masks. - - Returns: - output (VisImage): image object with polygon drawn. - """ - if edge_color is None: - # make edge color darker than the polygon color - if alpha > 0.8: - edge_color = self._change_color_brightness(color, brightness_factor=-0.7) - else: - edge_color = color - edge_color = mplc.to_rgb(edge_color) + (1,) - - polygon = mpl.patches.Polygon( - segment, - fill=True, - facecolor=mplc.to_rgb(color) + (alpha,), - edgecolor=edge_color, - linewidth=max(self._default_font_size // 15 * self.output.scale, 1), - ) - self.output.ax.add_patch(polygon) - return self.output - - """ - Internal methods: - """ - - def _jitter(self, color): - """ - Randomly modifies given color to produce a slightly different color than the color given. - - Args: - color (tuple[double]): a tuple of 3 elements, containing the RGB values of the color - picked. The values in the list are in the [0.0, 1.0] range. - - Returns: - jittered_color (tuple[double]): a tuple of 3 elements, containing the RGB values of the - color after being jittered. The values in the list are in the [0.0, 1.0] range. - """ - color = mplc.to_rgb(color) - vec = np.random.rand(3) - # better to do it in another color space - vec = vec / np.linalg.norm(vec) * 0.5 - res = np.clip(vec + color, 0, 1) - return tuple(res) - - def _create_grayscale_image(self, mask=None): - """ - Create a grayscale version of the original image. - The colors in masked area, if given, will be kept. - """ - img_bw = self.img.astype("f4").mean(axis=2) - img_bw = np.stack([img_bw] * 3, axis=2) - if mask is not None: - img_bw[mask] = self.img[mask] - return img_bw - - def _change_color_brightness(self, color, brightness_factor): - """ - Depending on the brightness_factor, gives a lighter or darker color i.e. a color with - less or more saturation than the original color. - - Args: - color: color of the polygon. Refer to `matplotlib.colors` for a full list of - formats that are accepted. - brightness_factor (float): a value in [-1.0, 1.0] range. A lightness factor of - 0 will correspond to no change, a factor in [-1.0, 0) range will result in - a darker color and a factor in (0, 1.0] range will result in a lighter color. - - Returns: - modified_color (tuple[double]): a tuple containing the RGB values of the - modified color. Each value in the tuple is in the [0.0, 1.0] range. - """ - assert brightness_factor >= -1.0 and brightness_factor <= 1.0 - color = mplc.to_rgb(color) - polygon_color = colorsys.rgb_to_hls(*mplc.to_rgb(color)) - modified_lightness = polygon_color[1] + (brightness_factor * polygon_color[1]) - modified_lightness = 0.0 if modified_lightness < 0.0 else modified_lightness - modified_lightness = 1.0 if modified_lightness > 1.0 else modified_lightness - modified_color = colorsys.hls_to_rgb(polygon_color[0], modified_lightness, polygon_color[2]) - return modified_color - - def _convert_boxes(self, boxes): - """ - Convert different format of boxes to an NxB array, where B = 4 or 5 is the box dimension. - """ - if isinstance(boxes, Boxes) or isinstance(boxes, RotatedBoxes): - return boxes.tensor.detach().numpy() - else: - return np.asarray(boxes) - - def _convert_masks(self, masks_or_polygons): - """ - Convert different format of masks or polygons to a tuple of masks and polygons. 
- - Returns: - list[GenericMask]: - """ - - m = masks_or_polygons - if isinstance(m, PolygonMasks): - m = m.polygons - if isinstance(m, BitMasks): - m = m.tensor.numpy() - if isinstance(m, torch.Tensor): - m = m.numpy() - ret = [] - for x in m: - if isinstance(x, GenericMask): - ret.append(x) - else: - ret.append(GenericMask(x, self.output.height, self.output.width)) - return ret - - def _convert_keypoints(self, keypoints): - if isinstance(keypoints, Keypoints): - keypoints = keypoints.tensor - keypoints = np.asarray(keypoints) - return keypoints - - def get_output(self): - """ - Returns: - output (VisImage): the image output containing the visualizations added - to the image. - """ - return self.output diff --git a/magic_pdf/model/sub_modules/mfd/__init__.py b/magic_pdf/model/sub_modules/mfd/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py b/magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py deleted file mode 100644 index 23d230d00415997d71c14daf136779d32a02ee6b..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/mfd/yolov8/YOLOv8.py +++ /dev/null @@ -1,33 +0,0 @@ -from tqdm import tqdm -from ultralytics import YOLO - - -class YOLOv8MFDModel(object): - def __init__(self, weight, device="cpu"): - self.mfd_model = YOLO(weight) - self.device = device - - def predict(self, image): - mfd_res = self.mfd_model.predict( - image, imgsz=1888, conf=0.25, iou=0.45, verbose=False, device=self.device - )[0] - return mfd_res - - def batch_predict(self, images: list, batch_size: int) -> list: - images_mfd_res = [] - # for index in range(0, len(images), batch_size): - for index in tqdm(range(0, len(images), batch_size), desc="MFD Predict"): - mfd_res = [ - image_res.cpu() - for image_res in self.mfd_model.predict( - images[index : index + batch_size], - imgsz=1888, - conf=0.25, - iou=0.45, - verbose=False, - device=self.device, - ) - ] - for image_res in mfd_res: - images_mfd_res.append(image_res) - return images_mfd_res diff --git a/magic_pdf/model/sub_modules/mfd/yolov8/__init__.py b/magic_pdf/model/sub_modules/mfd/yolov8/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/mfr/__init__.py b/magic_pdf/model/sub_modules/mfr/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py b/magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py deleted file mode 100644 index 6c3a1e1d8a58d5e0f4e178875803df85c123e0d0..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/mfr/unimernet/Unimernet.py +++ /dev/null @@ -1,135 +0,0 @@ -import torch -from torch.utils.data import DataLoader, Dataset -from tqdm import tqdm - - -class MathDataset(Dataset): - def __init__(self, image_paths, transform=None): - self.image_paths = image_paths - self.transform = transform - - def __len__(self): - return len(self.image_paths) - - def __getitem__(self, idx): - raw_image = self.image_paths[idx] - if self.transform: - image = self.transform(raw_image) - return image - - -class UnimernetModel(object): - def __init__(self, weight_dir, cfg_path, _device_="cpu"): - from .unimernet_hf import UnimernetModel - if _device_.startswith("mps"): - self.model = UnimernetModel.from_pretrained(weight_dir, 
attn_implementation="eager") - else: - self.model = UnimernetModel.from_pretrained(weight_dir) - self.device = _device_ - self.model.to(_device_) - if not _device_.startswith("cpu"): - self.model = self.model.to(dtype=torch.float16) - self.model.eval() - - def predict(self, mfd_res, image): - formula_list = [] - mf_image_list = [] - for xyxy, conf, cla in zip( - mfd_res.boxes.xyxy.cpu(), mfd_res.boxes.conf.cpu(), mfd_res.boxes.cls.cpu() - ): - xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy] - new_item = { - "category_id": 13 + int(cla.item()), - "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax], - "score": round(float(conf.item()), 2), - "latex": "", - } - formula_list.append(new_item) - bbox_img = image[ymin:ymax, xmin:xmax] - mf_image_list.append(bbox_img) - - dataset = MathDataset(mf_image_list, transform=self.model.transform) - dataloader = DataLoader(dataset, batch_size=32, num_workers=0) - mfr_res = [] - for mf_img in dataloader: - mf_img = mf_img.to(dtype=self.model.dtype) - mf_img = mf_img.to(self.device) - with torch.no_grad(): - output = self.model.generate({"image": mf_img}) - mfr_res.extend(output["fixed_str"]) - for res, latex in zip(formula_list, mfr_res): - res["latex"] = latex - return formula_list - - def batch_predict(self, images_mfd_res: list, images: list, batch_size: int = 64) -> list: - images_formula_list = [] - mf_image_list = [] - backfill_list = [] - image_info = [] # Store (area, original_index, image) tuples - - # Collect images with their original indices - for image_index in range(len(images_mfd_res)): - mfd_res = images_mfd_res[image_index] - np_array_image = images[image_index] - formula_list = [] - - for idx, (xyxy, conf, cla) in enumerate(zip( - mfd_res.boxes.xyxy, mfd_res.boxes.conf, mfd_res.boxes.cls - )): - xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy] - new_item = { - "category_id": 13 + int(cla.item()), - "poly": [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax], - "score": round(float(conf.item()), 2), - "latex": "", - } - formula_list.append(new_item) - bbox_img = np_array_image[ymin:ymax, xmin:xmax] - area = (xmax - xmin) * (ymax - ymin) - - curr_idx = len(mf_image_list) - image_info.append((area, curr_idx, bbox_img)) - mf_image_list.append(bbox_img) - - images_formula_list.append(formula_list) - backfill_list += formula_list - - # Stable sort by area - image_info.sort(key=lambda x: x[0]) # sort by area - sorted_indices = [x[1] for x in image_info] - sorted_images = [x[2] for x in image_info] - - # Create mapping for results - index_mapping = {new_idx: old_idx for new_idx, old_idx in enumerate(sorted_indices)} - - # Create dataset with sorted images - dataset = MathDataset(sorted_images, transform=self.model.transform) - dataloader = DataLoader(dataset, batch_size=batch_size, num_workers=0) - - # Process batches and store results - mfr_res = [] - # for mf_img in dataloader: - - with tqdm(total=len(sorted_images), desc="MFR Predict") as pbar: - for index, mf_img in enumerate(dataloader): - mf_img = mf_img.to(dtype=self.model.dtype) - mf_img = mf_img.to(self.device) - with torch.no_grad(): - output = self.model.generate({"image": mf_img}) - mfr_res.extend(output["fixed_str"]) - - # 更新进度条,每次增加batch_size,但要注意最后一个batch可能不足batch_size - current_batch_size = min(batch_size, len(sorted_images) - index * batch_size) - pbar.update(current_batch_size) - - # Restore original order - unsorted_results = [""] * len(mfr_res) - for new_idx, latex in enumerate(mfr_res): - original_idx = index_mapping[new_idx] - unsorted_results[original_idx] = 
latex - - # Fill results back - for res, latex in zip(backfill_list, unsorted_results): - res["latex"] = latex - - return images_formula_list diff --git a/magic_pdf/model/sub_modules/mfr/unimernet/__init__.py b/magic_pdf/model/sub_modules/mfr/unimernet/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py b/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py deleted file mode 100644 index 772dcfa32813a2f7befe217ee5addd3e4e6ee28a..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/__init__.py +++ /dev/null @@ -1,13 +0,0 @@ -from .unimer_swin import UnimerSwinConfig, UnimerSwinModel, UnimerSwinImageProcessor -from .unimer_mbart import UnimerMBartConfig, UnimerMBartModel, UnimerMBartForCausalLM -from .modeling_unimernet import UnimernetModel - -__all__ = [ - "UnimerSwinConfig", - "UnimerSwinModel", - "UnimerSwinImageProcessor", - "UnimerMBartConfig", - "UnimerMBartModel", - "UnimerMBartForCausalLM", - "UnimernetModel", -] diff --git a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py b/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py deleted file mode 100644 index a4a9bbb931b5dc12786babe3731d00586879de46..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/modeling_unimernet.py +++ /dev/null @@ -1,490 +0,0 @@ -import os -import re -import warnings -from typing import Optional - -import torch -from ftfy import fix_text -from loguru import logger - -from transformers import AutoConfig, AutoModel, AutoModelForCausalLM, AutoTokenizer, PretrainedConfig, PreTrainedModel -from transformers import VisionEncoderDecoderConfig, VisionEncoderDecoderModel -from transformers.models.vision_encoder_decoder.modeling_vision_encoder_decoder import logger as base_model_logger - -from .unimer_swin import UnimerSwinConfig, UnimerSwinModel, UnimerSwinImageProcessor -from .unimer_mbart import UnimerMBartConfig, UnimerMBartForCausalLM - -AutoConfig.register(UnimerSwinConfig.model_type, UnimerSwinConfig) -AutoConfig.register(UnimerMBartConfig.model_type, UnimerMBartConfig) -AutoModel.register(UnimerSwinConfig, UnimerSwinModel) -AutoModelForCausalLM.register(UnimerMBartConfig, UnimerMBartForCausalLM) - - -# TODO: rewrite tokenizer -class TokenizerWrapper: - def __init__(self, tokenizer): - self.tokenizer = tokenizer - self.pad_token_id = self.tokenizer.pad_token_id - self.bos_token_id = self.tokenizer.bos_token_id - self.eos_token_id = self.tokenizer.eos_token_id - - def __len__(self): - return len(self.tokenizer) - - def tokenize(self, text, **kwargs): - return self.tokenizer( - text, - return_token_type_ids=False, - return_tensors="pt", - padding="longest", - truncation=True, - **kwargs, - ) - - def token2str(self, tokens) -> list: - generated_text = self.tokenizer.batch_decode(tokens, skip_special_tokens=True) - generated_text = [fix_text(text) for text in generated_text] - return generated_text - - def detokenize(self, tokens): - toks = [self.tokenizer.convert_ids_to_tokens(tok) for tok in tokens] - for b in range(len(toks)): - for i in reversed(range(len(toks[b]))): - if toks[b][i] is None: - toks[b][i] = '' - toks[b][i] = toks[b][i].replace('Ġ', ' ').strip() - if toks[b][i] in ([self.tokenizer.bos_token, self.tokenizer.eos_token, self.tokenizer.pad_token]): - del toks[b][i] - return toks - - 
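The `batch_predict` method above sorts the cropped formula images by area before batching and then scatters the recognition results back into their original order through an index mapping. A minimal, standalone sketch of that sort-and-restore pattern (the `batch_in_size_order` helper and the `run_model` callback are hypothetical stand-ins for the real batched recognition call):

```python
# Sketch of the sort-by-size / restore-original-order batching pattern used by
# UnimernetModel.batch_predict. Items are (height, width) sizes; run_model is a
# hypothetical stand-in for the batched model call.

def batch_in_size_order(items, batch_size, run_model):
    # Sort indices by area so similarly sized crops land in the same batch,
    # which keeps padding overhead inside each batch low.
    order = sorted(range(len(items)), key=lambda i: items[i][0] * items[i][1])

    results_sorted = []
    for start in range(0, len(order), batch_size):
        batch = [items[i] for i in order[start:start + batch_size]]
        results_sorted.extend(run_model(batch))

    # Scatter results back into the original input order.
    results = [None] * len(items)
    for new_idx, old_idx in enumerate(order):
        results[old_idx] = results_sorted[new_idx]
    return results


if __name__ == "__main__":
    sizes = [(30, 200), (10, 20), (60, 400), (12, 25)]
    # A fake "model" that just reports the area of each crop.
    print(batch_in_size_order(sizes, batch_size=2, run_model=lambda b: [h * w for h, w in b]))
```

The size ordering is purely a throughput optimization; the index mapping is what guarantees the returned LaTeX strings still line up with `backfill_list` in the original method.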
-LEFT_PATTERN = re.compile(r'(\\left)(\S*)') -RIGHT_PATTERN = re.compile(r'(\\right)(\S*)') -LEFT_COUNT_PATTERN = re.compile(r'\\left(?![a-zA-Z])') -RIGHT_COUNT_PATTERN = re.compile(r'\\right(?![a-zA-Z])') -LEFT_RIGHT_REMOVE_PATTERN = re.compile(r'\\left\.?|\\right\.?') - -def fix_latex_left_right(s): - """ - 修复LaTeX中的\\left和\\right命令 - 1. 确保它们后面跟有效分隔符 - 2. 平衡\\left和\\right的数量 - """ - # 白名单分隔符 - valid_delims_list = [r'(', r')', r'[', r']', r'{', r'}', r'/', r'|', - r'\{', r'\}', r'\lceil', r'\rceil', r'\lfloor', - r'\rfloor', r'\backslash', r'\uparrow', r'\downarrow', - r'\Uparrow', r'\Downarrow', r'\|', r'\.'] - - # 为\left后缺失有效分隔符的情况添加点 - def fix_delim(match, is_left=True): - cmd = match.group(1) # \left 或 \right - rest = match.group(2) if len(match.groups()) > 1 else "" - if not rest or rest not in valid_delims_list: - return cmd + "." - return match.group(0) - - # 使用更精确的模式匹配\left和\right命令 - # 确保它们是独立的命令,不是其他命令的一部分 - # 使用预编译正则和统一回调函数 - s = LEFT_PATTERN.sub(lambda m: fix_delim(m, True), s) - s = RIGHT_PATTERN.sub(lambda m: fix_delim(m, False), s) - - # 更精确地计算\left和\right的数量 - left_count = len(LEFT_COUNT_PATTERN.findall(s)) # 不匹配\lefteqn等 - right_count = len(RIGHT_COUNT_PATTERN.findall(s)) # 不匹配\rightarrow等 - - if left_count == right_count: - # 如果数量相等,检查是否在同一组 - return fix_left_right_pairs(s) - else: - # 如果数量不等,移除所有\left和\right - # logger.debug(f"latex:{s}") - # logger.warning(f"left_count: {left_count}, right_count: {right_count}") - return LEFT_RIGHT_REMOVE_PATTERN.sub('', s) - - -def fix_left_right_pairs(latex_formula): - """ - 检测并修复LaTeX公式中\\left和\\right不在同一组的情况 - - Args: - latex_formula (str): 输入的LaTeX公式 - - Returns: - str: 修复后的LaTeX公式 - """ - # 用于跟踪花括号嵌套层级 - brace_stack = [] - # 用于存储\left信息: (位置, 深度, 分隔符) - left_stack = [] - # 存储需要调整的\right信息: (开始位置, 结束位置, 目标位置) - adjustments = [] - - i = 0 - while i < len(latex_formula): - # 检查是否是转义字符 - if i > 0 and latex_formula[i - 1] == '\\': - backslash_count = 0 - j = i - 1 - while j >= 0 and latex_formula[j] == '\\': - backslash_count += 1 - j -= 1 - - if backslash_count % 2 == 1: - i += 1 - continue - - # 检测\left命令 - if i + 5 < len(latex_formula) and latex_formula[i:i + 5] == "\\left" and i + 5 < len(latex_formula): - delimiter = latex_formula[i + 5] - left_stack.append((i, len(brace_stack), delimiter)) - i += 6 # 跳过\left和分隔符 - continue - - # 检测\right命令 - elif i + 6 < len(latex_formula) and latex_formula[i:i + 6] == "\\right" and i + 6 < len(latex_formula): - delimiter = latex_formula[i + 6] - - if left_stack: - left_pos, left_depth, left_delim = left_stack.pop() - - # 如果\left和\right不在同一花括号深度 - if left_depth != len(brace_stack): - # 找到\left所在花括号组的结束位置 - target_pos = find_group_end(latex_formula, left_pos, left_depth) - if target_pos != -1: - # 记录需要移动的\right - adjustments.append((i, i + 7, target_pos)) - - i += 7 # 跳过\right和分隔符 - continue - - # 处理花括号 - if latex_formula[i] == '{': - brace_stack.append(i) - elif latex_formula[i] == '}': - if brace_stack: - brace_stack.pop() - - i += 1 - - # 应用调整,从后向前处理以避免索引变化 - if not adjustments: - return latex_formula - - result = list(latex_formula) - adjustments.sort(reverse=True, key=lambda x: x[0]) - - for start, end, target in adjustments: - # 提取\right部分 - right_part = result[start:end] - # 从原位置删除 - del result[start:end] - # 在目标位置插入 - result.insert(target, ''.join(right_part)) - - return ''.join(result) - - -def find_group_end(text, pos, depth): - """查找特定深度的花括号组的结束位置""" - current_depth = depth - i = pos - - while i < len(text): - if text[i] == '{' and (i == 0 or not is_escaped(text, i)): - current_depth 
+= 1 - elif text[i] == '}' and (i == 0 or not is_escaped(text, i)): - current_depth -= 1 - if current_depth < depth: - return i - i += 1 - - return -1 # 未找到对应结束位置 - - -def is_escaped(text, pos): - """检查字符是否被转义""" - backslash_count = 0 - j = pos - 1 - while j >= 0 and text[j] == '\\': - backslash_count += 1 - j -= 1 - - return backslash_count % 2 == 1 - - -def fix_unbalanced_braces(latex_formula): - """ - 检测LaTeX公式中的花括号是否闭合,并删除无法配对的花括号 - - Args: - latex_formula (str): 输入的LaTeX公式 - - Returns: - str: 删除无法配对的花括号后的LaTeX公式 - """ - stack = [] # 存储左括号的索引 - unmatched = set() # 存储不匹配括号的索引 - i = 0 - - while i < len(latex_formula): - # 检查是否是转义的花括号 - if latex_formula[i] in ['{', '}']: - # 计算前面连续的反斜杠数量 - backslash_count = 0 - j = i - 1 - while j >= 0 and latex_formula[j] == '\\': - backslash_count += 1 - j -= 1 - - # 如果前面有奇数个反斜杠,则该花括号是转义的,不参与匹配 - if backslash_count % 2 == 1: - i += 1 - continue - - # 否则,该花括号参与匹配 - if latex_formula[i] == '{': - stack.append(i) - else: # latex_formula[i] == '}' - if stack: # 有对应的左括号 - stack.pop() - else: # 没有对应的左括号 - unmatched.add(i) - - i += 1 - - # 所有未匹配的左括号 - unmatched.update(stack) - - # 构建新字符串,删除不匹配的括号 - return ''.join(char for i, char in enumerate(latex_formula) if i not in unmatched) - - -def process_latex(input_string): - """ - 处理LaTeX公式中的反斜杠: - 1. 如果\后跟特殊字符(#$%&~_^\\{})或空格,保持不变 - 2. 如果\后跟两个小写字母,保持不变 - 3. 其他情况,在\后添加空格 - - Args: - input_string (str): 输入的LaTeX公式 - - Returns: - str: 处理后的LaTeX公式 - """ - - def replace_func(match): - # 获取\后面的字符 - next_char = match.group(1) - - # 如果是特殊字符或空格,保持不变 - if next_char in "#$%&~_^|\\{} \t\n\r\v\f": - return match.group(0) - - # 如果是字母,检查下一个字符 - if 'a' <= next_char <= 'z' or 'A' <= next_char <= 'Z': - pos = match.start() + 2 # \x后的位置 - if pos < len(input_string) and ('a' <= input_string[pos] <= 'z' or 'A' <= input_string[pos] <= 'Z'): - # 下一个字符也是字母,保持不变 - return match.group(0) - - # 其他情况,在\后添加空格 - return '\\' + ' ' + next_char - - # 匹配\后面跟一个字符的情况 - pattern = r'\\(.)' - - return re.sub(pattern, replace_func, input_string) - -# 常见的在KaTeX/MathJax中可用的数学环境 -ENV_TYPES = ['array', 'matrix', 'pmatrix', 'bmatrix', 'vmatrix', - 'Bmatrix', 'Vmatrix', 'cases', 'aligned', 'gathered'] -ENV_BEGIN_PATTERNS = {env: re.compile(r'\\begin\{' + env + r'\}') for env in ENV_TYPES} -ENV_END_PATTERNS = {env: re.compile(r'\\end\{' + env + r'\}') for env in ENV_TYPES} -ENV_FORMAT_PATTERNS = {env: re.compile(r'\\begin\{' + env + r'\}\{([^}]*)\}') for env in ENV_TYPES} - -def fix_latex_environments(s): - """ - 检测LaTeX中环境(如array)的\\begin和\\end是否匹配 - 1. 如果缺少\\begin标签则在开头添加 - 2. 
如果缺少\\end标签则在末尾添加 - """ - for env in ENV_TYPES: - begin_count = len(ENV_BEGIN_PATTERNS[env].findall(s)) - end_count = len(ENV_END_PATTERNS[env].findall(s)) - - if begin_count != end_count: - if end_count > begin_count: - format_match = ENV_FORMAT_PATTERNS[env].search(s) - default_format = '{c}' if env == 'array' else '' - format_str = '{' + format_match.group(1) + '}' if format_match else default_format - - missing_count = end_count - begin_count - begin_command = '\\begin{' + env + '}' + format_str + ' ' - s = begin_command * missing_count + s - else: - missing_count = begin_count - end_count - s = s + (' \\end{' + env + '}') * missing_count - - return s - - -UP_PATTERN = re.compile(r'\\up([a-zA-Z]+)') -COMMANDS_TO_REMOVE_PATTERN = re.compile( - r'\\(?:lefteqn|boldmath|ensuremath|centering|textsubscript|sides|textsl|textcent|emph|protect|null)') -REPLACEMENTS_PATTERNS = { - re.compile(r'\\underbar'): r'\\underline', - re.compile(r'\\Bar'): r'\\hat', - re.compile(r'\\Hat'): r'\\hat', - re.compile(r'\\Tilde'): r'\\tilde', - re.compile(r'\\slash'): r'/', - re.compile(r'\\textperthousand'): r'‰', - re.compile(r'\\sun'): r'☉', - re.compile(r'\\textunderscore'): r'\\_', - re.compile(r'\\fint'): r'⨏', - re.compile(r'\\up '): r'\\ ', - re.compile(r'\\vline = '): r'\\models ', - re.compile(r'\\vDash '): r'\\models ', - re.compile(r'\\sq \\sqcup '): r'\\square ', -} -QQUAD_PATTERN = re.compile(r'\\qquad(?!\s)') - -def latex_rm_whitespace(s: str): - """Remove unnecessary whitespace from LaTeX code.""" - s = fix_unbalanced_braces(s) - s = fix_latex_left_right(s) - s = fix_latex_environments(s) - - # 使用预编译的正则表达式 - s = UP_PATTERN.sub( - lambda m: m.group(0) if m.group(1) in ["arrow", "downarrow", "lus", "silon"] else f"\\{m.group(1)}", s - ) - s = COMMANDS_TO_REMOVE_PATTERN.sub('', s) - - # 应用所有替换 - for pattern, replacement in REPLACEMENTS_PATTERNS.items(): - s = pattern.sub(replacement, s) - - # 处理LaTeX中的反斜杠和空格 - s = process_latex(s) - - # \qquad后补空格 - s = QQUAD_PATTERN.sub(r'\\qquad ', s) - - return s - - -class UnimernetModel(VisionEncoderDecoderModel): - def __init__( - self, - config: Optional[PretrainedConfig] = None, - encoder: Optional[PreTrainedModel] = None, - decoder: Optional[PreTrainedModel] = None, - ): - # VisionEncoderDecoderModel's checking log has bug, disable for temp. 
- base_model_logger.disabled = True - try: - super().__init__(config, encoder, decoder) - finally: - base_model_logger.disabled = False - - if not config or not hasattr(config, "_name_or_path"): - raise RuntimeError("config._name_or_path is required by UnimernetModel.") - - model_path = config._name_or_path - self.transform = UnimerSwinImageProcessor() - self.tokenizer = TokenizerWrapper(AutoTokenizer.from_pretrained(model_path)) - self._post_check() - - def _post_check(self): - tokenizer = self.tokenizer - - if tokenizer.tokenizer.model_max_length != self.config.decoder.max_position_embeddings: - warnings.warn( - f"decoder.max_position_embeddings={self.config.decoder.max_position_embeddings}," + - f" but tokenizer.model_max_length={tokenizer.tokenizer.model_max_length}, will set" + - f" tokenizer.model_max_length to {self.config.decoder.max_position_embeddings}.") - tokenizer.tokenizer.model_max_length = self.config.decoder.max_position_embeddings - - assert self.config.decoder.vocab_size == len(tokenizer) - assert self.config.decoder_start_token_id == tokenizer.bos_token_id - assert self.config.pad_token_id == tokenizer.pad_token_id - - @classmethod - def from_checkpoint(cls, model_path: str, model_filename: str = "pytorch_model.pth", state_dict_strip_prefix="model.model."): - config = VisionEncoderDecoderConfig.from_pretrained(model_path) - config._name_or_path = model_path - config.encoder = UnimerSwinConfig(**vars(config.encoder)) - config.decoder = UnimerMBartConfig(**vars(config.decoder)) - - encoder = UnimerSwinModel(config.encoder) - decoder = UnimerMBartForCausalLM(config.decoder) - model = cls(config, encoder, decoder) - - # load model weights - model_file_path = os.path.join(model_path, model_filename) - checkpoint = torch.load(model_file_path, map_location="cpu", weights_only=True) - state_dict = checkpoint["model"] if "model" in checkpoint else checkpoint - if not state_dict: - raise RuntimeError("state_dict is empty.") - if state_dict_strip_prefix: - state_dict = { - k[len(state_dict_strip_prefix):] if k.startswith(state_dict_strip_prefix) else k: v - for k, v in state_dict.items() - } - missing_keys, unexpected_keys = model.load_state_dict(state_dict, strict=False) - if len(unexpected_keys) > 0: - warnings.warn("Unexpected key(s) in state_dict: {}.".format(", ".join(f'"{k}"' for k in unexpected_keys))) - if len(missing_keys) > 0: - raise RuntimeError("Missing key(s) in state_dict: {}.".format(", ".join(f'"{k}"' for k in missing_keys))) - return model - - def forward_bak(self, samples): - pixel_values, text = samples["image"], samples["text_input"] - - text_inputs = self.tokenizer.tokenize(text).to(pixel_values.device) - decoder_input_ids, decoder_attention_mask = text_inputs["input_ids"], text_inputs["attention_mask"] - - num_channels = pixel_values.shape[1] - if num_channels == 1: - pixel_values = pixel_values.repeat(1, 3, 1, 1) - - labels = decoder_input_ids * 1 - labels = labels.masked_fill(labels == self.tokenizer.pad_token_id, -100) - - loss = self.model( - pixel_values=pixel_values, - decoder_input_ids=decoder_input_ids[:, :-1], - decoder_attention_mask=decoder_attention_mask[:, :-1], - labels=labels[:, 1:], - ).loss - return {"loss": loss} - - def generate(self, samples, do_sample: bool = False, temperature: float = 0.2, top_p: float = 0.95): - pixel_values = samples["image"] - num_channels = pixel_values.shape[1] - if num_channels == 1: - pixel_values = pixel_values.repeat(1, 3, 1, 1) - - kwargs = {} - if do_sample: - kwargs["temperature"] = temperature - 
kwargs["top_p"] = top_p - - outputs = super().generate( - pixel_values=pixel_values, - max_new_tokens=self.tokenizer.tokenizer.model_max_length, # required - decoder_start_token_id=self.tokenizer.tokenizer.bos_token_id, - do_sample=do_sample, - **kwargs, - ) - - outputs = outputs[:, 1:].cpu().numpy() - pred_tokens = self.tokenizer.detokenize(outputs) - pred_str = self.tokenizer.token2str(outputs) - fixed_str = [latex_rm_whitespace(s) for s in pred_str] - return {"pred_ids": outputs, "pred_tokens": pred_tokens, "pred_str": pred_str, "fixed_str": fixed_str} - diff --git a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py b/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py deleted file mode 100644 index 155a786bf087fad4c9707dd6a38d8f3a252937b3..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/__init__.py +++ /dev/null @@ -1,8 +0,0 @@ -from .configuration_unimer_mbart import UnimerMBartConfig -from .modeling_unimer_mbart import UnimerMBartModel, UnimerMBartForCausalLM - -__all__ = [ - "UnimerMBartConfig", - "UnimerMBartModel", - "UnimerMBartForCausalLM", -] diff --git a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py b/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py deleted file mode 100644 index eef4a57d069104e0f45b93c02454e82e41d084c4..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/configuration_unimer_mbart.py +++ /dev/null @@ -1,163 +0,0 @@ -# coding=utf-8 -# Copyright 2021, The Facebook AI Research Team and The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""UnimerMBART model configuration""" - -from transformers.configuration_utils import PretrainedConfig -from transformers.utils import logging - - -logger = logging.get_logger(__name__) - - -class UnimerMBartConfig(PretrainedConfig): - r""" - This is the configuration class to store the configuration of a [`MBartModel`]. It is used to instantiate an MBART - model according to the specified arguments, defining the model architecture. Instantiating a configuration with the - defaults will yield a similar configuration to that of the MBART - [facebook/mbart-large-cc25](https://huggingface.co/facebook/mbart-large-cc25) architecture. - - Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the - documentation from [`PretrainedConfig`] for more information. - - - Args: - vocab_size (`int`, *optional*, defaults to 50265): - Vocabulary size of the MBART model. Defines the number of different tokens that can be represented by the - `inputs_ids` passed when calling [`MBartModel`] or [`TFMBartModel`]. - d_model (`int`, *optional*, defaults to 1024): - Dimensionality of the layers and the pooler layer. 
- qk_squeeze (`int`, *optional*, defaults to 2): - Squeeze ratio for query/key's output dimension. See the [UniMERNet paper](https://arxiv.org/abs/2404.15254). - Squeeze Attention maps the query and key to a lower-dimensional space without excessive loss of information, - thereby accelerating the computation of attention. - encoder_layers (`int`, *optional*, defaults to 12): - Number of encoder layers. - decoder_layers (`int`, *optional*, defaults to 12): - Number of decoder layers. - encoder_attention_heads (`int`, *optional*, defaults to 16): - Number of attention heads for each attention layer in the Transformer encoder. - decoder_attention_heads (`int`, *optional*, defaults to 16): - Number of attention heads for each attention layer in the Transformer decoder. - decoder_ffn_dim (`int`, *optional*, defaults to 4096): - Dimensionality of the "intermediate" (often named feed-forward) layer in decoder. - encoder_ffn_dim (`int`, *optional*, defaults to 4096): - Dimensionality of the "intermediate" (often named feed-forward) layer in decoder. - activation_function (`str` or `function`, *optional*, defaults to `"gelu"`): - The non-linear activation function (function or string) in the encoder and pooler. If string, `"gelu"`, - `"relu"`, `"silu"` and `"gelu_new"` are supported. - dropout (`float`, *optional*, defaults to 0.1): - The dropout probability for all fully connected layers in the embeddings, encoder, and pooler. - attention_dropout (`float`, *optional*, defaults to 0.0): - The dropout ratio for the attention probabilities. - activation_dropout (`float`, *optional*, defaults to 0.0): - The dropout ratio for activations inside the fully connected layer. - classifier_dropout (`float`, *optional*, defaults to 0.0): - The dropout ratio for classifier. - max_position_embeddings (`int`, *optional*, defaults to 1024): - The maximum sequence length that this model might ever be used with. Typically set this to something large - just in case (e.g., 512 or 1024 or 2048). - init_std (`float`, *optional*, defaults to 0.02): - The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - encoder_layerdrop (`float`, *optional*, defaults to 0.0): - The LayerDrop probability for the encoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556) - for more details. - decoder_layerdrop (`float`, *optional*, defaults to 0.0): - The LayerDrop probability for the decoder. See the [LayerDrop paper](see https://arxiv.org/abs/1909.11556) - for more details. - scale_embedding (`bool`, *optional*, defaults to `False`): - Scale embeddings by diving by sqrt(d_model). - use_cache (`bool`, *optional*, defaults to `True`): - Whether or not the model should return the last key/values attentions (not used by all models) - forced_eos_token_id (`int`, *optional*, defaults to 2): - The id of the token to force as the last generated token when `max_length` is reached. Usually set to - `eos_token_id`. 
- - Example: - - ```python - >>> from transformers import MBartConfig, MBartModel - - >>> # Initializing a MBART facebook/mbart-large-cc25 style configuration - >>> configuration = MBartConfig() - - >>> # Initializing a model (with random weights) from the facebook/mbart-large-cc25 style configuration - >>> model = MBartModel(configuration) - - >>> # Accessing the model configuration - >>> configuration = model.config - ```""" - - model_type = "unimer-mbart" - keys_to_ignore_at_inference = ["past_key_values"] - attribute_map = {"num_attention_heads": "encoder_attention_heads", "hidden_size": "d_model"} - - def __init__( - self, - vocab_size=50265, - max_position_embeddings=1024, - encoder_layers=12, - encoder_ffn_dim=4096, - encoder_attention_heads=16, - decoder_layers=12, - decoder_ffn_dim=4096, - decoder_attention_heads=16, - encoder_layerdrop=0.0, - decoder_layerdrop=0.0, - use_cache=True, - is_encoder_decoder=True, - activation_function="gelu", - d_model=1024, - qk_squeeze=2, - dropout=0.1, - attention_dropout=0.0, - activation_dropout=0.0, - init_std=0.02, - classifier_dropout=0.0, - scale_embedding=False, - pad_token_id=1, - bos_token_id=0, - eos_token_id=2, - forced_eos_token_id=2, - **kwargs, - ): - self.vocab_size = vocab_size - self.max_position_embeddings = max_position_embeddings - self.d_model = d_model - self.qk_squeeze = qk_squeeze - self.encoder_ffn_dim = encoder_ffn_dim - self.encoder_layers = encoder_layers - self.encoder_attention_heads = encoder_attention_heads - self.decoder_ffn_dim = decoder_ffn_dim - self.decoder_layers = decoder_layers - self.decoder_attention_heads = decoder_attention_heads - self.dropout = dropout - self.attention_dropout = attention_dropout - self.activation_dropout = activation_dropout - self.activation_function = activation_function - self.init_std = init_std - self.encoder_layerdrop = encoder_layerdrop - self.decoder_layerdrop = decoder_layerdrop - self.classifier_dropout = classifier_dropout - self.use_cache = use_cache - self.num_hidden_layers = encoder_layers - self.scale_embedding = scale_embedding # scale factor will be sqrt(d_model) if True - super().__init__( - pad_token_id=pad_token_id, - bos_token_id=bos_token_id, - eos_token_id=eos_token_id, - is_encoder_decoder=is_encoder_decoder, - forced_eos_token_id=forced_eos_token_id, - **kwargs, - ) diff --git a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py b/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py deleted file mode 100644 index 08a5a049a2bad5b1ea3c61edc7d6aac01c4f95a1..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/modeling_unimer_mbart.py +++ /dev/null @@ -1,2351 +0,0 @@ -# coding=utf-8 -# Copyright 2021, The Facebook AI Research Team and The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
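For orientation, `qk_squeeze` above narrows only the query/key projections: `UnimerMBartAttention` later in this diff computes `squeeze_dim = embed_dim // qk_squeeze` and scales queries by `squeeze_head_dim ** -0.5`, while the value and output projections keep the full width. A small sketch of that shape arithmetic (the `squeeze_attention_dims` helper is hypothetical; plain Python, no transformers dependency assumed):

```python
# Shape arithmetic behind qk_squeeze in UnimerMBartAttention: q/k are projected
# to embed_dim // qk_squeeze, while v and the output projection keep embed_dim.

def squeeze_attention_dims(d_model: int, num_heads: int, qk_squeeze: int) -> dict:
    head_dim = d_model // num_heads              # per-head width of v / output
    squeeze_dim = d_model // qk_squeeze          # total q/k width after squeezing
    squeeze_head_dim = squeeze_dim // num_heads  # per-head q/k width
    return {
        "q_proj/k_proj out_features": squeeze_dim,
        "v_proj/out_proj out_features": d_model,
        "head_dim": head_dim,
        "squeeze_head_dim": squeeze_head_dim,
        "scaling": squeeze_head_dim ** -0.5,     # applied to the query states
    }


if __name__ == "__main__":
    # UnimerMBartConfig defaults: d_model=1024, 16 decoder attention heads, qk_squeeze=2,
    # so q/k shrink to 512 (32 per head) while v/out stay at 1024 (64 per head).
    print(squeeze_attention_dims(d_model=1024, num_heads=16, qk_squeeze=2))
```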
-"""PyTorch UnimerMBART model.""" - -import copy -import math -from dataclasses import dataclass -from typing import List, Optional, Tuple, Union - -import torch -import torch.nn.functional as F -import torch.utils.checkpoint -from torch import nn -from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss - -from transformers.activations import ACT2FN -from transformers.modeling_attn_mask_utils import ( - _prepare_4d_attention_mask, - _prepare_4d_attention_mask_for_sdpa, - _prepare_4d_causal_attention_mask, - _prepare_4d_causal_attention_mask_for_sdpa, -) -from transformers.modeling_outputs import ( - BaseModelOutput, - BaseModelOutputWithPastAndCrossAttentions, - CausalLMOutputWithCrossAttentions, - Seq2SeqLMOutput, - Seq2SeqModelOutput, - Seq2SeqQuestionAnsweringModelOutput, - Seq2SeqSequenceClassifierOutput, -) -from transformers import GenerationMixin, PreTrainedModel -from transformers.utils import ( - add_code_sample_docstrings, - add_end_docstrings, - add_start_docstrings, - add_start_docstrings_to_model_forward, - is_flash_attn_2_available, - is_flash_attn_greater_or_equal_2_10, - logging, - replace_return_docstrings, -) -from .configuration_unimer_mbart import UnimerMBartConfig - - -if is_flash_attn_2_available(): - from flash_attn import flash_attn_func, flash_attn_varlen_func - from flash_attn.bert_padding import index_first_axis, pad_input, unpad_input # noqa - - -logger = logging.get_logger(__name__) - -_CHECKPOINT_FOR_DOC = "facebook/mbart-large-cc25" -_CONFIG_FOR_DOC = "MBartConfig" - -# Base model docstring -_EXPECTED_OUTPUT_SHAPE = [1, 8, 1024] - - -# Copied from transformers.models.llama.modeling_llama._get_unpad_data -def _get_unpad_data(attention_mask): - seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32) - indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten() - max_seqlen_in_batch = seqlens_in_batch.max().item() - cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0)) - return ( - indices, - cu_seqlens, - max_seqlen_in_batch, - ) - - -def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int): - """ - Shift input ids one token to the right, and wrap the last non pad token (the token) Note that MBart does not - have a single `decoder_start_token_id` in contrast to other Bart-like models. - """ - prev_output_tokens = input_ids.clone() - - if pad_token_id is None: - raise ValueError("self.model.config.pad_token_id has to be defined.") - # replace possible -100 values in labels by `pad_token_id` - prev_output_tokens.masked_fill_(prev_output_tokens == -100, pad_token_id) - - index_of_eos = (prev_output_tokens.ne(pad_token_id).sum(dim=1) - 1).unsqueeze(-1) - decoder_start_tokens = prev_output_tokens.gather(1, index_of_eos).squeeze() - prev_output_tokens[:, 1:] = prev_output_tokens[:, :-1].clone() - prev_output_tokens[:, 0] = decoder_start_tokens - - return prev_output_tokens - -@dataclass -class CausalLMOutputWithCrossAttentionsAndCounting(CausalLMOutputWithCrossAttentions): - """ - Base class for causal language model (or autoregressive) outputs. - - Args: - loss (`torch.FloatTensor` of shape `(1,)`, *optional*, returned when `labels` is provided): - Language modeling loss (for next-token prediction). - logits (`torch.FloatTensor` of shape `(batch_size, sequence_length, config.vocab_size)`): - Prediction scores of the language modeling head (scores for each vocabulary token before SoftMax). 
- hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): - Tuple of `torch.FloatTensor` (one for the output of the embeddings, if the model has an embedding layer, + - one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`. - - Hidden-states of the model at the output of each layer plus the optional initial embedding outputs. - attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): - Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, - sequence_length)`. - - Attentions weights after the attention softmax, used to compute the weighted average in the self-attention - heads. - cross_attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): - Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length, - sequence_length)`. - - Cross attentions weights after the attention softmax, used to compute the weighted average in the - cross-attention heads. - past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`): - Tuple of `torch.FloatTensor` tuples of length `config.n_layers`, with each tuple containing the cached key, - value states of the self-attention and the cross-attention layers if model is used in encoder-decoder - setting. Only relevant if `config.is_decoder = True`. - - Contains pre-computed hidden-states (key and values in the attention blocks) that can be used (see - `past_key_values` input) to speed up sequential decoding. - counting: - Counting - """ - counting: Optional[torch.FloatTensor] = None - -# Copied from transformers.models.bart.modeling_bart.BartLearnedPositionalEmbedding with Bart->MBart -class UnimerMBartLearnedPositionalEmbedding(nn.Embedding): - """ - This module learns positional embeddings up to a fixed maximum size. - """ - - def __init__(self, num_embeddings: int, embedding_dim: int): - # MBart is set up so that if padding_idx is specified then offset the embedding ids by 2 - # and adjust num_embeddings appropriately. Other models don't have this hack - self.offset = 2 - super().__init__(num_embeddings + self.offset, embedding_dim) - - def forward(self, input_ids: torch.Tensor, past_key_values_length: int = 0): - """`input_ids' shape is expected to be [bsz x seqlen].""" - - bsz, seq_len = input_ids.shape[:2] - positions = torch.arange( - past_key_values_length, past_key_values_length + seq_len, dtype=torch.long, device=self.weight.device - ).expand(bsz, -1) - - return super().forward(positions + self.offset) - - -# Copied from transformers.models.bart.modeling_bart.BartScaledWordEmbedding with Bart->MBart -class UnimerMBartScaledWordEmbedding(nn.Embedding): - """ - This module overrides nn.Embeddings' forward by multiplying with embeddings scale. 
- """ - - def __init__(self, num_embeddings: int, embedding_dim: int, padding_idx: int, embed_scale: Optional[float] = 1.0): - super().__init__(num_embeddings, embedding_dim, padding_idx) - self.embed_scale = embed_scale - - def forward(self, input_ids: torch.Tensor): - return super().forward(input_ids) * self.embed_scale - - -# Copied from transformers.models.bart.modeling_bart.BartAttention with Bart->MBart -class UnimerMBartAttention(nn.Module): - """Multi-headed attention from 'Attention Is All You Need' paper, with qk_squeeze""" - - def __init__( - self, - embed_dim: int, - num_heads: int, - dropout: float = 0.0, - is_decoder: bool = False, - bias: bool = True, - is_causal: bool = False, - *, - config: UnimerMBartConfig, - ): - super().__init__() - self.embed_dim = embed_dim - self.num_heads = num_heads - self.dropout = dropout - self.head_dim = embed_dim // num_heads - self.config = config - - if (self.head_dim * num_heads) != self.embed_dim: - raise ValueError( - f"embed_dim must be divisible by num_heads (got `embed_dim`: {self.embed_dim}" - f" and `num_heads`: {num_heads})." - ) - - self.squeeze_dim = embed_dim // config.qk_squeeze - self.squeeze_head_dim = self.squeeze_dim // num_heads - self.scaling = self.squeeze_head_dim**-0.5 - self.is_decoder = is_decoder - self.is_causal = is_causal - - self.q_proj = nn.Linear(embed_dim, self.squeeze_dim, bias=bias) - self.k_proj = nn.Linear(embed_dim, self.squeeze_dim, bias=bias) - self.v_proj = nn.Linear(embed_dim, embed_dim, bias=bias) - self.out_proj = nn.Linear(embed_dim, embed_dim, bias=bias) - - def _shape_qk(self, tensor: torch.Tensor, seq_len: int, bsz: int): - return tensor.view(bsz, seq_len, self.num_heads, self.squeeze_head_dim).transpose(1, 2).contiguous() - - def _shape_v(self, tensor: torch.Tensor, seq_len: int, bsz: int): - return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous() - - def forward( - self, - hidden_states: torch.Tensor, - key_value_states: Optional[torch.Tensor] = None, - past_key_value: Optional[Tuple[torch.Tensor]] = None, - attention_mask: Optional[torch.Tensor] = None, - layer_head_mask: Optional[torch.Tensor] = None, - output_attentions: bool = False, - ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: - """Input shape: Batch x Time x Channel""" - - # if key_value_states are provided this layer is used as a cross-attention layer - # for the decoder - is_cross_attention = key_value_states is not None - - bsz, tgt_len, _ = hidden_states.size() - - # get query proj - query_states = self.q_proj(hidden_states) * self.scaling - # get key, value proj - # `past_key_value[0].shape[2] == key_value_states.shape[1]` - # is checking that the `sequence_length` of the `past_key_value` is the same as - # the provided `key_value_states` to support prefix tuning - if ( - is_cross_attention - and past_key_value is not None - and past_key_value[0].shape[2] == key_value_states.shape[1] - ): - # reuse k,v, cross_attentions - key_states = past_key_value[0] - value_states = past_key_value[1] - elif is_cross_attention: - # cross_attentions - key_states = self._shape_qk(self.k_proj(key_value_states), -1, bsz) - value_states = self._shape_v(self.v_proj(key_value_states), -1, bsz) - elif past_key_value is not None: - # reuse k, v, self_attention - key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz) - value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz) - key_states = torch.cat([past_key_value[0], key_states], dim=2) - value_states = 
torch.cat([past_key_value[1], value_states], dim=2) - else: - # self_attention - key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz) - value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz) - - if self.is_decoder: - # if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states. - # Further calls to cross_attention layer can then reuse all cross-attention - # key/value_states (first "if" case) - # if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of - # all previous decoder key/value_states. Further calls to uni-directional self-attention - # can concat previous decoder key/value_states to current projected key/value_states (third "elif" case) - # if encoder bi-directional self-attention `past_key_value` is always `None` - past_key_value = (key_states, value_states) - - proj_shape = (bsz * self.num_heads, -1, self.squeeze_head_dim) - value_shape = (bsz * self.num_heads, -1, self.head_dim) - query_states = self._shape_qk(query_states, tgt_len, bsz).view(*proj_shape) - key_states = key_states.reshape(*proj_shape) - value_states = value_states.reshape(*value_shape) - - src_len = key_states.size(1) - attn_weights = torch.bmm(query_states, key_states.transpose(1, 2)) - - if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len): - raise ValueError( - f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is" - f" {attn_weights.size()}" - ) - - if attention_mask is not None: - if attention_mask.size() != (bsz, 1, tgt_len, src_len): - raise ValueError( - f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {attention_mask.size()}" - ) - attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attention_mask - attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) - - attn_weights = nn.functional.softmax(attn_weights, dim=-1) - - if layer_head_mask is not None: - if layer_head_mask.size() != (self.num_heads,): - raise ValueError( - f"Head mask for a single layer should be of size {(self.num_heads,)}, but is" - f" {layer_head_mask.size()}" - ) - attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len) - attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len) - - if output_attentions: - # this operation is a bit awkward, but it's required to - # make sure that attn_weights keeps its gradient. - # In order to do so, attn_weights have to be reshaped - # twice and have to be reused in the following - attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) - attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len) - else: - attn_weights_reshaped = None - - attn_probs = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training) - attn_output = torch.bmm(attn_probs, value_states) - - if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim): - raise ValueError( - f"`attn_output` should be of size {(bsz * self.num_heads, tgt_len, self.head_dim)}, but is" - f" {attn_output.size()}" - ) - - attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim) - attn_output = attn_output.transpose(1, 2) - - # Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be - # partitioned across GPUs when using tensor-parallelism. 
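-        # Shape note: at this point `attn_output` is (bsz, tgt_len, num_heads, head_dim); the reshape below folds the head dimensions back into `embed_dim`.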
- attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim) - - attn_output = self.out_proj(attn_output) - - return attn_output, attn_weights_reshaped, past_key_value - - -# Copied from transformers.models.bart.modeling_bart.BartFlashAttention2 with Bart->MBart -class UnimerMBartFlashAttention2(UnimerMBartAttention): - """ - MBart flash attention module. This module inherits from `MBartSqueezeAttention` as the weights of the module stays - untouched. The only required change would be on the forward pass where it needs to correctly call the public API of - flash attention and deal with padding tokens in case the input contains any of them. - """ - - # Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2.__init__ - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - # TODO: Should be removed once Flash Attention for RoCm is bumped to 2.1. - # flash_attn<2.1 generates top-left aligned causal mask, while what is needed here is bottom-right alignement, that was made default for flash_attn>=2.1. This attribute is used to handle this difference. Reference: https://github.com/Dao-AILab/flash-attention/releases/tag/v2.1.0. - # Beware that with flash_attn<2.1, using q_seqlen != k_seqlen (except for the case q_seqlen == 1) produces a wrong mask (top-left). - self._flash_attn_uses_top_left_mask = not is_flash_attn_greater_or_equal_2_10() - - # def _reshape(self, tensor: torch.Tensor, seq_len: int, bsz: int): - # return tensor.view(bsz, seq_len, self.num_heads, self.head_dim) - - def _shape_qk(self, tensor: torch.Tensor, seq_len: int, bsz: int): - return tensor.view(bsz, seq_len, self.num_heads, self.squeeze_head_dim) - - def _shape_v(self, tensor: torch.Tensor, seq_len: int, bsz: int): - return tensor.view(bsz, seq_len, self.num_heads, self.head_dim) - - def forward( - self, - hidden_states: torch.Tensor, - key_value_states: Optional[torch.Tensor] = None, - past_key_value: Optional[Tuple[torch.Tensor]] = None, - attention_mask: Optional[torch.Tensor] = None, - layer_head_mask: Optional[torch.Tensor] = None, - output_attentions: bool = False, - ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: - # MBartFlashAttention2 attention does not support output_attentions - if output_attentions: - raise ValueError("MBartFlashAttention2 attention does not support output_attentions") - - # if key_value_states are provided this layer is used as a cross-attention layer - # for the decoder - is_cross_attention = key_value_states is not None - - bsz, q_len, _ = hidden_states.size() - - # get query proj - query_states = self._shape_qk(self.q_proj(hidden_states), -1, bsz) - - # get key, value proj - # `past_key_value[0].shape[2] == key_value_states.shape[1]` - # is checking that the `sequence_length` of the `past_key_value` is the same as - # the provided `key_value_states` to support prefix tuning - if ( - is_cross_attention - and past_key_value is not None - and past_key_value[0].shape[2] == key_value_states.shape[1] - ): - # reuse k,v, cross_attentions - key_states = past_key_value[0].transpose(1, 2) - value_states = past_key_value[1].transpose(1, 2) - elif is_cross_attention: - # cross_attentions - key_states = self._shape_qk(self.k_proj(key_value_states), -1, bsz) - value_states = self._shape_v(self.v_proj(key_value_states), -1, bsz) - elif past_key_value is not None: - # reuse k, v, self_attention - key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz) - value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz) - 
key_states = torch.cat([past_key_value[0].transpose(1, 2), key_states], dim=1) - value_states = torch.cat([past_key_value[1].transpose(1, 2), value_states], dim=1) - else: - # self_attention - key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz) - value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz) - - if self.is_decoder: - # if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states. - # Further calls to cross_attention layer can then reuse all cross-attention - # key/value_states (first "if" case) - # if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of - # all previous decoder key/value_states. Further calls to uni-directional self-attention - # can concat previous decoder key/value_states to current projected key/value_states (third "elif" case) - # if encoder bi-directional self-attention `past_key_value` is always `None` - past_key_value = (key_states.transpose(1, 2), value_states.transpose(1, 2)) - - kv_seq_len = key_states.shape[-2] - if past_key_value is not None: - kv_seq_len += past_key_value[0].shape[-2] - - # In PEFT, usually we cast the layer norms in float32 for training stability reasons - # therefore the input hidden states gets silently casted in float32. Hence, we need - # cast them back in the correct dtype just to be sure everything works as expected. - # This might slowdown training & inference so it is recommended to not cast the LayerNorms - # in fp32. (LlamaRMSNorm handles it correctly) - - input_dtype = query_states.dtype - if input_dtype == torch.float32: - if torch.is_autocast_enabled(): - target_dtype = torch.get_autocast_gpu_dtype() - # Handle the case where the model is quantized - elif hasattr(self.config, "_pre_quantization_dtype"): - target_dtype = self.config._pre_quantization_dtype - else: - target_dtype = self.q_proj.weight.dtype - - logger.warning_once( - f"The input hidden states seems to be silently casted in float32, this might be related to" - f" the fact you have upcasted embedding or layer norm layers in float32. We will cast back the input in" - f" {target_dtype}." - ) - - query_states = query_states.to(target_dtype) - key_states = key_states.to(target_dtype) - value_states = value_states.to(target_dtype) - - attn_output = self._flash_attention_forward( - query_states, key_states, value_states, attention_mask, q_len, dropout=self.dropout - ) - - attn_output = attn_output.reshape(bsz, q_len, -1) - attn_output = self.out_proj(attn_output) - - if not output_attentions: - attn_weights = None - - return attn_output, attn_weights, past_key_value - - # Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2._flash_attention_forward - def _flash_attention_forward( - self, query_states, key_states, value_states, attention_mask, query_length, dropout=0.0, softmax_scale=None - ): - """ - Calls the forward method of Flash Attention - if the input hidden states contain at least one padding token - first unpad the input, then computes the attention scores and pad the final attention scores. 
- - Args: - query_states (`torch.Tensor`): - Input query states to be passed to Flash Attention API - key_states (`torch.Tensor`): - Input key states to be passed to Flash Attention API - value_states (`torch.Tensor`): - Input value states to be passed to Flash Attention API - attention_mask (`torch.Tensor`): - The padding mask - corresponds to a tensor of size `(batch_size, seq_len)` where 0 stands for the - position of padding tokens and 1 for the position of non-padding tokens. - dropout (`float`): - Attention dropout - softmax_scale (`float`, *optional*): - The scaling of QK^T before applying softmax. Default to 1 / sqrt(head_dim) - """ - if not self._flash_attn_uses_top_left_mask: - causal = self.is_causal - else: - # TODO: Remove the `query_length != 1` check once Flash Attention for RoCm is bumped to 2.1. For details, please see the comment in LlamaFlashAttention2 __init__. - causal = self.is_causal and query_length != 1 - - # Contains at least one padding token in the sequence - if attention_mask is not None: - batch_size = query_states.shape[0] - - query_states, key_states, value_states, indices_q, cu_seq_lens, max_seq_lens = self._upad_input( - query_states, key_states, value_states, attention_mask, query_length - ) - - cu_seqlens_q, cu_seqlens_k = cu_seq_lens - max_seqlen_in_batch_q, max_seqlen_in_batch_k = max_seq_lens - - attn_output_unpad = flash_attn_varlen_func( - query_states, - key_states, - value_states, - cu_seqlens_q=cu_seqlens_q, - cu_seqlens_k=cu_seqlens_k, - max_seqlen_q=max_seqlen_in_batch_q, - max_seqlen_k=max_seqlen_in_batch_k, - dropout_p=dropout, - softmax_scale=softmax_scale, - causal=causal, - ) - - attn_output = pad_input(attn_output_unpad, indices_q, batch_size, query_length) - else: - attn_output = flash_attn_func( - query_states, key_states, value_states, dropout, softmax_scale=softmax_scale, causal=causal - ) - - return attn_output - - # Copied from transformers.models.llama.modeling_llama.LlamaFlashAttention2._upad_input - def _upad_input(self, query_layer, key_layer, value_layer, attention_mask, query_length): - indices_k, cu_seqlens_k, max_seqlen_in_batch_k = _get_unpad_data(attention_mask) - batch_size, kv_seq_len, num_key_value_heads, head_dim = key_layer.shape - - key_layer = index_first_axis( - key_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), indices_k - ) - value_layer = index_first_axis( - value_layer.reshape(batch_size * kv_seq_len, num_key_value_heads, head_dim), indices_k - ) - if query_length == kv_seq_len: - query_layer = index_first_axis( - query_layer.reshape(batch_size * kv_seq_len, self.num_heads, head_dim), indices_k - ) - cu_seqlens_q = cu_seqlens_k - max_seqlen_in_batch_q = max_seqlen_in_batch_k - indices_q = indices_k - elif query_length == 1: - max_seqlen_in_batch_q = 1 - cu_seqlens_q = torch.arange( - batch_size + 1, dtype=torch.int32, device=query_layer.device - ) # There is a memcpy here, that is very bad. - indices_q = cu_seqlens_q[:-1] - query_layer = query_layer.squeeze(1) - else: - # The -q_len: slice assumes left padding. 
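-            # `unpad_input` keeps only the non-padded query positions and returns the index metadata (indices, cu_seqlens, max_seqlen) expected by `flash_attn_varlen_func`.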
- attention_mask = attention_mask[:, -query_length:] - query_layer, indices_q, cu_seqlens_q, max_seqlen_in_batch_q = unpad_input(query_layer, attention_mask) - - return ( - query_layer, - key_layer, - value_layer, - indices_q, - (cu_seqlens_q, cu_seqlens_k), - (max_seqlen_in_batch_q, max_seqlen_in_batch_k), - ) - -class UnimerMBartSdpaAttention(UnimerMBartAttention): - def forward( - self, - hidden_states: torch.Tensor, - key_value_states: Optional[torch.Tensor] = None, - past_key_value: Optional[Tuple[torch.Tensor]] = None, - attention_mask: Optional[torch.Tensor] = None, - layer_head_mask: Optional[torch.Tensor] = None, - output_attentions: bool = False, - ) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]: - """Input shape: Batch x Time x Channel""" - if output_attentions or layer_head_mask is not None: - # TODO: Improve this warning with e.g. `model.config._attn_implementation = "manual"` once this is implemented. - logger.warning( - "BartModel is using BartSdpaAttention, but `torch.nn.functional.scaled_dot_product_attention` does not support `output_attentions=True` or `layer_head_mask` not None. Falling back to the manual attention" - ' implementation, but specifying the manual implementation will be required from Transformers version v5.0.0 onwards. This warning can be removed using the argument `attn_implementation="eager"` when loading the model.' - ) - return super().forward( - hidden_states, - key_value_states=key_value_states, - past_key_value=past_key_value, - attention_mask=attention_mask, - layer_head_mask=layer_head_mask, - output_attentions=output_attentions, - ) - - # if key_value_states are provided this layer is used as a cross-attention layer - # for the decoder - is_cross_attention = key_value_states is not None - - bsz, tgt_len, _ = hidden_states.size() - - # get query proj - query_states = self.q_proj(hidden_states) - # get key, value proj - # `past_key_value[0].shape[2] == key_value_states.shape[1]` - # is checking that the `sequence_length` of the `past_key_value` is the same as - # the provided `key_value_states` to support prefix tuning - if ( - is_cross_attention - and past_key_value is not None - and past_key_value[0].shape[2] == key_value_states.shape[1] - ): - # reuse k,v, cross_attentions - key_states = past_key_value[0] - value_states = past_key_value[1] - elif is_cross_attention: - # cross_attentions - key_states = self._shape_qk(self.k_proj(key_value_states), -1, bsz) - value_states = self._shape_v(self.v_proj(key_value_states), -1, bsz) - elif past_key_value is not None: - # reuse k, v, self_attention - key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz) - value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz) - key_states = torch.cat([past_key_value[0], key_states], dim=2) - value_states = torch.cat([past_key_value[1], value_states], dim=2) - else: - # self_attention - key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz) - value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz) - - if self.is_decoder: - # if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states. - # Further calls to cross_attention layer can then reuse all cross-attention - # key/value_states (first "if" case) - # if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of - # all previous decoder key/value_states. 
Further calls to uni-directional self-attention - # can concat previous decoder key/value_states to current projected key/value_states (third "elif" case) - # if encoder bi-directional self-attention `past_key_value` is always `None` - past_key_value = (key_states, value_states) - - query_states = self._shape_qk(query_states, tgt_len, bsz) - - # We dispatch to SDPA's Flash Attention or Efficient kernels via this `is_causal` if statement instead of an inline conditional assignment - # in SDPA to support both torch.compile's dynamic shapes and full graph options. An inline conditional prevents dynamic shapes from compiling. - # The tgt_len > 1 is necessary to match with AttentionMaskConverter.to_causal_4d that does not create a causal mask in case tgt_len == 1. - is_causal = True if self.is_causal and attention_mask is None and tgt_len > 1 else False - - # NOTE: SDPA with memory-efficient backend is currently (torch==2.1.2) bugged when using non-contiguous inputs and a custom attn_mask, - # but we are fine here as `_shape` do call `.contiguous()`. Reference: https://github.com/pytorch/pytorch/issues/112577 - attn_output = torch.nn.functional.scaled_dot_product_attention( - query_states, - key_states, - value_states, - attn_mask=attention_mask, - dropout_p=self.dropout if self.training else 0.0, - is_causal=is_causal, - ) - - if attn_output.size() != (bsz, self.num_heads, tgt_len, self.head_dim): - raise ValueError( - f"`attn_output` should be of size {(bsz, self.num_heads, tgt_len, self.head_dim)}, but is" - f" {attn_output.size()}" - ) - - attn_output = attn_output.transpose(1, 2) - - # Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be - # partitioned across GPUs when using tensor-parallelism. - attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim) - - attn_output = self.out_proj(attn_output) - - return attn_output, None, past_key_value - -UNIMER_MBART_ATTENTION_CLASSES = { - "eager": UnimerMBartAttention, - "flash_attention_2": UnimerMBartFlashAttention2, - "sdpa": UnimerMBartSdpaAttention, -} - - -class UnimerMBartEncoderLayer(nn.Module): - def __init__(self, config: UnimerMBartConfig): - super().__init__() - self.embed_dim = config.d_model - - self.self_attn = UNIMER_MBART_ATTENTION_CLASSES[config._attn_implementation]( - embed_dim=self.embed_dim, - num_heads=config.encoder_attention_heads, - dropout=config.attention_dropout, - config=config, - ) - self.self_attn_layer_norm = nn.LayerNorm(self.embed_dim) - self.dropout = config.dropout - self.activation_fn = ACT2FN[config.activation_function] - self.activation_dropout = config.activation_dropout - self.fc1 = nn.Linear(self.embed_dim, config.encoder_ffn_dim) - self.fc2 = nn.Linear(config.encoder_ffn_dim, self.embed_dim) - self.final_layer_norm = nn.LayerNorm(self.embed_dim) - - def forward( - self, - hidden_states: torch.Tensor, - attention_mask: torch.Tensor, - layer_head_mask: torch.Tensor, - output_attentions: bool = False, - ) -> torch.Tensor: - """ - Args: - hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` - attention_mask (`torch.FloatTensor`): attention mask of size - `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values. - layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size - `(encoder_attention_heads,)`. - output_attentions (`bool`, *optional*): - Whether or not to return the attentions tensors of all attention layers. 
See `attentions` under - returned tensors for more detail. - """ - residual = hidden_states - hidden_states = self.self_attn_layer_norm(hidden_states) - hidden_states, attn_weights, _ = self.self_attn( - hidden_states=hidden_states, - attention_mask=attention_mask, - layer_head_mask=layer_head_mask, - output_attentions=output_attentions, - ) - hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) - hidden_states = residual + hidden_states - - residual = hidden_states - hidden_states = self.final_layer_norm(hidden_states) - hidden_states = self.activation_fn(self.fc1(hidden_states)) - hidden_states = nn.functional.dropout(hidden_states, p=self.activation_dropout, training=self.training) - hidden_states = self.fc2(hidden_states) - hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) - hidden_states = residual + hidden_states - - if hidden_states.dtype == torch.float16 and ( - torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any() - ): - clamp_value = torch.finfo(hidden_states.dtype).max - 1000 - hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value) - - outputs = (hidden_states,) - - if output_attentions: - outputs += (attn_weights,) - - return outputs - - -class UnimerMBartDecoderLayer(nn.Module): - def __init__(self, config: UnimerMBartConfig): - super().__init__() - self.embed_dim = config.d_model - - self.self_attn = UNIMER_MBART_ATTENTION_CLASSES[config._attn_implementation]( - embed_dim=self.embed_dim, - num_heads=config.decoder_attention_heads, - dropout=config.attention_dropout, - is_decoder=True, - is_causal=True, - config=config, - ) - self.dropout = config.dropout - self.activation_fn = ACT2FN[config.activation_function] - self.activation_dropout = config.activation_dropout - - self.self_attn_layer_norm = nn.LayerNorm(self.embed_dim) - self.encoder_attn = UNIMER_MBART_ATTENTION_CLASSES[config._attn_implementation]( - self.embed_dim, - config.decoder_attention_heads, - dropout=config.attention_dropout, - is_decoder=True, - config=config, - ) - self.encoder_attn_layer_norm = nn.LayerNorm(self.embed_dim) - self.fc1 = nn.Linear(self.embed_dim, config.decoder_ffn_dim) - self.fc2 = nn.Linear(config.decoder_ffn_dim, self.embed_dim) - self.final_layer_norm = nn.LayerNorm(self.embed_dim) - - def forward( - self, - hidden_states: torch.Tensor, - attention_mask: Optional[torch.Tensor] = None, - encoder_hidden_states: Optional[torch.Tensor] = None, - encoder_attention_mask: Optional[torch.Tensor] = None, - layer_head_mask: Optional[torch.Tensor] = None, - cross_attn_layer_head_mask: Optional[torch.Tensor] = None, - past_key_value: Optional[Tuple[torch.Tensor]] = None, - output_attentions: Optional[bool] = False, - use_cache: Optional[bool] = True, - ) -> torch.Tensor: - """ - Args: - hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)` - attention_mask (`torch.FloatTensor`): attention mask of size - `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values. - encoder_hidden_states (`torch.FloatTensor`): - cross attention input to the layer of shape `(batch, seq_len, embed_dim)` - encoder_attention_mask (`torch.FloatTensor`): encoder attention mask of size - `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values. - layer_head_mask (`torch.FloatTensor`): mask for attention heads in a given layer of size - `(encoder_attention_heads,)`. 
- cross_attn_layer_head_mask (`torch.FloatTensor`): mask for cross-attention heads in a given layer of - size `(decoder_attention_heads,)`. - past_key_value (`Tuple(torch.FloatTensor)`): cached past key and value projection states - output_attentions (`bool`, *optional*): - Whether or not to return the attentions tensors of all attention layers. See `attentions` under - returned tensors for more detail. - """ - residual = hidden_states - hidden_states = self.self_attn_layer_norm(hidden_states) - - # Self Attention - # decoder uni-directional self-attention cached key/values tuple is at positions 1,2 - self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None - # add present self-attn cache to positions 1,2 of present_key_value tuple - hidden_states, self_attn_weights, present_key_value = self.self_attn( - hidden_states=hidden_states, - past_key_value=self_attn_past_key_value, - attention_mask=attention_mask, - layer_head_mask=layer_head_mask, - output_attentions=output_attentions, - ) - hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) - hidden_states = residual + hidden_states - - # Cross-Attention Block - cross_attn_present_key_value = None - cross_attn_weights = None - if encoder_hidden_states is not None: - residual = hidden_states - hidden_states = self.encoder_attn_layer_norm(hidden_states) - - # cross_attn cached key/values tuple is at positions 3,4 of present_key_value tuple - cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None - hidden_states, cross_attn_weights, cross_attn_present_key_value = self.encoder_attn( - hidden_states=hidden_states, - key_value_states=encoder_hidden_states, - attention_mask=encoder_attention_mask, - layer_head_mask=cross_attn_layer_head_mask, - past_key_value=cross_attn_past_key_value, - output_attentions=output_attentions, - ) - hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) - hidden_states = residual + hidden_states - - # add cross-attn to positions 3,4 of present_key_value tuple - present_key_value = present_key_value + cross_attn_present_key_value - - # Fully Connected - residual = hidden_states - hidden_states = self.final_layer_norm(hidden_states) - hidden_states = self.activation_fn(self.fc1(hidden_states)) - hidden_states = nn.functional.dropout(hidden_states, p=self.activation_dropout, training=self.training) - hidden_states = self.fc2(hidden_states) - hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) - hidden_states = residual + hidden_states - - outputs = (hidden_states,) - - if output_attentions: - outputs += (self_attn_weights, cross_attn_weights) - - if use_cache: - outputs += (present_key_value,) - - return outputs - - -# Copied from transformers.models.bart.modeling_bart.BartClassificationHead with Bart->MBart -class UnimerMBartClassificationHead(nn.Module): - """Head for sentence-level classification tasks.""" - - def __init__( - self, - input_dim: int, - inner_dim: int, - num_classes: int, - pooler_dropout: float, - ): - super().__init__() - self.dense = nn.Linear(input_dim, inner_dim) - self.dropout = nn.Dropout(p=pooler_dropout) - self.out_proj = nn.Linear(inner_dim, num_classes) - - def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: - hidden_states = self.dropout(hidden_states) - hidden_states = self.dense(hidden_states) - hidden_states = torch.tanh(hidden_states) - hidden_states = self.dropout(hidden_states) - hidden_states = 
self.out_proj(hidden_states) - return hidden_states - - -class UnimerMBartPreTrainedModel(PreTrainedModel): - config_class = UnimerMBartConfig - base_model_prefix = "model" - supports_gradient_checkpointing = True - _no_split_modules = ["MBartDecoderLayer", "MBartSqueezeAttention"] - _supports_flash_attn_2 = True - _supports_sdpa = True - - def _init_weights(self, module): - std = self.config.init_std - if isinstance(module, nn.Linear): - module.weight.data.normal_(mean=0.0, std=std) - if module.bias is not None: - module.bias.data.zero_() - elif isinstance(module, nn.Embedding): - module.weight.data.normal_(mean=0.0, std=std) - if module.padding_idx is not None: - module.weight.data[module.padding_idx].zero_() - - @property - def dummy_inputs(self): - pad_token = self.config.pad_token_id - input_ids = torch.tensor([[0, 6, 10, 4, 2], [0, 8, 12, 2, pad_token]], device=self.device) - dummy_inputs = { - "attention_mask": input_ids.ne(pad_token), - "input_ids": input_ids, - } - return dummy_inputs - - -MBART_START_DOCSTRING = r""" - This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the - library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads - etc.) - - This model is also a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) subclass. - Use it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage - and behavior. - - Parameters: - config ([`MBartConfig`]): - Model configuration class with all the parameters of the model. Initializing with a config file does not - load the weights associated with the model, only the configuration. Check out the - [`~PreTrainedModel.from_pretrained`] method to load the model weights. -""" - -MBART_GENERATION_EXAMPLE = r""" - Translation example: - - ```python - >>> from transformers import AutoTokenizer, MBartForConditionalGeneration - - >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-en-ro") - >>> tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-en-ro") - - >>> example_english_phrase = "42 is the answer" - >>> inputs = tokenizer(example_english_phrase, return_tensors="pt") - - >>> # Translate - >>> generated_ids = model.generate(**inputs, num_beams=4, max_length=5) - >>> tokenizer.batch_decode(generated_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0] - '42 este răspuns' - ``` - - Mask filling example: - - ```python - >>> from transformers import AutoTokenizer, MBartForConditionalGeneration - - >>> model = MBartForConditionalGeneration.from_pretrained("facebook/mbart-large-cc25") - >>> tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25") - - >>> # de_DE is the language symbol id for German - >>> TXT = " Meine Freunde sind nett aber sie essen zu viel Kuchen. de_DE" - - >>> input_ids = tokenizer([TXT], add_special_tokens=False, return_tensors="pt")["input_ids"] - >>> logits = model(input_ids).logits - - >>> masked_index = (input_ids[0] == tokenizer.mask_token_id).nonzero().item() - >>> probs = logits[0, masked_index].softmax(dim=0) - >>> values, predictions = probs.topk(5) - - >>> tokenizer.decode(predictions).split() - ['nett', 'sehr', 'ganz', 'nicht', 'so'] - ``` -""" - -MBART_INPUTS_DOCSTRING = r""" - Args: - input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): - Indices of input sequence tokens in the vocabulary. 
Padding will be ignored by default should you provide - it. - - Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and - [`PreTrainedTokenizer.__call__`] for details. - - [What are input IDs?](../glossary#input-ids) - attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): - Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: - - - 1 for tokens that are **not masked**, - - 0 for tokens that are **masked**. - - [What are attention masks?](../glossary#attention-mask) - decoder_input_ids (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*): - Indices of decoder input sequence tokens in the vocabulary. - - Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and - [`PreTrainedTokenizer.__call__`] for details. - - [What are decoder input IDs?](../glossary#decoder-input-ids) - - MBart uses a specific language id token as the starting token for `decoder_input_ids` generation that - varies according to source and target language, *e.g.* 25004 for *en_XX*, and 25003 for *de_DE*. If - `past_key_values` is used, optionally only the last `decoder_input_ids` have to be input (see - `past_key_values`). - - For translation and summarization training, `decoder_input_ids` should be provided. If no - `decoder_input_ids` is provided, the model will create this tensor by shifting the `input_ids` to the right - for denoising pre-training following the paper. - decoder_attention_mask (`torch.LongTensor` of shape `(batch_size, target_sequence_length)`, *optional*): - Default behavior: generate a tensor that ignores pad tokens in `decoder_input_ids`. Causal mask will also - be used by default. - head_mask (`torch.Tensor` of shape `(encoder_layers, encoder_attention_heads)`, *optional*): - Mask to nullify selected heads of the attention modules in the encoder. Mask values selected in `[0, 1]`: - - - 1 indicates the head is **not masked**, - - 0 indicates the head is **masked**. - - decoder_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): - Mask to nullify selected heads of the attention modules in the decoder. Mask values selected in `[0, 1]`: - - - 1 indicates the head is **not masked**, - - 0 indicates the head is **masked**. - - cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): - Mask to nullify selected heads of the cross-attention modules in the decoder. Mask values selected in `[0, - 1]`: - - - 1 indicates the head is **not masked**, - - 0 indicates the head is **masked**. - - encoder_outputs (`tuple(tuple(torch.FloatTensor)`, *optional*): - Tuple consists of (`last_hidden_state`, *optional*: `hidden_states`, *optional*: `attentions`) - `last_hidden_state` of shape `(batch_size, sequence_length, hidden_size)`, *optional*) is a sequence of - hidden-states at the output of the last layer of the encoder. Used in the cross-attention of the decoder. - past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`): - Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of shape - `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of shape - `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`. 
- - Contains pre-computed hidden-states (key and values in the self-attention blocks and in the cross-attention - blocks) that can be used (see `past_key_values` input) to speed up sequential decoding. - - If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those that - don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of all - `decoder_input_ids` of shape `(batch_size, sequence_length)`. - inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): - Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. - This is useful if you want more control over how to convert `input_ids` indices into associated vectors - than the model's internal embedding lookup matrix. - decoder_inputs_embeds (`torch.FloatTensor` of shape `(batch_size, target_sequence_length, hidden_size)`, *optional*): - Optionally, instead of passing `decoder_input_ids` you can choose to directly pass an embedded - representation. If `past_key_values` is used, optionally only the last `decoder_inputs_embeds` have to be - input (see `past_key_values`). This is useful if you want more control over how to convert - `decoder_input_ids` indices into associated vectors than the model's internal embedding lookup matrix. - - If `decoder_input_ids` and `decoder_inputs_embeds` are both unset, `decoder_inputs_embeds` takes the value - of `inputs_embeds`. - use_cache (`bool`, *optional*): - If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding (see - `past_key_values`). - output_attentions (`bool`, *optional*): - Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned - tensors for more detail. - output_hidden_states (`bool`, *optional*): - Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for - more detail. - return_dict (`bool`, *optional*): - Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. -""" - - -class UnimerMBartEncoder(UnimerMBartPreTrainedModel): - """ - Transformer encoder consisting of *config.encoder_layers* self attention layers. Each layer is a - [`MBartEncoderLayer`]. 
- - Args: - config: MBartConfig - embed_tokens (nn.Embedding): output embedding - """ - - def __init__(self, config: UnimerMBartConfig, embed_tokens: Optional[nn.Embedding] = None): - super().__init__(config) - - self.dropout = config.dropout - self.layerdrop = config.encoder_layerdrop - - embed_dim = config.d_model - self.padding_idx = config.pad_token_id - self.max_source_positions = config.max_position_embeddings - embed_scale = math.sqrt(embed_dim) if config.scale_embedding else 1.0 - - self.embed_tokens = UnimerMBartScaledWordEmbedding( - config.vocab_size, embed_dim, self.padding_idx, embed_scale=embed_scale - ) - - if embed_tokens is not None: - self.embed_tokens.weight = embed_tokens.weight - - self.embed_positions = UnimerMBartLearnedPositionalEmbedding( - config.max_position_embeddings, - embed_dim, - ) - self.layers = nn.ModuleList([UnimerMBartEncoderLayer(config) for _ in range(config.encoder_layers)]) - self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2" - self._use_sdpa = config._attn_implementation == "sdpa" - self.layernorm_embedding = nn.LayerNorm(embed_dim) - self.layer_norm = nn.LayerNorm(config.d_model) - - self.gradient_checkpointing = False - # Initialize weights and apply final processing - self.post_init() - - def _backward_compatibility_gradient_checkpointing(self): - # Override to not delete the attribute from the config - if self.supports_gradient_checkpointing and getattr(self.config, "gradient_checkpointing", False): - self.gradient_checkpointing_enable() - - def forward( - self, - input_ids: torch.LongTensor = None, - attention_mask: Optional[torch.Tensor] = None, - head_mask: Optional[torch.Tensor] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, - return_dict: Optional[bool] = None, - ) -> Union[Tuple, BaseModelOutput]: - r""" - Args: - input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): - Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you - provide it. - - Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and - [`PreTrainedTokenizer.__call__`] for details. - - [What are input IDs?](../glossary#input-ids) - attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): - Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: - - - 1 for tokens that are **not masked**, - - 0 for tokens that are **masked**. - - [What are attention masks?](../glossary#attention-mask) - head_mask (`torch.Tensor` of shape `(encoder_layers, encoder_attention_heads)`, *optional*): - Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`: - - - 1 indicates the head is **not masked**, - - 0 indicates the head is **masked**. - - inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): - Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. - This is useful if you want more control over how to convert `input_ids` indices into associated vectors - than the model's internal embedding lookup matrix. - output_attentions (`bool`, *optional*): - Whether or not to return the attentions tensors of all attention layers. See `attentions` under - returned tensors for more detail. 
- output_hidden_states (`bool`, *optional*): - Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors - for more detail. - return_dict (`bool`, *optional*): - Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. - """ - output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions - output_hidden_states = ( - output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states - ) - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - # retrieve input_ids and inputs_embeds - if input_ids is not None and inputs_embeds is not None: - raise ValueError("You cannot specify both input_ids and inputs_embeds at the same time") - elif input_ids is not None: - input = input_ids - input_shape = input.shape - input_ids = input_ids.view(-1, input_shape[-1]) - elif inputs_embeds is not None: - input = inputs_embeds[:, :, -1] - else: - raise ValueError("You have to specify either input_ids or inputs_embeds") - - if inputs_embeds is None: - inputs_embeds = self.embed_tokens(input_ids) - - embed_pos = self.embed_positions(input) - - hidden_states = inputs_embeds + embed_pos.to(inputs_embeds.device) - hidden_states = self.layernorm_embedding(hidden_states) - hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) - - # expand attention_mask - if attention_mask is not None: - # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] - if self._use_flash_attention_2: - attention_mask = attention_mask if 0 in attention_mask else None - elif self._use_sdpa and head_mask is None and not output_attentions: - # output_attentions=True & head_mask can not be supported when using SDPA, fall back to - # the manual implementation that requires a 4D causal mask in all cases. - # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] - attention_mask = _prepare_4d_attention_mask_for_sdpa(attention_mask, inputs_embeds.dtype) - else: - # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] - attention_mask = _prepare_4d_attention_mask(attention_mask, inputs_embeds.dtype) - - encoder_states = () if output_hidden_states else None - all_attentions = () if output_attentions else None - - # check if head_mask has a correct number of layers specified if desired - if head_mask is not None: - if head_mask.size()[0] != len(self.layers): - raise ValueError( - f"The head_mask should be specified for {len(self.layers)} layers, but it is for" - f" {head_mask.size()[0]}." 
- ) - for idx, encoder_layer in enumerate(self.layers): - if output_hidden_states: - encoder_states = encoder_states + (hidden_states,) - # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - to_drop = False - if self.training: - dropout_probability = torch.rand([]) - if dropout_probability < self.layerdrop: # skip the layer - to_drop = True - - if to_drop: - layer_outputs = (None, None) - else: - if self.gradient_checkpointing and self.training: - layer_outputs = self._gradient_checkpointing_func( - encoder_layer.__call__, - hidden_states, - attention_mask, - (head_mask[idx] if head_mask is not None else None), - output_attentions, - ) - else: - layer_outputs = encoder_layer( - hidden_states, - attention_mask, - layer_head_mask=(head_mask[idx] if head_mask is not None else None), - output_attentions=output_attentions, - ) - - hidden_states = layer_outputs[0] - - if output_attentions: - all_attentions = all_attentions + (layer_outputs[1],) - - hidden_states = self.layer_norm(hidden_states) - - if output_hidden_states: - encoder_states = encoder_states + (hidden_states,) - - if not return_dict: - return tuple(v for v in [hidden_states, encoder_states, all_attentions] if v is not None) - return BaseModelOutput( - last_hidden_state=hidden_states, hidden_states=encoder_states, attentions=all_attentions - ) - - -class UnimerMBartDecoder(UnimerMBartPreTrainedModel): - """ - Transformer decoder consisting of *config.decoder_layers* layers. Each layer is a [`MBartDecoderLayer`] - - Args: - config: MBartConfig - embed_tokens (nn.Embedding): output embedding - """ - - def __init__(self, config: UnimerMBartConfig, embed_tokens: Optional[nn.Embedding] = None): - super().__init__(config) - self.dropout = config.dropout - self.layerdrop = config.decoder_layerdrop - self.padding_idx = config.pad_token_id - self.max_target_positions = config.max_position_embeddings - embed_scale = math.sqrt(config.d_model) if config.scale_embedding else 1.0 - - self.embed_tokens = UnimerMBartScaledWordEmbedding( - config.vocab_size, config.d_model, self.padding_idx, embed_scale=embed_scale - ) - - if embed_tokens is not None: - self.embed_tokens.weight = embed_tokens.weight - - self.embed_positions = UnimerMBartLearnedPositionalEmbedding( - config.max_position_embeddings, - config.d_model, - ) - self.layers = nn.ModuleList([UnimerMBartDecoderLayer(config) for _ in range(config.decoder_layers)]) - self._use_flash_attention_2 = config._attn_implementation == "flash_attention_2" - self._use_sdpa = config._attn_implementation == "sdpa" - self.layernorm_embedding = nn.LayerNorm(config.d_model) - self.layer_norm = nn.LayerNorm(config.d_model) - - self.gradient_checkpointing = False - # Initialize weights and apply final processing - self.post_init() - - def get_input_embeddings(self): - return self.embed_tokens - - def set_input_embeddings(self, value): - self.embed_tokens = value - - def forward( - self, - input_ids: torch.LongTensor = None, - attention_mask: Optional[torch.Tensor] = None, - count_pred: Optional[torch.FloatTensor] = None, - encoder_hidden_states: Optional[torch.FloatTensor] = None, - encoder_attention_mask: Optional[torch.LongTensor] = None, - head_mask: Optional[torch.Tensor] = None, - cross_attn_head_mask: Optional[torch.Tensor] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - use_cache: Optional[bool] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, - 
return_dict: Optional[bool] = None, - ) -> Union[Tuple, BaseModelOutputWithPastAndCrossAttentions]: - r""" - Args: - input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): - Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you - provide it. - - Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and - [`PreTrainedTokenizer.__call__`] for details. - - [What are input IDs?](../glossary#input-ids) - attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): - Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: - - - 1 for tokens that are **not masked**, - - 0 for tokens that are **masked**. - - [What are attention masks?](../glossary#attention-mask) - encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, encoder_sequence_length, hidden_size)`, *optional*): - Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention - of the decoder. - encoder_attention_mask (`torch.LongTensor` of shape `(batch_size, encoder_sequence_length)`, *optional*): - Mask to avoid performing cross-attention on padding tokens indices of encoder input_ids. Mask values - selected in `[0, 1]`: - - - 1 for tokens that are **not masked**, - - 0 for tokens that are **masked**. - - [What are attention masks?](../glossary#attention-mask) - head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): - Mask to nullify selected heads of the attention modules. Mask values selected in `[0, 1]`: - - - 1 indicates the head is **not masked**, - - 0 indicates the head is **masked**. - - cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): - Mask to nullify selected heads of the cross-attention modules in the decoder to avoid performing - cross-attention on hidden heads. Mask values selected in `[0, 1]`: - - - 1 indicates the head is **not masked**, - - 0 indicates the head is **masked**. - - past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`): - Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of - shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of - shape `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`. - - Contains pre-computed hidden-states (key and values in the self-attention blocks and in the - cross-attention blocks) that can be used (see `past_key_values` input) to speed up sequential decoding. - - If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those - that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of - all `decoder_input_ids` of shape `(batch_size, sequence_length)`. - inputs_embeds (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): - Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation. - This is useful if you want more control over how to convert `input_ids` indices into associated vectors - than the model's internal embedding lookup matrix. - output_attentions (`bool`, *optional*): - Whether or not to return the attentions tensors of all attention layers. See `attentions` under - returned tensors for more detail. 
- output_hidden_states (`bool`, *optional*): - Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors - for more detail. - return_dict (`bool`, *optional*): - Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. - """ - output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions - output_hidden_states = ( - output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states - ) - use_cache = use_cache if use_cache is not None else self.config.use_cache - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - # retrieve input_ids and inputs_embeds - if input_ids is not None and inputs_embeds is not None: - raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time") - elif input_ids is not None: - input = input_ids - input_shape = input.size() - input_ids = input_ids.view(-1, input_shape[-1]) - elif inputs_embeds is not None: - input_shape = inputs_embeds.size()[:-1] - input = inputs_embeds[:, :, -1] - else: - raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds") - - # past_key_values_length - past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0 - - if inputs_embeds is None: - inputs_embeds = self.embed_tokens(input_ids) - - if self._use_flash_attention_2: - # 2d mask is passed through the layers - attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None - elif self._use_sdpa and not output_attentions and cross_attn_head_mask is None: - # output_attentions=True & cross_attn_head_mask can not be supported when using SDPA, and we fall back on - # the manual implementation that requires a 4D causal mask in all cases. - attention_mask = _prepare_4d_causal_attention_mask_for_sdpa( - attention_mask, - input_shape, - inputs_embeds, - past_key_values_length, - ) - else: - # 4d mask is passed through the layers - attention_mask = _prepare_4d_causal_attention_mask( - attention_mask, input_shape, inputs_embeds, past_key_values_length - ) - - # expand encoder attention mask - if encoder_hidden_states is not None and encoder_attention_mask is not None: - if self._use_flash_attention_2: - encoder_attention_mask = encoder_attention_mask if 0 in encoder_attention_mask else None - elif self._use_sdpa and cross_attn_head_mask is None and not output_attentions: - # output_attentions=True & cross_attn_head_mask can not be supported when using SDPA, and we fall back on - # the manual implementation that requires a 4D causal mask in all cases. 
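-                # The encoder mask here is a padding mask rather than a causal one, so it only needs to be broadcast to 4D using the decoder's target length.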
- # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] - encoder_attention_mask = _prepare_4d_attention_mask_for_sdpa( - encoder_attention_mask, - inputs_embeds.dtype, - tgt_len=input_shape[-1], - ) - else: - # [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len] - encoder_attention_mask = _prepare_4d_attention_mask( - encoder_attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1] - ) - - # embed positions - positions = self.embed_positions(input, past_key_values_length) - - hidden_states = inputs_embeds + positions.to(inputs_embeds.device) - - # TODO: add counting context weight to hidden_states - if count_pred is not None: - count_context_weight = self.counting_context_weight(count_pred) - hidden_states = hidden_states + 0.5 * count_context_weight.unsqueeze(1) - - hidden_states = self.layernorm_embedding(hidden_states) - hidden_states = nn.functional.dropout(hidden_states, p=self.dropout, training=self.training) - - if self.gradient_checkpointing and self.training: - if use_cache: - logger.warning_once( - "`use_cache=True` is incompatible with gradient checkpointing`. Setting `use_cache=False`..." - ) - use_cache = False - - # decoder layers - all_hidden_states = () if output_hidden_states else None - all_self_attns = () if output_attentions else None - all_cross_attentions = () if (output_attentions and encoder_hidden_states is not None) else None - next_decoder_cache = () if use_cache else None - - # check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired - for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]): - if attn_mask is not None: - if attn_mask.size()[0] != len(self.layers): - raise ValueError( - f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for" - f" {attn_mask.size()[0]}." 
- ) - for idx, decoder_layer in enumerate(self.layers): - # add LayerDrop (see https://arxiv.org/abs/1909.11556 for description) - if output_hidden_states: - all_hidden_states += (hidden_states,) - if self.training: - dropout_probability = torch.rand([]) - if dropout_probability < self.layerdrop: - continue - - past_key_value = past_key_values[idx] if past_key_values is not None else None - - if self.gradient_checkpointing and self.training: - layer_outputs = self._gradient_checkpointing_func( - decoder_layer.__call__, - hidden_states, - attention_mask, - encoder_hidden_states, - encoder_attention_mask, - head_mask[idx] if head_mask is not None else None, - cross_attn_head_mask[idx] if cross_attn_head_mask is not None else None, - None, - output_attentions, - use_cache, - ) - else: - layer_outputs = decoder_layer( - hidden_states, - attention_mask=attention_mask, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - layer_head_mask=(head_mask[idx] if head_mask is not None else None), - cross_attn_layer_head_mask=( - cross_attn_head_mask[idx] if cross_attn_head_mask is not None else None - ), - past_key_value=past_key_value, - output_attentions=output_attentions, - use_cache=use_cache, - ) - hidden_states = layer_outputs[0] - - if use_cache: - next_decoder_cache += (layer_outputs[3 if output_attentions else 1],) - - if output_attentions: - all_self_attns += (layer_outputs[1],) - - if encoder_hidden_states is not None: - all_cross_attentions += (layer_outputs[2],) - - hidden_states = self.layer_norm(hidden_states) - - # add hidden states from the last decoder layer - if output_hidden_states: - all_hidden_states += (hidden_states,) - - next_cache = next_decoder_cache if use_cache else None - if not return_dict: - return tuple( - v - for v in [hidden_states, next_cache, all_hidden_states, all_self_attns, all_cross_attentions] - if v is not None - ) - return BaseModelOutputWithPastAndCrossAttentions( - last_hidden_state=hidden_states, - past_key_values=next_cache, - hidden_states=all_hidden_states, - attentions=all_self_attns, - cross_attentions=all_cross_attentions, - ) - - -@add_start_docstrings( - "The bare MBART Model outputting raw hidden-states without any specific head on top.", - MBART_START_DOCSTRING, -) -class UnimerMBartModel(UnimerMBartPreTrainedModel): - _tied_weights_keys = ["encoder.embed_tokens.weight", "decoder.embed_tokens.weight"] - - def __init__(self, config: UnimerMBartConfig): - super().__init__(config) - - padding_idx, vocab_size = config.pad_token_id, config.vocab_size - self.shared = nn.Embedding(vocab_size, config.d_model, padding_idx) - - self.encoder = UnimerMBartEncoder(config, self.shared) - self.decoder = UnimerMBartDecoder(config, self.shared) - - # Initialize weights and apply final processing - self.post_init() - - def get_input_embeddings(self): - return self.shared - - def set_input_embeddings(self, value): - self.shared = value - self.encoder.embed_tokens = self.shared - self.decoder.embed_tokens = self.shared - - def get_encoder(self): - return self.encoder - - def get_decoder(self): - return self.decoder - - def _tie_weights(self): - if self.config.tie_word_embeddings: - self._tie_or_clone_weights(self.encoder.embed_tokens, self.get_input_embeddings()) - self._tie_or_clone_weights(self.decoder.embed_tokens, self.get_input_embeddings()) - - @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) - @add_code_sample_docstrings( - checkpoint=_CHECKPOINT_FOR_DOC, - output_type=Seq2SeqModelOutput, - 
config_class=_CONFIG_FOR_DOC, - expected_output=_EXPECTED_OUTPUT_SHAPE, - ) - def forward( - self, - input_ids: torch.LongTensor = None, - attention_mask: Optional[torch.Tensor] = None, - decoder_input_ids: Optional[torch.LongTensor] = None, - decoder_attention_mask: Optional[torch.LongTensor] = None, - head_mask: Optional[torch.Tensor] = None, - decoder_head_mask: Optional[torch.Tensor] = None, - cross_attn_head_mask: Optional[torch.Tensor] = None, - encoder_outputs: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - decoder_inputs_embeds: Optional[torch.FloatTensor] = None, - use_cache: Optional[bool] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, - return_dict: Optional[bool] = None, - ) -> Union[Seq2SeqModelOutput, Tuple[torch.FloatTensor]]: - output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions - output_hidden_states = ( - output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states - ) - use_cache = use_cache if use_cache is not None else self.config.use_cache - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - # different to other models, MBart automatically creates decoder_input_ids from - # input_ids if no decoder_input_ids are provided - if decoder_input_ids is None and decoder_inputs_embeds is None: - decoder_input_ids = shift_tokens_right(input_ids, self.config.pad_token_id) - - if encoder_outputs is None: - encoder_outputs = self.encoder( - input_ids=input_ids, - attention_mask=attention_mask, - head_mask=head_mask, - inputs_embeds=inputs_embeds, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - ) - # If the user passed a tuple for encoder_outputs, we wrap it in a BaseModelOutput when return_dict=True - elif return_dict and not isinstance(encoder_outputs, BaseModelOutput): - encoder_outputs = BaseModelOutput( - last_hidden_state=encoder_outputs[0], - hidden_states=encoder_outputs[1] if len(encoder_outputs) > 1 else None, - attentions=encoder_outputs[2] if len(encoder_outputs) > 2 else None, - ) - - # decoder outputs consists of (dec_features, past_key_value, dec_hidden, dec_attn) - decoder_outputs = self.decoder( - input_ids=decoder_input_ids, - attention_mask=decoder_attention_mask, - encoder_hidden_states=encoder_outputs[0], - encoder_attention_mask=attention_mask, - head_mask=decoder_head_mask, - cross_attn_head_mask=cross_attn_head_mask, - past_key_values=past_key_values, - inputs_embeds=decoder_inputs_embeds, - use_cache=use_cache, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - ) - - if not return_dict: - return decoder_outputs + encoder_outputs - - return Seq2SeqModelOutput( - last_hidden_state=decoder_outputs.last_hidden_state, - past_key_values=decoder_outputs.past_key_values, - decoder_hidden_states=decoder_outputs.hidden_states, - decoder_attentions=decoder_outputs.attentions, - cross_attentions=decoder_outputs.cross_attentions, - encoder_last_hidden_state=encoder_outputs.last_hidden_state, - encoder_hidden_states=encoder_outputs.hidden_states, - encoder_attentions=encoder_outputs.attentions, - ) - - -@add_start_docstrings( - "The MBART Model with a language modeling head. 
Can be used for summarization, after fine-tuning the pretrained models.", - MBART_START_DOCSTRING, -) -class UnimerMBartForConditionalGeneration(UnimerMBartPreTrainedModel, GenerationMixin): - base_model_prefix = "model" - _keys_to_ignore_on_load_missing = ["final_logits_bias"] - _tied_weights_keys = ["model.encoder.embed_tokens.weight", "model.decoder.embed_tokens.weight", "lm_head.weight"] - - def __init__(self, config: UnimerMBartConfig): - super().__init__(config) - self.model = UnimerMBartModel(config) - self.register_buffer("final_logits_bias", torch.zeros((1, self.model.shared.num_embeddings))) - self.lm_head = nn.Linear(config.d_model, self.model.shared.num_embeddings, bias=False) - - # Initialize weights and apply final processing - self.post_init() - - def get_encoder(self): - return self.model.get_encoder() - - def get_decoder(self): - return self.model.get_decoder() - - def resize_token_embeddings(self, new_num_tokens: int, pad_to_multiple_of: Optional[int] = None) -> nn.Embedding: - new_embeddings = super().resize_token_embeddings(new_num_tokens, pad_to_multiple_of) - self._resize_final_logits_bias(new_embeddings.weight.shape[0]) - return new_embeddings - - def _resize_final_logits_bias(self, new_num_tokens: int) -> None: - old_num_tokens = self.final_logits_bias.shape[-1] - if new_num_tokens <= old_num_tokens: - new_bias = self.final_logits_bias[:, :new_num_tokens] - else: - extra_bias = torch.zeros((1, new_num_tokens - old_num_tokens), device=self.final_logits_bias.device) - new_bias = torch.cat([self.final_logits_bias, extra_bias], dim=1) - self.register_buffer("final_logits_bias", new_bias) - - def get_output_embeddings(self): - return self.lm_head - - def set_output_embeddings(self, new_embeddings): - self.lm_head = new_embeddings - - @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) - @replace_return_docstrings(output_type=Seq2SeqLMOutput, config_class=_CONFIG_FOR_DOC) - @add_end_docstrings(MBART_GENERATION_EXAMPLE) - def forward( - self, - input_ids: torch.LongTensor = None, - attention_mask: Optional[torch.Tensor] = None, - decoder_input_ids: Optional[torch.LongTensor] = None, - decoder_attention_mask: Optional[torch.LongTensor] = None, - head_mask: Optional[torch.Tensor] = None, - decoder_head_mask: Optional[torch.Tensor] = None, - cross_attn_head_mask: Optional[torch.Tensor] = None, - encoder_outputs: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - decoder_inputs_embeds: Optional[torch.FloatTensor] = None, - labels: Optional[torch.LongTensor] = None, - use_cache: Optional[bool] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, - return_dict: Optional[bool] = None, - ) -> Union[Seq2SeqLMOutput, Tuple[torch.FloatTensor]]: - r""" - labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): - Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., - config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored - (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`. 
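# NOTE: illustrative sketch, not part of the deleted module above. `_resize_final_logits_bias`
# keeps the per-token bias in step with the vocabulary size: growing pads with zeros, shrinking
# truncates. The standalone helper below (`resize_bias`, a hypothetical name) mirrors that logic:
import torch

def resize_bias(bias: torch.Tensor, new_num_tokens: int) -> torch.Tensor:
    old_num_tokens = bias.shape[-1]
    if new_num_tokens <= old_num_tokens:
        return bias[:, :new_num_tokens]
    extra = torch.zeros((1, new_num_tokens - old_num_tokens), device=bias.device)
    return torch.cat([bias, extra], dim=1)

bias = torch.zeros(1, 5)
print(resize_bias(bias, 8).shape)  # torch.Size([1, 8]) -- padded with zeros
print(resize_bias(bias, 3).shape)  # torch.Size([1, 3]) -- truncated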
- - Returns: - - """ - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - if labels is not None: - if use_cache: - logger.warning("The `use_cache` argument is changed to `False` since `labels` is provided.") - use_cache = False - if decoder_input_ids is None and decoder_inputs_embeds is None: - decoder_input_ids = shift_tokens_right(labels, self.config.pad_token_id) - - outputs = self.model( - input_ids, - attention_mask=attention_mask, - decoder_input_ids=decoder_input_ids, - encoder_outputs=encoder_outputs, - decoder_attention_mask=decoder_attention_mask, - head_mask=head_mask, - decoder_head_mask=decoder_head_mask, - cross_attn_head_mask=cross_attn_head_mask, - past_key_values=past_key_values, - inputs_embeds=inputs_embeds, - decoder_inputs_embeds=decoder_inputs_embeds, - use_cache=use_cache, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - ) - lm_logits = self.lm_head(outputs[0]) + self.final_logits_bias - - masked_lm_loss = None - if labels is not None: - loss_fct = CrossEntropyLoss() - masked_lm_loss = loss_fct(lm_logits.view(-1, self.config.vocab_size), labels.view(-1)) - - if not return_dict: - output = (lm_logits,) + outputs[1:] - return ((masked_lm_loss,) + output) if masked_lm_loss is not None else output - - return Seq2SeqLMOutput( - loss=masked_lm_loss, - logits=lm_logits, - past_key_values=outputs.past_key_values, - decoder_hidden_states=outputs.decoder_hidden_states, - decoder_attentions=outputs.decoder_attentions, - cross_attentions=outputs.cross_attentions, - encoder_last_hidden_state=outputs.encoder_last_hidden_state, - encoder_hidden_states=outputs.encoder_hidden_states, - encoder_attentions=outputs.encoder_attentions, - ) - - def prepare_inputs_for_generation( - self, - decoder_input_ids, - past_key_values=None, - attention_mask=None, - head_mask=None, - decoder_head_mask=None, - cross_attn_head_mask=None, - use_cache=None, - encoder_outputs=None, - **kwargs, - ): - # cut decoder_input_ids if past is used - if past_key_values is not None: - past_length = past_key_values[0][0].shape[2] - - # Some generation methods already pass only the last input ID - if decoder_input_ids.shape[1] > past_length: - remove_prefix_length = past_length - else: - # Default to old behavior: keep only final ID - remove_prefix_length = decoder_input_ids.shape[1] - 1 - - decoder_input_ids = decoder_input_ids[:, remove_prefix_length:] - - return { - "input_ids": None, # encoder_outputs is defined. 
input_ids not needed - "encoder_outputs": encoder_outputs, - "past_key_values": past_key_values, - "decoder_input_ids": decoder_input_ids, - "attention_mask": attention_mask, - "head_mask": head_mask, - "decoder_head_mask": decoder_head_mask, - "cross_attn_head_mask": cross_attn_head_mask, - "use_cache": use_cache, # change this to avoid caching (presumably for debugging) - } - - def prepare_decoder_input_ids_from_labels(self, labels: torch.Tensor): - return shift_tokens_right(labels, self.config.pad_token_id) - - @staticmethod - def _reorder_cache(past_key_values, beam_idx): - reordered_past = () - for layer_past in past_key_values: - # cached cross_attention states don't have to be reordered -> they are always the same - reordered_past += ( - tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past[:2]) - + layer_past[2:], - ) - return reordered_past - - -@add_start_docstrings( - """ - MBart model with a sequence classification/head on top (a linear layer on top of the pooled output) e.g. for GLUE - tasks. - """, - MBART_START_DOCSTRING, -) -class UnimerMBartForSequenceClassification(UnimerMBartPreTrainedModel): - _tied_weights_keys = ["model.encoder.embed_tokens.weight", "model.decoder.embed_tokens.weight"] - - def __init__(self, config: UnimerMBartConfig, **kwargs): - super().__init__(config, **kwargs) - self.model = UnimerMBartModel(config) - self.classification_head = UnimerMBartClassificationHead( - config.d_model, - config.d_model, - config.num_labels, - config.classifier_dropout, - ) - - # Initialize weights and apply final processing - self.post_init() - - @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) - @add_code_sample_docstrings( - checkpoint=_CHECKPOINT_FOR_DOC, - output_type=Seq2SeqSequenceClassifierOutput, - config_class=_CONFIG_FOR_DOC, - ) - # Copied from transformers.models.bart.modeling_bart.BartForSequenceClassification.forward - def forward( - self, - input_ids: torch.LongTensor = None, - attention_mask: Optional[torch.Tensor] = None, - decoder_input_ids: Optional[torch.LongTensor] = None, - decoder_attention_mask: Optional[torch.LongTensor] = None, - head_mask: Optional[torch.Tensor] = None, - decoder_head_mask: Optional[torch.Tensor] = None, - cross_attn_head_mask: Optional[torch.Tensor] = None, - encoder_outputs: Optional[List[torch.FloatTensor]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - decoder_inputs_embeds: Optional[torch.FloatTensor] = None, - labels: Optional[torch.LongTensor] = None, - use_cache: Optional[bool] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, - return_dict: Optional[bool] = None, - ) -> Union[Tuple, Seq2SeqSequenceClassifierOutput]: - r""" - labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*): - Labels for computing the sequence classification/regression loss. Indices should be in `[0, ..., - config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy). 
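# NOTE: illustrative sketch, not part of the deleted module above. `_reorder_cache` re-indexes
# the cached self-attention keys/values along the batch (beam) axis so the cache follows the
# surviving beams during beam search; cached cross-attention states are left untouched. Toy demo:
import torch

self_attn_cache = (torch.arange(3.0).view(3, 1, 1, 1),   # key,   [num_beams, heads, seq, head_dim]
                   torch.arange(3.0).view(3, 1, 1, 1))   # value
beam_idx = torch.tensor([1, 1, 0])                        # beams 1, 1 and 0 survived this step
reordered = tuple(t.index_select(0, beam_idx) for t in self_attn_cache)
print(reordered[0].flatten().tolist())  # [1.0, 1.0, 0.0]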
- """ - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - if labels is not None: - use_cache = False - - if input_ids is None and inputs_embeds is not None: - raise NotImplementedError( - f"Passing input embeddings is currently not supported for {self.__class__.__name__}" - ) - - outputs = self.model( - input_ids, - attention_mask=attention_mask, - decoder_input_ids=decoder_input_ids, - decoder_attention_mask=decoder_attention_mask, - head_mask=head_mask, - decoder_head_mask=decoder_head_mask, - cross_attn_head_mask=cross_attn_head_mask, - encoder_outputs=encoder_outputs, - inputs_embeds=inputs_embeds, - decoder_inputs_embeds=decoder_inputs_embeds, - use_cache=use_cache, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - ) - hidden_states = outputs[0] # last hidden state - - eos_mask = input_ids.eq(self.config.eos_token_id).to(hidden_states.device) - - if len(torch.unique_consecutive(eos_mask.sum(1))) > 1: - raise ValueError("All examples must have the same number of tokens.") - sentence_representation = hidden_states[eos_mask, :].view(hidden_states.size(0), -1, hidden_states.size(-1))[ - :, -1, : - ] - logits = self.classification_head(sentence_representation) - - loss = None - if labels is not None: - labels = labels.to(logits.device) - if self.config.problem_type is None: - if self.config.num_labels == 1: - self.config.problem_type = "regression" - elif self.config.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int): - self.config.problem_type = "single_label_classification" - else: - self.config.problem_type = "multi_label_classification" - - if self.config.problem_type == "regression": - loss_fct = MSELoss() - if self.config.num_labels == 1: - loss = loss_fct(logits.squeeze(), labels.squeeze()) - else: - loss = loss_fct(logits, labels) - elif self.config.problem_type == "single_label_classification": - loss_fct = CrossEntropyLoss() - loss = loss_fct(logits.view(-1, self.config.num_labels), labels.view(-1)) - elif self.config.problem_type == "multi_label_classification": - loss_fct = BCEWithLogitsLoss() - loss = loss_fct(logits, labels) - if not return_dict: - output = (logits,) + outputs[1:] - return ((loss,) + output) if loss is not None else output - - return Seq2SeqSequenceClassifierOutput( - loss=loss, - logits=logits, - past_key_values=outputs.past_key_values, - decoder_hidden_states=outputs.decoder_hidden_states, - decoder_attentions=outputs.decoder_attentions, - cross_attentions=outputs.cross_attentions, - encoder_last_hidden_state=outputs.encoder_last_hidden_state, - encoder_hidden_states=outputs.encoder_hidden_states, - encoder_attentions=outputs.encoder_attentions, - ) - - -@add_start_docstrings( - """ - MBART Model with a span classification head on top for extractive question-answering tasks like SQuAD (a linear - layer on top of the hidden-states output to compute `span start logits` and `span end logits`). 
- """, - MBART_START_DOCSTRING, -) -class UnimerMBartForQuestionAnswering(UnimerMBartPreTrainedModel): - _tied_weights_keys = ["model.encoder.embed_tokens.weight", "model.decoder.embed_tokens.weight"] - - def __init__(self, config): - super().__init__(config) - - config.num_labels = 2 - self.num_labels = config.num_labels - - self.model = UnimerMBartModel(config) - self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels) - - # Initialize weights and apply final processing - self.post_init() - - @add_start_docstrings_to_model_forward(MBART_INPUTS_DOCSTRING) - @add_code_sample_docstrings( - checkpoint=_CHECKPOINT_FOR_DOC, - output_type=Seq2SeqQuestionAnsweringModelOutput, - config_class=_CONFIG_FOR_DOC, - ) - # Copied from transformers.models.bart.modeling_bart.BartForQuestionAnswering.forward - def forward( - self, - input_ids: torch.Tensor = None, - attention_mask: Optional[torch.Tensor] = None, - decoder_input_ids: Optional[torch.LongTensor] = None, - decoder_attention_mask: Optional[torch.LongTensor] = None, - head_mask: Optional[torch.Tensor] = None, - decoder_head_mask: Optional[torch.Tensor] = None, - cross_attn_head_mask: Optional[torch.Tensor] = None, - encoder_outputs: Optional[List[torch.FloatTensor]] = None, - start_positions: Optional[torch.LongTensor] = None, - end_positions: Optional[torch.LongTensor] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - decoder_inputs_embeds: Optional[torch.FloatTensor] = None, - use_cache: Optional[bool] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, - return_dict: Optional[bool] = None, - ) -> Union[Tuple, Seq2SeqQuestionAnsweringModelOutput]: - r""" - start_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*): - Labels for position (index) of the start of the labelled span for computing the token classification loss. - Positions are clamped to the length of the sequence (*sequence_length*). Position outside of the sequence - are not taken into account for computing the loss. - end_positions (`torch.LongTensor` of shape `(batch_size,)`, *optional*): - Labels for position (index) of the end of the labelled span for computing the token classification loss. - Positions are clamped to the length of the sequence (*sequence_length*). Position outside of the sequence - are not taken into account for computing the loss. 
- """ - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - if start_positions is not None and end_positions is not None: - use_cache = False - - outputs = self.model( - input_ids, - attention_mask=attention_mask, - decoder_input_ids=decoder_input_ids, - decoder_attention_mask=decoder_attention_mask, - head_mask=head_mask, - decoder_head_mask=decoder_head_mask, - cross_attn_head_mask=cross_attn_head_mask, - encoder_outputs=encoder_outputs, - inputs_embeds=inputs_embeds, - decoder_inputs_embeds=decoder_inputs_embeds, - use_cache=use_cache, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - ) - - sequence_output = outputs[0] - - logits = self.qa_outputs(sequence_output) - start_logits, end_logits = logits.split(1, dim=-1) - start_logits = start_logits.squeeze(-1).contiguous() - end_logits = end_logits.squeeze(-1).contiguous() - - total_loss = None - if start_positions is not None and end_positions is not None: - # If we are on multi-GPU, split add a dimension - if len(start_positions.size()) > 1: - start_positions = start_positions.squeeze(-1) - if len(end_positions.size()) > 1: - end_positions = end_positions.squeeze(-1) - # sometimes the start/end positions are outside our model inputs, we ignore these terms - ignored_index = start_logits.size(1) - start_positions = start_positions.clamp(0, ignored_index) - end_positions = end_positions.clamp(0, ignored_index) - - loss_fct = CrossEntropyLoss(ignore_index=ignored_index) - start_loss = loss_fct(start_logits, start_positions) - end_loss = loss_fct(end_logits, end_positions) - total_loss = (start_loss + end_loss) / 2 - - if not return_dict: - output = ( - start_logits, - end_logits, - ) + outputs[1:] - return ((total_loss,) + output) if total_loss is not None else output - - return Seq2SeqQuestionAnsweringModelOutput( - loss=total_loss, - start_logits=start_logits, - end_logits=end_logits, - past_key_values=outputs.past_key_values, - decoder_hidden_states=outputs.decoder_hidden_states, - decoder_attentions=outputs.decoder_attentions, - cross_attentions=outputs.cross_attentions, - encoder_last_hidden_state=outputs.encoder_last_hidden_state, - encoder_hidden_states=outputs.encoder_hidden_states, - encoder_attentions=outputs.encoder_attentions, - ) - - -# Copied from transformers.models.bart.modeling_bart.BartDecoderWrapper with Bart->MBart -class UnimerMBartDecoderWrapper(UnimerMBartPreTrainedModel): - """ - This wrapper class is a helper class to correctly load pretrained checkpoints when the causal language model is - used in combination with the [`EncoderDecoderModel`] framework. 
- """ - - def __init__(self, config): - super().__init__(config) - self.decoder = UnimerMBartDecoder(config) - - def forward(self, *args, **kwargs): - return self.decoder(*args, **kwargs) - - -# Copied from transformers.models.bart.modeling_bart.BartForCausalLM with Bart->MBart, facebook/bart-base->facebook/mbart-large-cc25 -class UnimerMBartForCausalLM(UnimerMBartPreTrainedModel, GenerationMixin): - _tied_weights_keys = ["lm_head.weight"] - - def __init__(self, config): - config = copy.deepcopy(config) - config.is_decoder = True - config.is_encoder_decoder = False - super().__init__(config) - self.model = UnimerMBartDecoderWrapper(config) - - self.lm_head = nn.Linear(config.hidden_size, config.vocab_size, bias=False) - - # Initialize weights and apply final processing - self.post_init() - - def get_input_embeddings(self): - return self.model.decoder.embed_tokens - - def set_input_embeddings(self, value): - self.model.decoder.embed_tokens = value - - def get_output_embeddings(self): - return self.lm_head - - def set_output_embeddings(self, new_embeddings): - self.lm_head = new_embeddings - - def set_decoder(self, decoder): - self.model.decoder = decoder - - def get_decoder(self): - return self.model.decoder - - @replace_return_docstrings(output_type=CausalLMOutputWithCrossAttentionsAndCounting, config_class=_CONFIG_FOR_DOC) - def forward( - self, - input_ids: torch.LongTensor = None, - attention_mask: Optional[torch.Tensor] = None, - encoder_hidden_states: Optional[torch.FloatTensor] = None, - encoder_attention_mask: Optional[torch.FloatTensor] = None, - head_mask: Optional[torch.Tensor] = None, - cross_attn_head_mask: Optional[torch.Tensor] = None, - past_key_values: Optional[List[torch.FloatTensor]] = None, - inputs_embeds: Optional[torch.FloatTensor] = None, - labels: Optional[torch.LongTensor] = None, - use_cache: Optional[bool] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, - return_dict: Optional[bool] = None, - count_gt: Optional[torch.LongTensor] = None, - ) -> Union[Tuple, CausalLMOutputWithCrossAttentions]: - r""" - Args: - input_ids (`torch.LongTensor` of shape `(batch_size, sequence_length)`): - Indices of input sequence tokens in the vocabulary. Padding will be ignored by default should you - provide it. - - Indices can be obtained using [`AutoTokenizer`]. See [`PreTrainedTokenizer.encode`] and - [`PreTrainedTokenizer.__call__`] for details. - - [What are input IDs?](../glossary#input-ids) - attention_mask (`torch.Tensor` of shape `(batch_size, sequence_length)`, *optional*): - Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`: - - - 1 for tokens that are **not masked**, - - 0 for tokens that are **masked**. - - [What are attention masks?](../glossary#attention-mask) - encoder_hidden_states (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`, *optional*): - Sequence of hidden-states at the output of the last layer of the encoder. Used in the cross-attention - if the model is configured as a decoder. - encoder_attention_mask (`torch.FloatTensor` of shape `(batch_size, sequence_length)`, *optional*): - Mask to avoid performing attention on the padding token indices of the encoder input. This mask is used - in the cross-attention if the model is configured as a decoder. Mask values selected in `[0, 1]`: - head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): - Mask to nullify selected heads of the attention modules. 
Mask values selected in `[0, 1]`: - - - 1 indicates the head is **not masked**, - - 0 indicates the head is **masked**. - - cross_attn_head_mask (`torch.Tensor` of shape `(decoder_layers, decoder_attention_heads)`, *optional*): - Mask to nullify selected heads of the cross-attention modules. Mask values selected in `[0, 1]`: - - - 1 indicates the head is **not masked**, - - 0 indicates the head is **masked**. - - past_key_values (`tuple(tuple(torch.FloatTensor))`, *optional*, returned when `use_cache=True` is passed or when `config.use_cache=True`): - Tuple of `tuple(torch.FloatTensor)` of length `config.n_layers`, with each tuple having 2 tensors of - shape `(batch_size, num_heads, sequence_length, embed_size_per_head)`) and 2 additional tensors of - shape `(batch_size, num_heads, encoder_sequence_length, embed_size_per_head)`. The two additional - tensors are only required when the model is used as a decoder in a Sequence to Sequence model. - - Contains pre-computed hidden-states (key and values in the self-attention blocks and in the - cross-attention blocks) that can be used (see `past_key_values` input) to speed up sequential decoding. - - If `past_key_values` are used, the user can optionally input only the last `decoder_input_ids` (those - that don't have their past key value states given to this model) of shape `(batch_size, 1)` instead of - all `decoder_input_ids` of shape `(batch_size, sequence_length)`. - labels (`torch.LongTensor` of shape `(batch_size, sequence_length)`, *optional*): - Labels for computing the masked language modeling loss. Indices should either be in `[0, ..., - config.vocab_size]` or -100 (see `input_ids` docstring). Tokens with indices set to `-100` are ignored - (masked), the loss is only computed for the tokens with labels in `[0, ..., config.vocab_size]`. - use_cache (`bool`, *optional*): - If set to `True`, `past_key_values` key value states are returned and can be used to speed up decoding - (see `past_key_values`). - - - 1 for tokens that are **not masked**, - - 0 for tokens that are **masked**. - output_attentions (`bool`, *optional*): - Whether or not to return the attentions tensors of all attention layers. See `attentions` under - returned tensors for more detail. - output_hidden_states (`bool`, *optional*): - Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors - for more detail. - return_dict (`bool`, *optional*): - Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. - - Returns: - - Example: - - ```python - >>> from transformers import AutoTokenizer, MBartForCausalLM - - >>> tokenizer = AutoTokenizer.from_pretrained("facebook/mbart-large-cc25") - >>> model = MBartForCausalLM.from_pretrained("facebook/mbart-large-cc25", add_cross_attention=False) - >>> assert model.config.is_decoder, f"{model.__class__} has to be configured as a decoder." 
- >>> inputs = tokenizer("Hello, my dog is cute", return_tensors="pt") - >>> outputs = model(**inputs) - - >>> logits = outputs.logits - >>> expected_shape = [1, inputs.input_ids.shape[-1], model.config.vocab_size] - >>> list(logits.shape) == expected_shape - True - ```""" - - output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions - output_hidden_states = ( - output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states - ) - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - count_pred = None - - # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn) - outputs = self.model.decoder( - input_ids=input_ids, - attention_mask=attention_mask, - count_pred=count_pred, - encoder_hidden_states=encoder_hidden_states, - encoder_attention_mask=encoder_attention_mask, - head_mask=head_mask, - cross_attn_head_mask=cross_attn_head_mask, - past_key_values=past_key_values, - inputs_embeds=inputs_embeds, - use_cache=use_cache, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - ) - - logits = self.lm_head(outputs[0]) - - loss = None - if labels is not None: - labels = labels.to(logits.device) - loss_fct = CrossEntropyLoss() - loss = loss_fct(logits.view(-1, self.config.vocab_size), labels.view(-1)) - - if not return_dict: - output = (logits,) + outputs[1:] - return (loss,) + output if loss is not None else output - - return CausalLMOutputWithCrossAttentionsAndCounting( - loss=loss, - logits=logits, - past_key_values=outputs.past_key_values, - hidden_states=outputs.hidden_states, - attentions=outputs.attentions, - cross_attentions=outputs.cross_attentions, - counting=count_pred, - ) - - def prepare_inputs_for_generation( - self, input_ids, past_key_values=None, attention_mask=None, use_cache=None, **kwargs - ): - # if model is used as a decoder in encoder-decoder model, the decoder attention mask is created on the fly - if attention_mask is None: - attention_mask = input_ids.new_ones(input_ids.shape) - - if past_key_values: - past_length = past_key_values[0][0].shape[2] - - # Some generation methods already pass only the last input ID - if input_ids.shape[1] > past_length: - remove_prefix_length = past_length - else: - # Default to old behavior: keep only final ID - remove_prefix_length = input_ids.shape[1] - 1 - - input_ids = input_ids[:, remove_prefix_length:] - # first step, decoder_cached_states are empty - return { - "input_ids": input_ids, # encoder_outputs is defined. 
input_ids not needed - "attention_mask": attention_mask, - "past_key_values": past_key_values, - "use_cache": use_cache, - } - - @staticmethod - def _reorder_cache(past_key_values, beam_idx): - reordered_past = () - for layer_past in past_key_values: - reordered_past += ( - tuple(past_state.index_select(0, beam_idx.to(past_state.device)) for past_state in layer_past), - ) - return reordered_past diff --git a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py b/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_mbart/tokenization_unimer_mbart.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py b/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py deleted file mode 100644 index 0b91b3be3580e7b484deb4deae3dfe880e477906..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .configuration_unimer_swin import UnimerSwinConfig -from .modeling_unimer_swin import UnimerSwinModel -from .image_processing_unimer_swin import UnimerSwinImageProcessor - -__all__ = [ - "UnimerSwinConfig", - "UnimerSwinModel", - "UnimerSwinImageProcessor", -] diff --git a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py b/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py deleted file mode 100644 index 6c577e7c98dc6a9813af7c56ba15f78232283679..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/configuration_unimer_swin.py +++ /dev/null @@ -1,132 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Donut Swin Transformer model configuration""" - -from transformers.configuration_utils import PretrainedConfig -from transformers.utils import logging - - -logger = logging.get_logger(__name__) - - -class UnimerSwinConfig(PretrainedConfig): - r""" - This is the configuration class to store the configuration of a [`UnimerSwinModel`]. It is used to instantiate a - Donut model according to the specified arguments, defining the model architecture. Instantiating a configuration - with the defaults will yield a similar configuration to that of the Donut - [naver-clova-ix/donut-base](https://huggingface.co/naver-clova-ix/donut-base) architecture. - - Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the - documentation from [`PretrainedConfig`] for more information. - - Args: - image_size (`int`, *optional*, defaults to 224): - The size (resolution) of each image. - patch_size (`int`, *optional*, defaults to 4): - The size (resolution) of each patch. 
- num_channels (`int`, *optional*, defaults to 3): - The number of input channels. - embed_dim (`int`, *optional*, defaults to 96): - Dimensionality of patch embedding. - depths (`list(int)`, *optional*, defaults to `[2, 2, 6, 2]`): - Depth of each layer in the Transformer encoder. - num_heads (`list(int)`, *optional*, defaults to `[3, 6, 12, 24]`): - Number of attention heads in each layer of the Transformer encoder. - window_size (`int`, *optional*, defaults to 7): - Size of windows. - mlp_ratio (`float`, *optional*, defaults to 4.0): - Ratio of MLP hidden dimensionality to embedding dimensionality. - qkv_bias (`bool`, *optional*, defaults to `True`): - Whether or not a learnable bias should be added to the queries, keys and values. - hidden_dropout_prob (`float`, *optional*, defaults to 0.0): - The dropout probability for all fully connected layers in the embeddings and encoder. - attention_probs_dropout_prob (`float`, *optional*, defaults to 0.0): - The dropout ratio for the attention probabilities. - drop_path_rate (`float`, *optional*, defaults to 0.1): - Stochastic depth rate. - hidden_act (`str` or `function`, *optional*, defaults to `"gelu"`): - The non-linear activation function (function or string) in the encoder. If string, `"gelu"`, `"relu"`, - `"selu"` and `"gelu_new"` are supported. - use_absolute_embeddings (`bool`, *optional*, defaults to `False`): - Whether or not to add absolute position embeddings to the patch embeddings. - initializer_range (`float`, *optional*, defaults to 0.02): - The standard deviation of the truncated_normal_initializer for initializing all weight matrices. - layer_norm_eps (`float`, *optional*, defaults to 1e-05): - The epsilon used by the layer normalization layers. - - Example: - - ```python - >>> from transformers import UnimerSwinConfig, UnimerSwinModel - - >>> # Initializing a Donut naver-clova-ix/donut-base style configuration - >>> configuration = UnimerSwinConfig() - - >>> # Randomly initializing a model from the naver-clova-ix/donut-base style configuration - >>> model = UnimerSwinModel(configuration) - - >>> # Accessing the model configuration - >>> configuration = model.config - ```""" - - model_type = "unimer-swin" - - attribute_map = { - "num_attention_heads": "num_heads", - "num_hidden_layers": "num_layers", - } - - def __init__( - self, - image_size=224, - patch_size=4, - num_channels=3, - embed_dim=96, - depths=[2, 2, 6, 2], - num_heads=[3, 6, 12, 24], - window_size=7, - mlp_ratio=4.0, - qkv_bias=True, - hidden_dropout_prob=0.0, - attention_probs_dropout_prob=0.0, - drop_path_rate=0.1, - hidden_act="gelu", - use_absolute_embeddings=False, - initializer_range=0.02, - layer_norm_eps=1e-5, - **kwargs, - ): - super().__init__(**kwargs) - - self.image_size = image_size - self.patch_size = patch_size - self.num_channels = num_channels - self.embed_dim = embed_dim - self.depths = depths - self.num_layers = len(depths) - self.num_heads = num_heads - self.window_size = window_size - self.mlp_ratio = mlp_ratio - self.qkv_bias = qkv_bias - self.hidden_dropout_prob = hidden_dropout_prob - self.attention_probs_dropout_prob = attention_probs_dropout_prob - self.drop_path_rate = drop_path_rate - self.hidden_act = hidden_act - self.use_absolute_embeddings = use_absolute_embeddings - self.layer_norm_eps = layer_norm_eps - self.initializer_range = initializer_range - # we set the hidden_size attribute in order to make Swin work with VisionEncoderDecoderModel - # this indicates the channel dimension after the last stage of the model - 
self.hidden_size = int(embed_dim * 2 ** (len(depths) - 1)) diff --git a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py b/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py deleted file mode 100644 index a16d2433751d294bf1aed2022c466cc6dbaef15b..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/image_processing_unimer_swin.py +++ /dev/null @@ -1,132 +0,0 @@ -from transformers.image_processing_utils import BaseImageProcessor -import numpy as np -import cv2 -import albumentations as alb -from albumentations.pytorch import ToTensorV2 - - -# TODO: dereference cv2 if possible -class UnimerSwinImageProcessor(BaseImageProcessor): - def __init__( - self, - image_size = (192, 672), - ): - self.input_size = [int(_) for _ in image_size] - assert len(self.input_size) == 2 - - self.transform = alb.Compose( - [ - alb.ToGray(), - alb.Normalize((0.7931, 0.7931, 0.7931), (0.1738, 0.1738, 0.1738)), - # alb.Sharpen() - ToTensorV2(), - ] - ) - - def __call__(self, item): - image = self.prepare_input(item) - return self.transform(image=image)['image'][:1] - - @staticmethod - def crop_margin_numpy(img: np.ndarray) -> np.ndarray: - """Crop margins of image using NumPy operations""" - # Convert to grayscale if it's a color image - if len(img.shape) == 3 and img.shape[2] == 3: - gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) - else: - gray = img.copy() - - # Normalize and threshold - if gray.max() == gray.min(): - return img - - normalized = (((gray - gray.min()) / (gray.max() - gray.min())) * 255).astype(np.uint8) - binary = 255 * (normalized < 200).astype(np.uint8) - - # Find bounding box - coords = cv2.findNonZero(binary) # Find all non-zero points (text) - x, y, w, h = cv2.boundingRect(coords) # Find minimum spanning bounding box - - # Return cropped image - return img[y:y + h, x:x + w] - - def prepare_input(self, img, random_padding: bool = False): - """ - Convert PIL Image or numpy array to properly sized and padded image after: - - crop margins - - resize while maintaining aspect ratio - - pad to target size - """ - if img is None: - return None - - # try: - # img = self.crop_margin_numpy(img) - # except Exception: - # # might throw an error for broken files - # return None - - if img.shape[0] == 0 or img.shape[1] == 0: - return None - - # Get current dimensions - h, w = img.shape[:2] - target_h, target_w = self.input_size - - # Calculate scale to preserve aspect ratio (equivalent to resize + thumbnail) - scale = min(target_h / h, target_w / w) - - # Calculate new dimensions - new_h, new_w = int(h * scale), int(w * scale) - - # Resize the image while preserving aspect ratio - resized_img = cv2.resize(img, (new_w, new_h)) - - # Calculate padding values using the existing method - delta_width = target_w - new_w - delta_height = target_h - new_h - - pad_width, pad_height = self._get_padding_values(new_w, new_h, random_padding) - - # Apply padding (convert PIL padding format to OpenCV format) - padding_color = [0, 0, 0] if len(img.shape) == 3 else [0] - - padded_img = cv2.copyMakeBorder( - resized_img, - pad_height, # top - delta_height - pad_height, # bottom - pad_width, # left - delta_width - pad_width, # right - cv2.BORDER_CONSTANT, - value=padding_color - ) - - return padded_img - - def _calculate_padding(self, new_w, new_h, random_padding): - """Calculate padding values for PIL images""" - delta_width = self.input_size[1] - new_w - delta_height = 
self.input_size[0] - new_h - - pad_width, pad_height = self._get_padding_values(new_w, new_h, random_padding) - - return ( - pad_width, - pad_height, - delta_width - pad_width, - delta_height - pad_height, - ) - - def _get_padding_values(self, new_w, new_h, random_padding): - """Get padding values based on image dimensions and padding strategy""" - delta_width = self.input_size[1] - new_w - delta_height = self.input_size[0] - new_h - - if random_padding: - pad_width = np.random.randint(low=0, high=delta_width + 1) - pad_height = np.random.randint(low=0, high=delta_height + 1) - else: - pad_width = delta_width // 2 - pad_height = delta_height // 2 - - return pad_width, pad_height diff --git a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py b/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py deleted file mode 100644 index 1b808e8bdc2b2c760598ca5d0dbd2705e42f1072..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/mfr/unimernet/unimernet_hf/unimer_swin/modeling_unimer_swin.py +++ /dev/null @@ -1,1084 +0,0 @@ -# coding=utf-8 -# Copyright 2022 The HuggingFace Inc. team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""PyTorch UnimerSwin Transformer model. - -This implementation is identical to a regular Swin Transformer, without final layer norm on top of the final hidden -states.""" - -import collections.abc -import math -from dataclasses import dataclass -from typing import Optional, Tuple, Union - -import torch -import torch.utils.checkpoint -from torch import nn - -from transformers.activations import ACT2FN -from transformers.modeling_utils import PreTrainedModel -from transformers.pytorch_utils import find_pruneable_heads_and_indices, meshgrid, prune_linear_layer -from transformers.utils import ( - ModelOutput, - add_code_sample_docstrings, - add_start_docstrings, - add_start_docstrings_to_model_forward, - logging, - torch_int, -) -from .configuration_unimer_swin import UnimerSwinConfig - - -logger = logging.get_logger(__name__) - -# General docstring -_CONFIG_FOR_DOC = "UnimerSwinConfig" - -# Base docstring -_CHECKPOINT_FOR_DOC = "https://huggingface.co/naver-clova-ix/donut-base" -_EXPECTED_OUTPUT_SHAPE = [1, 49, 768] - - -@dataclass -# Copied from transformers.models.swin.modeling_swin.SwinEncoderOutput with Swin->UnimerSwin -class UnimerSwinEncoderOutput(ModelOutput): - """ - UnimerSwin encoder's outputs, with potential hidden states and attentions. - - Args: - last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`): - Sequence of hidden-states at the output of the last layer of the model. - hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): - Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of - shape `(batch_size, sequence_length, hidden_size)`. 
- - Hidden-states of the model at the output of each layer plus the initial embedding outputs. - attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): - Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length, - sequence_length)`. - - Attentions weights after the attention softmax, used to compute the weighted average in the self-attention - heads. - reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): - Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of - shape `(batch_size, hidden_size, height, width)`. - - Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to - include the spatial dimensions. - """ - - last_hidden_state: torch.FloatTensor = None - hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None - attentions: Optional[Tuple[torch.FloatTensor, ...]] = None - reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None - - -@dataclass -# Copied from transformers.models.swin.modeling_swin.SwinModelOutput with Swin->UnimerSwin -class UnimerSwinModelOutput(ModelOutput): - """ - UnimerSwin model's outputs that also contains a pooling of the last hidden states. - - Args: - last_hidden_state (`torch.FloatTensor` of shape `(batch_size, sequence_length, hidden_size)`): - Sequence of hidden-states at the output of the last layer of the model. - pooler_output (`torch.FloatTensor` of shape `(batch_size, hidden_size)`, *optional*, returned when `add_pooling_layer=True` is passed): - Average pooling of the last layer hidden-state. - hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): - Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of - shape `(batch_size, sequence_length, hidden_size)`. - - Hidden-states of the model at the output of each layer plus the initial embedding outputs. - attentions (`tuple(torch.FloatTensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`): - Tuple of `torch.FloatTensor` (one for each stage) of shape `(batch_size, num_heads, sequence_length, - sequence_length)`. - - Attentions weights after the attention softmax, used to compute the weighted average in the self-attention - heads. - reshaped_hidden_states (`tuple(torch.FloatTensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`): - Tuple of `torch.FloatTensor` (one for the output of the embeddings + one for the output of each stage) of - shape `(batch_size, hidden_size, height, width)`. - - Hidden-states of the model at the output of each layer plus the initial embedding outputs reshaped to - include the spatial dimensions. - """ - - last_hidden_state: torch.FloatTensor = None - pooler_output: Optional[torch.FloatTensor] = None - hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None - attentions: Optional[Tuple[torch.FloatTensor, ...]] = None - reshaped_hidden_states: Optional[Tuple[torch.FloatTensor, ...]] = None - - -# Copied from transformers.models.swin.modeling_swin.window_partition -def window_partition(input_feature, window_size): - """ - Partitions the given input into windows. 
- """ - batch_size, height, width, num_channels = input_feature.shape - input_feature = input_feature.view( - batch_size, height // window_size, window_size, width // window_size, window_size, num_channels - ) - windows = input_feature.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, window_size, window_size, num_channels) - return windows - - -# Copied from transformers.models.swin.modeling_swin.window_reverse -def window_reverse(windows, window_size, height, width): - """ - Merges windows to produce higher resolution features. - """ - num_channels = windows.shape[-1] - windows = windows.view(-1, height // window_size, width // window_size, window_size, window_size, num_channels) - windows = windows.permute(0, 1, 3, 2, 4, 5).contiguous().view(-1, height, width, num_channels) - return windows - - -# Copied from transformers.models.swin.modeling_swin.SwinEmbeddings with Swin->UnimerSwin -class UnimerSwinEmbeddings(nn.Module): - """ - Construct the patch and position embeddings. Optionally, also the mask token. - """ - - def __init__(self, config, use_mask_token=False): - super().__init__() - - self.patch_embeddings = UnimerSwinPatchEmbeddings(config) - num_patches = self.patch_embeddings.num_patches - self.patch_grid = self.patch_embeddings.grid_size - self.mask_token = nn.Parameter(torch.zeros(1, 1, config.embed_dim)) if use_mask_token else None - - if config.use_absolute_embeddings: - self.position_embeddings = nn.Parameter(torch.zeros(1, num_patches + 1, config.embed_dim)) - else: - self.position_embeddings = None - - ### code added. ### - if config.use_2d_embeddings: - self.row_embeddings = nn.Parameter(torch.zeros(1, self.patch_grid[0] + 1, config.embed_dim)) - self.column_embeddings = nn.Parameter(torch.zeros(1, self.patch_grid[1] + 1, config.embed_dim)) - else: - self.row_embeddings = None - self.column_embeddings = None - ###### - - self.norm = nn.LayerNorm(config.embed_dim) - self.dropout = nn.Dropout(config.hidden_dropout_prob) - - def interpolate_pos_encoding(self, embeddings: torch.Tensor, height: int, width: int) -> torch.Tensor: - """ - This method allows to interpolate the pre-trained position encodings, to be able to use the model on higher - resolution images. 
- - Source: - https://github.com/facebookresearch/dino/blob/de9ee3df6cf39fac952ab558447af1fa1365362a/vision_transformer.py#L174 - """ - - num_patches = embeddings.shape[1] - 1 - num_positions = self.position_embeddings.shape[1] - 1 - if num_patches == num_positions and height == width: - return self.position_embeddings - class_pos_embed = self.position_embeddings[:, 0] - patch_pos_embed = self.position_embeddings[:, 1:] - dim = embeddings.shape[-1] - h0 = height // self.config.patch_size - w0 = width // self.config.patch_size - # we add a small number to avoid floating point error in the interpolation - # see discussion at https://github.com/facebookresearch/dino/issues/8 - h0, w0 = h0 + 0.1, w0 + 0.1 - patch_pos_embed = patch_pos_embed.reshape(1, int(math.sqrt(num_positions)), int(math.sqrt(num_positions)), dim) - patch_pos_embed = patch_pos_embed.permute(0, 3, 1, 2) - patch_pos_embed = nn.functional.interpolate( - patch_pos_embed, - scale_factor=(h0 / math.sqrt(num_positions), w0 / math.sqrt(num_positions)), - mode="bicubic", - align_corners=False, - ) - patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim) - return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1) - - def forward( - self, - pixel_values: Optional[torch.FloatTensor], - bool_masked_pos: Optional[torch.BoolTensor] = None, - interpolate_pos_encoding: bool = False, - ) -> Tuple[torch.Tensor]: - _, num_channels, height, width = pixel_values.shape - embeddings, output_dimensions = self.patch_embeddings(pixel_values) - embeddings = self.norm(embeddings) - batch_size, seq_len, _ = embeddings.size() - - if bool_masked_pos is not None: - mask_tokens = self.mask_token.expand(batch_size, seq_len, -1) - # replace the masked visual tokens by mask_tokens - mask = bool_masked_pos.unsqueeze(-1).type_as(mask_tokens) - embeddings = embeddings * (1.0 - mask) + mask_tokens * mask - - if self.position_embeddings is not None: - # if interpolate_pos_encoding: - # embeddings = embeddings + self.interpolate_pos_encoding(embeddings, height, width) - # else: - # embeddings = embeddings + self.position_embeddings - embeddings = embeddings + self.position_embeddings[:, :seq_len, :] # code edited. - - ### code added. ### - if self.row_embeddings is not None and self.column_embeddings is not None: - # Repeat the x position embeddings across the y axis like 0, 1, 2, 3, 0, 1, 2, 3, ... 
- row_embeddings = self.row_embeddings[:, :output_dimensions[0], :].repeat_interleave(output_dimensions[1], dim=1) - column_embeddings = self.column_embeddings[:, :output_dimensions[1], :].repeat(1, output_dimensions[0], 1) - embeddings = embeddings + row_embeddings + column_embeddings - ###### - - embeddings = self.dropout(embeddings) - - return embeddings, output_dimensions - -class StemLayer(nn.Module): - r""" Stem layer of InternImage - Args: - in_chans (int): number of input channels - out_chans (int): number of output channels - act_layer (str): activation layer - norm_layer (str): normalization layer - """ - - def __init__(self, in_chans=3, out_chans=96, act_layer=nn.GELU, norm_layer='BN'): - super().__init__() - self.conv1 = nn.Conv2d(in_chans, out_chans // 2, kernel_size=3, stride=2, padding=1) - self.norm1 = self.build_norm_layer(out_chans // 2, norm_layer) - self.act = act_layer() - self.conv2 = nn.Conv2d(out_chans // 2, out_chans, kernel_size=3, stride=2, padding=1) - - def build_norm_layer(self, dim, norm_layer): - layers = [] - if norm_layer == 'BN': - layers.append(nn.BatchNorm2d(dim)) - else: - raise NotImplementedError(f'build_norm_layer does not support {norm_layer}') - return nn.Sequential(*layers) - - def forward(self, x): - x = self.conv1(x) - x = self.norm1(x) - x = self.act(x) - x = self.conv2(x) - return x - -# Copied from transformers.models.swin.modeling_swin.SwinPatchEmbeddings with Swin->UnimerSwin -class UnimerSwinPatchEmbeddings(nn.Module): - """ - This class turns `pixel_values` of shape `(batch_size, num_channels, height, width)` into the initial - `hidden_states` (patch embeddings) of shape `(batch_size, seq_length, hidden_size)` to be consumed by a - Transformer. - """ - - def __init__(self, config): - super().__init__() - image_size, patch_size = config.image_size, config.patch_size - num_channels, hidden_size = config.num_channels, config.embed_dim - image_size = image_size if isinstance(image_size, collections.abc.Iterable) else (image_size, image_size) - patch_size = patch_size if isinstance(patch_size, collections.abc.Iterable) else (patch_size, patch_size) - num_patches = (image_size[1] // patch_size[1]) * (image_size[0] // patch_size[0]) - self.image_size = image_size - self.patch_size = patch_size - self.num_channels = num_channels - self.num_patches = num_patches - self.grid_size = (image_size[0] // patch_size[0], image_size[1] // patch_size[1]) - - ### code edited. 
### - # self.projection = nn.Conv2d(num_channels, hidden_size, kernel_size=patch_size, stride=patch_size) - self.projection = StemLayer(in_chans=num_channels, out_chans=hidden_size) - ### - - def maybe_pad(self, pixel_values, height, width): - if width % self.patch_size[1] != 0: - pad_values = (0, self.patch_size[1] - width % self.patch_size[1]) - pixel_values = nn.functional.pad(pixel_values, pad_values) - if height % self.patch_size[0] != 0: - pad_values = (0, 0, 0, self.patch_size[0] - height % self.patch_size[0]) - pixel_values = nn.functional.pad(pixel_values, pad_values) - return pixel_values - - def forward(self, pixel_values: Optional[torch.FloatTensor]) -> Tuple[torch.Tensor, Tuple[int]]: - _, num_channels, height, width = pixel_values.shape - # pad the input to be divisible by self.patch_size, if needed - pixel_values = self.maybe_pad(pixel_values, height, width) - embeddings = self.projection(pixel_values) - _, _, height, width = embeddings.shape - output_dimensions = (height, width) - embeddings = embeddings.flatten(2).transpose(1, 2) - - return embeddings, output_dimensions - - -# Copied from transformers.models.swin.modeling_swin.SwinPatchMerging -class UnimerSwinPatchMerging(nn.Module): - """ - Patch Merging Layer. - - Args: - input_resolution (`Tuple[int]`): - Resolution of input feature. - dim (`int`): - Number of input channels. - norm_layer (`nn.Module`, *optional*, defaults to `nn.LayerNorm`): - Normalization layer class. - """ - - def __init__(self, input_resolution: Tuple[int], dim: int, norm_layer: nn.Module = nn.LayerNorm) -> None: - super().__init__() - self.input_resolution = input_resolution - self.dim = dim - self.reduction = nn.Linear(4 * dim, 2 * dim, bias=False) - self.norm = norm_layer(4 * dim) - - def maybe_pad(self, input_feature, height, width): - should_pad = (height % 2 == 1) or (width % 2 == 1) - if should_pad: - pad_values = (0, 0, 0, width % 2, 0, height % 2) - input_feature = nn.functional.pad(input_feature, pad_values) - - return input_feature - - def forward(self, input_feature: torch.Tensor, input_dimensions: Tuple[int, int]) -> torch.Tensor: - height, width = input_dimensions - # `dim` is height * width - batch_size, dim, num_channels = input_feature.shape - - input_feature = input_feature.view(batch_size, height, width, num_channels) - # pad input to be disible by width and height, if needed - input_feature = self.maybe_pad(input_feature, height, width) - # [batch_size, height/2, width/2, num_channels] - input_feature_0 = input_feature[:, 0::2, 0::2, :] - # [batch_size, height/2, width/2, num_channels] - input_feature_1 = input_feature[:, 1::2, 0::2, :] - # [batch_size, height/2, width/2, num_channels] - input_feature_2 = input_feature[:, 0::2, 1::2, :] - # [batch_size, height/2, width/2, num_channels] - input_feature_3 = input_feature[:, 1::2, 1::2, :] - # batch_size height/2 width/2 4*num_channels - input_feature = torch.cat([input_feature_0, input_feature_1, input_feature_2, input_feature_3], -1) - input_feature = input_feature.view(batch_size, -1, 4 * num_channels) # batch_size height/2*width/2 4*C - - input_feature = self.norm(input_feature) - input_feature = self.reduction(input_feature) - - return input_feature - - -# Copied from transformers.models.beit.modeling_beit.drop_path -def drop_path(input: torch.Tensor, drop_prob: float = 0.0, training: bool = False) -> torch.Tensor: - """ - Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
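UnimerSwinPatchMerging above concatenates every 2x2 neighborhood into a 4C-channel token and projects it down to 2C, halving the grid in both directions. A self-contained shape sketch with assumed toy dimensions:

import torch
import torch.nn as nn

B, H, W, C = 1, 4, 6, 8                        # toy feature map
x = torch.randn(B, H * W, C).view(B, H, W, C)  # tokens laid out on the grid

# Gather the four pixels of each 2x2 block, exactly as in the strided slicing above.
x = torch.cat([x[:, 0::2, 0::2, :], x[:, 1::2, 0::2, :],
               x[:, 0::2, 1::2, :], x[:, 1::2, 1::2, :]], dim=-1)  # (B, H/2, W/2, 4C)
x = x.view(B, -1, 4 * C)                       # (B, H/2 * W/2, 4C)

merge = nn.Sequential(nn.LayerNorm(4 * C), nn.Linear(4 * C, 2 * C, bias=False))
print(merge(x).shape)                          # torch.Size([1, 6, 16]): half resolution, double channels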
- - Comment by Ross Wightman: This is the same as the DropConnect impl I created for EfficientNet, etc networks, - however, the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... - See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... I've opted for changing the - layer and argument names to 'drop path' rather than mix DropConnect as a layer name and use 'survival rate' as the - argument. - """ - if drop_prob == 0.0 or not training: - return input - keep_prob = 1 - drop_prob - shape = (input.shape[0],) + (1,) * (input.ndim - 1) # work with diff dim tensors, not just 2D ConvNets - random_tensor = keep_prob + torch.rand(shape, dtype=input.dtype, device=input.device) - random_tensor.floor_() # binarize - output = input.div(keep_prob) * random_tensor - return output - - -# Copied from transformers.models.swin.modeling_swin.SwinDropPath -class UnimerSwinDropPath(nn.Module): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" - - def __init__(self, drop_prob: Optional[float] = None) -> None: - super().__init__() - self.drop_prob = drop_prob - - def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: - return drop_path(hidden_states, self.drop_prob, self.training) - - def extra_repr(self) -> str: - return "p={}".format(self.drop_prob) - - -# Copied from transformers.models.swin.modeling_swin.SwinSelfAttention with Swin->UnimerSwin -class UnimerSwinSelfAttention(nn.Module): - def __init__(self, config, dim, num_heads, window_size): - super().__init__() - if dim % num_heads != 0: - raise ValueError( - f"The hidden size ({dim}) is not a multiple of the number of attention heads ({num_heads})" - ) - - self.num_attention_heads = num_heads - self.attention_head_size = int(dim / num_heads) - self.all_head_size = self.num_attention_heads * self.attention_head_size - self.window_size = ( - window_size if isinstance(window_size, collections.abc.Iterable) else (window_size, window_size) - ) - - self.relative_position_bias_table = nn.Parameter( - torch.zeros((2 * self.window_size[0] - 1) * (2 * self.window_size[1] - 1), num_heads) - ) - - # get pair-wise relative position index for each token inside the window - coords_h = torch.arange(self.window_size[0]) - coords_w = torch.arange(self.window_size[1]) - coords = torch.stack(meshgrid([coords_h, coords_w], indexing="ij")) - coords_flatten = torch.flatten(coords, 1) - relative_coords = coords_flatten[:, :, None] - coords_flatten[:, None, :] - relative_coords = relative_coords.permute(1, 2, 0).contiguous() - relative_coords[:, :, 0] += self.window_size[0] - 1 - relative_coords[:, :, 1] += self.window_size[1] - 1 - relative_coords[:, :, 0] *= 2 * self.window_size[1] - 1 - relative_position_index = relative_coords.sum(-1) - self.register_buffer("relative_position_index", relative_position_index) - - self.query = nn.Linear(self.all_head_size, self.all_head_size, bias=config.qkv_bias) - self.key = nn.Linear(self.all_head_size, self.all_head_size, bias=config.qkv_bias) - self.value = nn.Linear(self.all_head_size, self.all_head_size, bias=config.qkv_bias) - - self.dropout = nn.Dropout(config.attention_probs_dropout_prob) - - def transpose_for_scores(self, x): - new_x_shape = x.size()[:-1] + (self.num_attention_heads, self.attention_head_size) - x = x.view(new_x_shape) - return x.permute(0, 2, 1, 3) - - def forward( - self, - hidden_states: torch.Tensor, - attention_mask: Optional[torch.FloatTensor] = None, - head_mask: 
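drop_path above implements stochastic depth: for each sample the residual branch is either zeroed or kept and rescaled by 1/keep_prob, so the expected output is unchanged. A tiny numeric sketch:

import torch

def drop_path_demo(x, drop_prob=0.5, training=True):
    if drop_prob == 0.0 or not training:
        return x
    keep_prob = 1.0 - drop_prob
    # One Bernoulli draw per sample, broadcast over all remaining dimensions.
    mask = torch.rand(x.shape[0], *([1] * (x.ndim - 1))).add_(keep_prob).floor_()
    return x / keep_prob * mask

torch.manual_seed(0)
x = torch.ones(4, 3)          # a batch of 4 samples
out = drop_path_demo(x)
print(out)                    # dropped rows are all zeros; surviving rows are scaled to 2.0,
                              # so each entry still equals 1.0 in expectation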
Optional[torch.FloatTensor] = None, - output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor]: - batch_size, dim, num_channels = hidden_states.shape - mixed_query_layer = self.query(hidden_states) - - key_layer = self.transpose_for_scores(self.key(hidden_states)) - value_layer = self.transpose_for_scores(self.value(hidden_states)) - query_layer = self.transpose_for_scores(mixed_query_layer) - - # Take the dot product between "query" and "key" to get the raw attention scores. - attention_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) - - attention_scores = attention_scores / math.sqrt(self.attention_head_size) - - relative_position_bias = self.relative_position_bias_table[self.relative_position_index.view(-1)] - relative_position_bias = relative_position_bias.view( - self.window_size[0] * self.window_size[1], self.window_size[0] * self.window_size[1], -1 - ) - - relative_position_bias = relative_position_bias.permute(2, 0, 1).contiguous() - attention_scores = attention_scores + relative_position_bias.unsqueeze(0) - - if attention_mask is not None: - # Apply the attention mask is (precomputed for all layers in UnimerSwinModel forward() function) - mask_shape = attention_mask.shape[0] - attention_scores = attention_scores.view( - batch_size // mask_shape, mask_shape, self.num_attention_heads, dim, dim - ) - attention_scores = attention_scores + attention_mask.unsqueeze(1).unsqueeze(0) - attention_scores = attention_scores.view(-1, self.num_attention_heads, dim, dim) - - # Normalize the attention scores to probabilities. - attention_probs = nn.functional.softmax(attention_scores, dim=-1) - - # This is actually dropping out entire tokens to attend to, which might - # seem a bit unusual, but is taken from the original Transformer paper. 
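Each head adds a learned bias indexed by the relative (row, col) offset between every query/key pair inside a window; the lookup above flattens those offsets into indices over a (2*Wh-1)*(2*Ww-1) table. A standalone sketch for a 2x2 window:

import torch

num_heads = 1
bias_table = torch.arange(9.0).view(9, num_heads)   # 3*3 = 9 distinct offsets for a 2x2 window

coords = torch.stack(torch.meshgrid(torch.arange(2), torch.arange(2), indexing="ij"))
coords = coords.flatten(1)                           # (2, 4): (row, col) of each of the 4 tokens
rel = (coords[:, :, None] - coords[:, None, :]).permute(1, 2, 0)  # (4, 4, 2) pairwise offsets
rel[:, :, 0] += 1                                    # shift offsets into [0, 2]
rel[:, :, 1] += 1
rel[:, :, 0] *= 3                                    # row-major index into the flat table
index = rel.sum(-1)                                  # (4, 4)

bias = bias_table[index.view(-1)].view(4, 4, num_heads).permute(2, 0, 1)
print(bias.shape)                                    # torch.Size([1, 4, 4]): added to the attention scores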
- attention_probs = self.dropout(attention_probs) - - # Mask heads if we want to - if head_mask is not None: - attention_probs = attention_probs * head_mask - - context_layer = torch.matmul(attention_probs, value_layer) - context_layer = context_layer.permute(0, 2, 1, 3).contiguous() - new_context_layer_shape = context_layer.size()[:-2] + (self.all_head_size,) - context_layer = context_layer.view(new_context_layer_shape) - - outputs = (context_layer, attention_probs) if output_attentions else (context_layer,) - - return outputs - - -# Copied from transformers.models.swin.modeling_swin.SwinSelfOutput -class UnimerSwinSelfOutput(nn.Module): - def __init__(self, config, dim): - super().__init__() - self.dense = nn.Linear(dim, dim) - self.dropout = nn.Dropout(config.attention_probs_dropout_prob) - - def forward(self, hidden_states: torch.Tensor, input_tensor: torch.Tensor) -> torch.Tensor: - hidden_states = self.dense(hidden_states) - hidden_states = self.dropout(hidden_states) - - return hidden_states - - -# Copied from transformers.models.swin.modeling_swin.SwinAttention with Swin->UnimerSwin -class UnimerSwinAttention(nn.Module): - def __init__(self, config, dim, num_heads, window_size): - super().__init__() - self.self = UnimerSwinSelfAttention(config, dim, num_heads, window_size) - self.output = UnimerSwinSelfOutput(config, dim) - self.pruned_heads = set() - - def prune_heads(self, heads): - if len(heads) == 0: - return - heads, index = find_pruneable_heads_and_indices( - heads, self.self.num_attention_heads, self.self.attention_head_size, self.pruned_heads - ) - - # Prune linear layers - self.self.query = prune_linear_layer(self.self.query, index) - self.self.key = prune_linear_layer(self.self.key, index) - self.self.value = prune_linear_layer(self.self.value, index) - self.output.dense = prune_linear_layer(self.output.dense, index, dim=1) - - # Update hyper params and store pruned heads - self.self.num_attention_heads = self.self.num_attention_heads - len(heads) - self.self.all_head_size = self.self.attention_head_size * self.self.num_attention_heads - self.pruned_heads = self.pruned_heads.union(heads) - - def forward( - self, - hidden_states: torch.Tensor, - attention_mask: Optional[torch.FloatTensor] = None, - head_mask: Optional[torch.FloatTensor] = None, - output_attentions: Optional[bool] = False, - ) -> Tuple[torch.Tensor]: - self_outputs = self.self(hidden_states, attention_mask, head_mask, output_attentions) - attention_output = self.output(self_outputs[0], hidden_states) - outputs = (attention_output,) + self_outputs[1:] # add attentions if we output them - return outputs - - -# Copied from transformers.models.swin.modeling_swin.SwinIntermediate -class UnimerSwinIntermediate(nn.Module): - def __init__(self, config, dim): - super().__init__() - self.dense = nn.Linear(dim, int(config.mlp_ratio * dim)) - if isinstance(config.hidden_act, str): - self.intermediate_act_fn = ACT2FN[config.hidden_act] - else: - self.intermediate_act_fn = config.hidden_act - - def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: - hidden_states = self.dense(hidden_states) - hidden_states = self.intermediate_act_fn(hidden_states) - return hidden_states - - -# Copied from transformers.models.swin.modeling_swin.SwinOutput -class UnimerSwinOutput(nn.Module): - def __init__(self, config, dim): - super().__init__() - self.dense = nn.Linear(int(config.mlp_ratio * dim), dim) - self.dropout = nn.Dropout(config.hidden_dropout_prob) - - def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: - 
hidden_states = self.dense(hidden_states) - hidden_states = self.dropout(hidden_states) - return hidden_states - - -class ConvEnhance(nn.Module): - """Depth-wise convolution to get the positional information. - """ - def __init__(self, config, dim, k=3): - super(ConvEnhance, self).__init__() - self.proj = nn.Conv2d(dim, - dim, - (k,k), - (1,1), - (k // 2,k // 2), - groups=dim) - self.act_fn = ACT2FN[config.hidden_act] - - def forward(self, x, size: Tuple[int, int]): - B, N, C = x.shape - H, W = size - assert N == H * W - - feat = x.transpose(1, 2).view(B, C, H, W) - feat = self.proj(feat) - feat = self.act_fn(feat) - feat = feat.flatten(2).transpose(1, 2) - - x = x + feat - return x - - -# Copied from transformers.models.swin.modeling_swin.SwinLayer with Swin->UnimerSwin -class UnimerSwinLayer(nn.Module): - def __init__(self, config, dim, input_resolution, num_heads, shift_size=0): - super().__init__() - self.chunk_size_feed_forward = config.chunk_size_feed_forward - self.shift_size = shift_size - self.window_size = config.window_size - self.input_resolution = input_resolution - self.layernorm_before = nn.LayerNorm(dim, eps=config.layer_norm_eps) - - self.ce = nn.ModuleList([ConvEnhance(config, dim=dim, k=3), - ConvEnhance(config, dim=dim, k=3)]) - - self.attention = UnimerSwinAttention(config, dim, num_heads, window_size=self.window_size) - self.drop_path = UnimerSwinDropPath(config.drop_path_rate) if config.drop_path_rate > 0.0 else nn.Identity() - self.layernorm_after = nn.LayerNorm(dim, eps=config.layer_norm_eps) - self.intermediate = UnimerSwinIntermediate(config, dim) - self.output = UnimerSwinOutput(config, dim) - - def set_shift_and_window_size(self, input_resolution): - if min(input_resolution) <= self.window_size: - # if window size is larger than input resolution, we don't partition windows - self.shift_size = torch_int(0) - self.window_size = ( - torch.min(torch.tensor(input_resolution)) if torch.jit.is_tracing() else min(input_resolution) - ) - - def get_attn_mask(self, height, width, dtype, device): - if self.shift_size > 0: - # calculate attention mask for SW-MSA - img_mask = torch.zeros((1, height, width, 1), dtype=dtype, device=device) - height_slices = ( - slice(0, -self.window_size), - slice(-self.window_size, -self.shift_size), - slice(-self.shift_size, None), - ) - width_slices = ( - slice(0, -self.window_size), - slice(-self.window_size, -self.shift_size), - slice(-self.shift_size, None), - ) - count = 0 - for height_slice in height_slices: - for width_slice in width_slices: - img_mask[:, height_slice, width_slice, :] = count - count += 1 - - mask_windows = window_partition(img_mask, self.window_size) - mask_windows = mask_windows.view(-1, self.window_size * self.window_size) - attn_mask = mask_windows.unsqueeze(1) - mask_windows.unsqueeze(2) - attn_mask = attn_mask.masked_fill(attn_mask != 0, float(-100.0)).masked_fill(attn_mask == 0, float(0.0)) - else: - attn_mask = None - return attn_mask - - def maybe_pad(self, hidden_states, height, width): - pad_right = (self.window_size - width % self.window_size) % self.window_size - pad_bottom = (self.window_size - height % self.window_size) % self.window_size - pad_values = (0, 0, 0, pad_right, 0, pad_bottom) - hidden_states = nn.functional.pad(hidden_states, pad_values) - return hidden_states, pad_values - - def forward( - self, - hidden_states: torch.Tensor, - input_dimensions: Tuple[int, int], - head_mask: Optional[torch.FloatTensor] = None, - output_attentions: Optional[bool] = False, - always_partition: Optional[bool] 
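ConvEnhance above injects positional information by running a depth-wise 3x3 convolution over the token grid and adding the result back as a residual. A self-contained sketch of the reshape-convolve-flatten round trip (GELU assumed for the activation, toy sizes):

import torch
import torch.nn as nn
import torch.nn.functional as F

B, H, W, C = 1, 8, 12, 16
x = torch.randn(B, H * W, C)                                   # (B, N, C) tokens

dwconv = nn.Conv2d(C, C, kernel_size=3, padding=1, groups=C)   # depth-wise: one 3x3 filter per channel

feat = x.transpose(1, 2).reshape(B, C, H, W)                   # back to an image-like layout
feat = F.gelu(dwconv(feat))                                    # local positional mixing
feat = feat.flatten(2).transpose(1, 2)                         # back to (B, N, C)

out = x + feat                                                 # residual connection
print(out.shape)                                               # torch.Size([1, 96, 16])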
= False, - ) -> Tuple[torch.Tensor, torch.Tensor]: - if not always_partition: - self.set_shift_and_window_size(input_dimensions) - else: - pass - height, width = input_dimensions - batch_size, _, channels = hidden_states.size() - - - - hidden_states = self.ce[0](hidden_states, input_dimensions) - shortcut = hidden_states - - - hidden_states = self.layernorm_before(hidden_states) - hidden_states = hidden_states.view(batch_size, height, width, channels) - - # pad hidden_states to multiples of window size - hidden_states, pad_values = self.maybe_pad(hidden_states, height, width) - - _, height_pad, width_pad, _ = hidden_states.shape - # cyclic shift - if self.shift_size > 0: - shifted_hidden_states = torch.roll(hidden_states, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2)) - else: - shifted_hidden_states = hidden_states - - # partition windows - hidden_states_windows = window_partition(shifted_hidden_states, self.window_size) - hidden_states_windows = hidden_states_windows.view(-1, self.window_size * self.window_size, channels) - attn_mask = self.get_attn_mask( - height_pad, width_pad, dtype=hidden_states.dtype, device=hidden_states_windows.device - ) - - attention_outputs = self.attention( - hidden_states_windows, attn_mask, head_mask, output_attentions=output_attentions - ) - - attention_output = attention_outputs[0] - - attention_windows = attention_output.view(-1, self.window_size, self.window_size, channels) - shifted_windows = window_reverse(attention_windows, self.window_size, height_pad, width_pad) - - # reverse cyclic shift - if self.shift_size > 0: - attention_windows = torch.roll(shifted_windows, shifts=(self.shift_size, self.shift_size), dims=(1, 2)) - else: - attention_windows = shifted_windows - - was_padded = pad_values[3] > 0 or pad_values[5] > 0 - if was_padded: - attention_windows = attention_windows[:, :height, :width, :].contiguous() - - attention_windows = attention_windows.view(batch_size, height * width, channels) - - hidden_states = shortcut + self.drop_path(attention_windows) - - - - hidden_states = self.ce[1](hidden_states, input_dimensions) - layer_output = self.layernorm_after(hidden_states) - layer_output = self.intermediate(layer_output) - layer_output = hidden_states + self.output(layer_output) - - layer_outputs = (layer_output, attention_outputs[1]) if output_attentions else (layer_output,) - return layer_outputs - - -# Copied from transformers.models.swin.modeling_swin.SwinStage with Swin->UnimerSwin -class UnimerSwinStage(nn.Module): - def __init__(self, config, dim, input_resolution, depth, num_heads, drop_path, downsample): - super().__init__() - self.config = config - self.dim = dim - self.blocks = nn.ModuleList( - [ - UnimerSwinLayer( - config=config, - dim=dim, - input_resolution=input_resolution, - num_heads=num_heads, - shift_size=0, - ) - for i in range(depth) - ] - ) - - # patch merging layer - if downsample is not None: - self.downsample = downsample(input_resolution, dim=dim, norm_layer=nn.LayerNorm) - else: - self.downsample = None - - self.pointing = False - - def forward( - self, - hidden_states: torch.Tensor, - input_dimensions: Tuple[int, int], - head_mask: Optional[torch.FloatTensor] = None, - output_attentions: Optional[bool] = False, - always_partition: Optional[bool] = False, - ) -> Tuple[torch.Tensor]: - height, width = input_dimensions - for i, layer_module in enumerate(self.blocks): - layer_head_mask = head_mask[i] if head_mask is not None else None - - layer_outputs = layer_module( - hidden_states, input_dimensions, 
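The layer forward above cyclically shifts the padded feature map, splits it into non-overlapping windows for attention, and then reverses both steps. A self-contained sketch of that round trip (window_partition/window_reverse here are local stand-ins for the module-level helpers, which are not shown in this hunk):

import torch

def window_partition(x, ws):
    B, H, W, C = x.shape
    x = x.view(B, H // ws, ws, W // ws, ws, C)
    return x.permute(0, 1, 3, 2, 4, 5).reshape(-1, ws, ws, C)      # (num_windows * B, ws, ws, C)

def window_reverse(windows, ws, H, W):
    B = windows.shape[0] // (H * W // ws // ws)
    x = windows.view(B, H // ws, W // ws, ws, ws, -1)
    return x.permute(0, 1, 3, 2, 4, 5).reshape(B, H, W, -1)

x = torch.randn(1, 8, 8, 4)                                        # toy padded feature map
shift, ws = 2, 4
shifted = torch.roll(x, shifts=(-shift, -shift), dims=(1, 2))      # cyclic shift
windows = window_partition(shifted, ws)                            # four 4x4 windows
restored = window_reverse(windows, ws, 8, 8)
restored = torch.roll(restored, shifts=(shift, shift), dims=(1, 2))
print(torch.equal(restored, x))                                    # True: the round trip is lossless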
layer_head_mask, output_attentions, always_partition - ) - - hidden_states = layer_outputs[0] - - hidden_states_before_downsampling = hidden_states - if self.downsample is not None: - height_downsampled, width_downsampled = (height + 1) // 2, (width + 1) // 2 - output_dimensions = (height, width, height_downsampled, width_downsampled) - hidden_states = self.downsample(hidden_states_before_downsampling, input_dimensions) - else: - output_dimensions = (height, width, height, width) - - stage_outputs = (hidden_states, hidden_states_before_downsampling, output_dimensions) - - if output_attentions: - stage_outputs += layer_outputs[1:] - return stage_outputs - - -# Copied from transformers.models.swin.modeling_swin.SwinEncoder with Swin->UnimerSwin -class UnimerSwinEncoder(nn.Module): - def __init__(self, config, grid_size): - super().__init__() - self.num_layers = len(config.depths) - self.config = config - dpr = [x.item() for x in torch.linspace(0, config.drop_path_rate, sum(config.depths))] - self.layers = nn.ModuleList( - [ - UnimerSwinStage( - config=config, - dim=int(config.embed_dim * 2**i_layer), - input_resolution=(grid_size[0] // (2**i_layer), grid_size[1] // (2**i_layer)), - depth=config.depths[i_layer], - num_heads=config.num_heads[i_layer], - drop_path=dpr[sum(config.depths[:i_layer]) : sum(config.depths[: i_layer + 1])], - downsample=UnimerSwinPatchMerging if (i_layer < self.num_layers - 1) else None, - ) - for i_layer in range(self.num_layers) - ] - ) - - self.gradient_checkpointing = False - - def forward( - self, - hidden_states: torch.Tensor, - input_dimensions: Tuple[int, int], - head_mask: Optional[torch.FloatTensor] = None, - output_attentions: Optional[bool] = False, - output_hidden_states: Optional[bool] = False, - output_hidden_states_before_downsampling: Optional[bool] = False, - always_partition: Optional[bool] = False, - return_dict: Optional[bool] = True, - ) -> Union[Tuple, UnimerSwinEncoderOutput]: - all_hidden_states = () if output_hidden_states else None - all_reshaped_hidden_states = () if output_hidden_states else None - all_self_attentions = () if output_attentions else None - - if output_hidden_states: - batch_size, _, hidden_size = hidden_states.shape - # rearrange b (h w) c -> b c h w - reshaped_hidden_state = hidden_states.view(batch_size, *input_dimensions, hidden_size) - reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2) - all_hidden_states += (hidden_states,) - all_reshaped_hidden_states += (reshaped_hidden_state,) - - for i, layer_module in enumerate(self.layers): - layer_head_mask = head_mask[i] if head_mask is not None else None - - if self.gradient_checkpointing and self.training: - layer_outputs = self._gradient_checkpointing_func( - layer_module.__call__, - hidden_states, - input_dimensions, - layer_head_mask, - output_attentions, - always_partition, - ) - else: - layer_outputs = layer_module( - hidden_states, input_dimensions, layer_head_mask, output_attentions, always_partition - ) - - hidden_states = layer_outputs[0] - hidden_states_before_downsampling = layer_outputs[1] - output_dimensions = layer_outputs[2] - - input_dimensions = (output_dimensions[-2], output_dimensions[-1]) - - if output_hidden_states and output_hidden_states_before_downsampling: - batch_size, _, hidden_size = hidden_states_before_downsampling.shape - # rearrange b (h w) c -> b c h w - # here we use the original (not downsampled) height and width - reshaped_hidden_state = hidden_states_before_downsampling.view( - batch_size, *(output_dimensions[0], 
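The encoder above builds one stage per entry in config.depths: channel width doubles and the patch grid halves at every stage, while the stochastic-depth rate grows linearly across all blocks. A small sketch of that bookkeeping with assumed (not actual) config values:

import torch

embed_dim, depths, drop_path_rate = 96, [2, 2, 6, 2], 0.1   # illustrative values only
grid = (56, 56)

dpr = [x.item() for x in torch.linspace(0, drop_path_rate, sum(depths))]
for i, depth in enumerate(depths):
    dim = int(embed_dim * 2 ** i)                           # 96, 192, 384, 768
    res = (grid[0] // 2 ** i, grid[1] // 2 ** i)            # 56x56, 28x28, 14x14, 7x7
    rates = dpr[sum(depths[:i]): sum(depths[:i + 1])]       # this stage's drop-path schedule
    print(f"stage {i}: dim={dim}, resolution={res}, drop_path={[round(r, 3) for r in rates]}")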
output_dimensions[1]), hidden_size - ) - reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2) - all_hidden_states += (hidden_states_before_downsampling,) - all_reshaped_hidden_states += (reshaped_hidden_state,) - elif output_hidden_states and not output_hidden_states_before_downsampling: - batch_size, _, hidden_size = hidden_states.shape - # rearrange b (h w) c -> b c h w - reshaped_hidden_state = hidden_states.view(batch_size, *input_dimensions, hidden_size) - reshaped_hidden_state = reshaped_hidden_state.permute(0, 3, 1, 2) - all_hidden_states += (hidden_states,) - all_reshaped_hidden_states += (reshaped_hidden_state,) - - if output_attentions: - all_self_attentions += layer_outputs[3:] - - if not return_dict: - return tuple(v for v in [hidden_states, all_hidden_states, all_self_attentions] if v is not None) - - return UnimerSwinEncoderOutput( - last_hidden_state=hidden_states, - hidden_states=all_hidden_states, - attentions=all_self_attentions, - reshaped_hidden_states=all_reshaped_hidden_states, - ) - - -# Copied from transformers.models.swin.modeling_swin.SwinPreTrainedModel with Swin->UnimerSwin -class UnimerSwinPreTrainedModel(PreTrainedModel): - """ - An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained - models. - """ - - config_class = UnimerSwinConfig - base_model_prefix = "unimer-swin" - main_input_name = "pixel_values" - supports_gradient_checkpointing = True - _no_split_modules = ["UnimerSwinStage"] - - def _init_weights(self, module): - """Initialize the weights""" - if isinstance(module, (nn.Linear, nn.Conv2d)): - # Slightly different from the TF version which uses truncated_normal for initialization - # cf https://github.com/pytorch/pytorch/pull/5617 - module.weight.data.normal_(mean=0.0, std=self.config.initializer_range) - if module.bias is not None: - module.bias.data.zero_() - elif isinstance(module, nn.LayerNorm): - module.bias.data.zero_() - module.weight.data.fill_(1.0) - - -SWIN_START_DOCSTRING = r""" - This model is a PyTorch [torch.nn.Module](https://pytorch.org/docs/stable/nn.html#torch.nn.Module) sub-class. Use - it as a regular PyTorch Module and refer to the PyTorch documentation for all matter related to general usage and - behavior. - - Parameters: - config ([`UnimerSwinConfig`]): Model configuration class with all the parameters of the model. - Initializing with a config file does not load the weights associated with the model, only the - configuration. Check out the [`~PreTrainedModel.from_pretrained`] method to load the model weights. -""" - -SWIN_INPUTS_DOCSTRING = r""" - Args: - pixel_values (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)`): - Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See - [`DonutImageProcessor.__call__`] for details. - head_mask (`torch.FloatTensor` of shape `(num_heads,)` or `(num_layers, num_heads)`, *optional*): - Mask to nullify selected heads of the self-attention modules. Mask values selected in `[0, 1]`: - - - 1 indicates the head is **not masked**, - - 0 indicates the head is **masked**. - - output_attentions (`bool`, *optional*): - Whether or not to return the attentions tensors of all attention layers. See `attentions` under returned - tensors for more detail. - output_hidden_states (`bool`, *optional*): - Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for - more detail. 
- interpolate_pos_encoding (`bool`, *optional*, defaults to `False`): - Whether to interpolate the pre-trained position encodings. - return_dict (`bool`, *optional*): - Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple. -""" - - -@add_start_docstrings( - "The bare UnimerSwin Model transformer outputting raw hidden-states without any specific head on top.", - SWIN_START_DOCSTRING, -) -class UnimerSwinModel(UnimerSwinPreTrainedModel): - def __init__(self, config, add_pooling_layer=True, use_mask_token=False): - super().__init__(config) - self.config = config - self.num_layers = len(config.depths) - self.num_features = int(config.embed_dim * 2 ** (self.num_layers - 1)) - - self.embeddings = UnimerSwinEmbeddings(config, use_mask_token=use_mask_token) - self.encoder = UnimerSwinEncoder(config, self.embeddings.patch_grid) - self.pooler = nn.AdaptiveAvgPool1d(1) if add_pooling_layer else None - - # Initialize weights and apply final processing - self.post_init() - - def get_input_embeddings(self): - return self.embeddings.patch_embeddings - - def _prune_heads(self, heads_to_prune): - """ - Prunes heads of the model. heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base - class PreTrainedModel - """ - for layer, heads in heads_to_prune.items(): - self.encoder.layer[layer].attention.prune_heads(heads) - - @add_start_docstrings_to_model_forward(SWIN_INPUTS_DOCSTRING) - @add_code_sample_docstrings( - checkpoint=_CHECKPOINT_FOR_DOC, - output_type=UnimerSwinModelOutput, - config_class=_CONFIG_FOR_DOC, - modality="vision", - expected_output=_EXPECTED_OUTPUT_SHAPE, - ) - def forward( - self, - pixel_values: Optional[torch.FloatTensor] = None, - bool_masked_pos: Optional[torch.BoolTensor] = None, - head_mask: Optional[torch.FloatTensor] = None, - output_attentions: Optional[bool] = None, - output_hidden_states: Optional[bool] = None, - interpolate_pos_encoding: bool = False, - return_dict: Optional[bool] = None, - ) -> Union[Tuple, UnimerSwinModelOutput]: - r""" - bool_masked_pos (`torch.BoolTensor` of shape `(batch_size, num_patches)`): - Boolean masked positions. Indicates which patches are masked (1) and which aren't (0). 
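When add_pooling_layer is enabled, the pooler above is just a mean over the final token sequence, implemented as AdaptiveAvgPool1d(1) after a transpose. A minimal sketch:

import torch
import torch.nn as nn

seq = torch.randn(2, 196, 768)                            # (batch, tokens, hidden) from the encoder
pooler = nn.AdaptiveAvgPool1d(1)

pooled = torch.flatten(pooler(seq.transpose(1, 2)), 1)    # (2, 768)
print(torch.allclose(pooled, seq.mean(dim=1)))            # True: equivalent to averaging over tokens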
- """ - output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions - output_hidden_states = ( - output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states - ) - return_dict = return_dict if return_dict is not None else self.config.use_return_dict - - if pixel_values is None: - raise ValueError("You have to specify pixel_values") - - # Prepare head mask if needed - # 1.0 in head_mask indicate we keep the head - # attention_probs has shape bsz x n_heads x N x N - # input head_mask has shape [num_heads] or [num_hidden_layers x num_heads] - # and head_mask is converted to shape [num_hidden_layers x batch x num_heads x seq_length x seq_length] - head_mask = self.get_head_mask(head_mask, len(self.config.depths)) - - embedding_output, input_dimensions = self.embeddings( - pixel_values, bool_masked_pos=bool_masked_pos, interpolate_pos_encoding=interpolate_pos_encoding - ) - - encoder_outputs = self.encoder( - embedding_output, - input_dimensions, - head_mask=head_mask, - output_attentions=output_attentions, - output_hidden_states=output_hidden_states, - return_dict=return_dict, - ) - - sequence_output = encoder_outputs[0] - - pooled_output = None - if self.pooler is not None: - pooled_output = self.pooler(sequence_output.transpose(1, 2)) - pooled_output = torch.flatten(pooled_output, 1) - - if not return_dict: - output = (sequence_output, pooled_output) + encoder_outputs[1:] - - return output - - return UnimerSwinModelOutput( - last_hidden_state=sequence_output, - pooler_output=pooled_output, - hidden_states=encoder_outputs.hidden_states, - attentions=encoder_outputs.attentions, - reshaped_hidden_states=encoder_outputs.reshaped_hidden_states, - ) diff --git a/magic_pdf/model/sub_modules/model_init.py b/magic_pdf/model/sub_modules/model_init.py deleted file mode 100644 index b885606dd64599897c29d41acfca3f6a843beacc..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/model_init.py +++ /dev/null @@ -1,213 +0,0 @@ -import torch -from loguru import logger - -from magic_pdf.config.constants import MODEL_NAME -from magic_pdf.model.model_list import AtomicModel -from magic_pdf.model.sub_modules.language_detection.yolov11.YOLOv11 import YOLOv11LangDetModel -from magic_pdf.model.sub_modules.layout.doclayout_yolo.DocLayoutYOLO import DocLayoutYOLOModel -from magic_pdf.model.sub_modules.mfd.yolov8.YOLOv8 import YOLOv8MFDModel -from magic_pdf.model.sub_modules.mfr.unimernet.Unimernet import UnimernetModel -from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.pytorch_paddle import PytorchPaddleOCR -from magic_pdf.model.sub_modules.table.rapidtable.rapid_table import RapidTableModel -# try: -# from magic_pdf_ascend_plugin.libs.license_verifier import ( -# LicenseExpiredError, LicenseFormatError, LicenseSignatureError, -# load_license) -# from magic_pdf_ascend_plugin.model_plugin.ocr.paddleocr.ppocr_273_npu import ModifiedPaddleOCR -# from magic_pdf_ascend_plugin.model_plugin.table.rapidtable.rapid_table_npu import RapidTableModel -# license_key = load_license() -# logger.info(f'Using Ascend Plugin Success, License id is {license_key["payload"]["id"]},' -# f' License expired at {license_key["payload"]["date"]["end_date"]}') -# except Exception as e: -# if isinstance(e, ImportError): -# pass -# elif isinstance(e, LicenseFormatError): -# logger.error('Ascend Plugin: Invalid license format. 
Please check the license file.') -# elif isinstance(e, LicenseSignatureError): -# logger.error('Ascend Plugin: Invalid signature. The license may be tampered with.') -# elif isinstance(e, LicenseExpiredError): -# logger.error('Ascend Plugin: License has expired. Please renew your license.') -# elif isinstance(e, FileNotFoundError): -# logger.error('Ascend Plugin: Not found License file.') -# else: -# logger.error(f'Ascend Plugin: {e}') -# from magic_pdf.model.sub_modules.ocr.paddleocr.ppocr_273_mod import ModifiedPaddleOCR -# # from magic_pdf.model.sub_modules.ocr.paddleocr.ppocr_291_mod import ModifiedPaddleOCR -# from magic_pdf.model.sub_modules.table.rapidtable.rapid_table import RapidTableModel - - -def table_model_init(table_model_type, model_path, max_time, _device_='cpu', lang=None, table_sub_model_name=None): - if table_model_type == MODEL_NAME.STRUCT_EQTABLE: - from magic_pdf.model.sub_modules.table.structeqtable.struct_eqtable import StructTableModel - table_model = StructTableModel(model_path, max_new_tokens=2048, max_time=max_time) - elif table_model_type == MODEL_NAME.TABLE_MASTER: - from magic_pdf.model.sub_modules.table.tablemaster.tablemaster_paddle import TableMasterPaddleModel - config = { - 'model_dir': model_path, - 'device': _device_ - } - table_model = TableMasterPaddleModel(config) - elif table_model_type == MODEL_NAME.RAPID_TABLE: - atom_model_manager = AtomModelSingleton() - ocr_engine = atom_model_manager.get_atom_model( - atom_model_name='ocr', - ocr_show_log=False, - det_db_box_thresh=0.5, - det_db_unclip_ratio=1.6, - lang=lang - ) - table_model = RapidTableModel(ocr_engine, table_sub_model_name) - else: - logger.error('table model type not allow') - exit(1) - - return table_model - - -def mfd_model_init(weight, device='cpu'): - if str(device).startswith('npu'): - device = torch.device(device) - mfd_model = YOLOv8MFDModel(weight, device) - return mfd_model - - -def mfr_model_init(weight_dir, cfg_path, device='cpu'): - mfr_model = UnimernetModel(weight_dir, cfg_path, device) - return mfr_model - - -def layout_model_init(weight, config_file, device): - from magic_pdf.model.sub_modules.layout.layoutlmv3.model_init import Layoutlmv3_Predictor - model = Layoutlmv3_Predictor(weight, config_file, device) - return model - - -def doclayout_yolo_model_init(weight, device='cpu'): - if str(device).startswith('npu'): - device = torch.device(device) - model = DocLayoutYOLOModel(weight, device) - return model - - -def langdetect_model_init(langdetect_model_weight, device='cpu'): - if str(device).startswith('npu'): - device = torch.device(device) - model = YOLOv11LangDetModel(langdetect_model_weight, device) - return model - - -def ocr_model_init(show_log: bool = False, - det_db_box_thresh=0.3, - lang=None, - use_dilation=True, - det_db_unclip_ratio=1.8, - ): - if lang is not None and lang != '': - # model = ModifiedPaddleOCR( - model = PytorchPaddleOCR( - show_log=show_log, - det_db_box_thresh=det_db_box_thresh, - lang=lang, - use_dilation=use_dilation, - det_db_unclip_ratio=det_db_unclip_ratio, - ) - else: - # model = ModifiedPaddleOCR( - model = PytorchPaddleOCR( - show_log=show_log, - det_db_box_thresh=det_db_box_thresh, - use_dilation=use_dilation, - det_db_unclip_ratio=det_db_unclip_ratio, - ) - return model - - -class AtomModelSingleton: - _instance = None - _models = {} - - def __new__(cls, *args, **kwargs): - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance - - def get_atom_model(self, atom_model_name: str, **kwargs): - - lang = 
kwargs.get('lang', None) - layout_model_name = kwargs.get('layout_model_name', None) - table_model_name = kwargs.get('table_model_name', None) - - if atom_model_name in [AtomicModel.OCR]: - key = (atom_model_name, lang) - elif atom_model_name in [AtomicModel.Layout]: - key = (atom_model_name, layout_model_name) - elif atom_model_name in [AtomicModel.Table]: - key = (atom_model_name, table_model_name, lang) - else: - key = atom_model_name - - if key not in self._models: - self._models[key] = atom_model_init(model_name=atom_model_name, **kwargs) - return self._models[key] - -def atom_model_init(model_name: str, **kwargs): - atom_model = None - if model_name == AtomicModel.Layout: - if kwargs.get('layout_model_name') == MODEL_NAME.LAYOUTLMv3: - atom_model = layout_model_init( - kwargs.get('layout_weights'), - kwargs.get('layout_config_file'), - kwargs.get('device') - ) - elif kwargs.get('layout_model_name') == MODEL_NAME.DocLayout_YOLO: - atom_model = doclayout_yolo_model_init( - kwargs.get('doclayout_yolo_weights'), - kwargs.get('device') - ) - else: - logger.error('layout model name not allow') - exit(1) - elif model_name == AtomicModel.MFD: - atom_model = mfd_model_init( - kwargs.get('mfd_weights'), - kwargs.get('device') - ) - elif model_name == AtomicModel.MFR: - atom_model = mfr_model_init( - kwargs.get('mfr_weight_dir'), - kwargs.get('mfr_cfg_path'), - kwargs.get('device') - ) - elif model_name == AtomicModel.OCR: - atom_model = ocr_model_init( - kwargs.get('ocr_show_log'), - kwargs.get('det_db_box_thresh'), - kwargs.get('lang'), - ) - elif model_name == AtomicModel.Table: - atom_model = table_model_init( - kwargs.get('table_model_name'), - kwargs.get('table_model_path'), - kwargs.get('table_max_time'), - kwargs.get('device'), - kwargs.get('lang'), - kwargs.get('table_sub_model_name') - ) - elif model_name == AtomicModel.LangDetect: - if kwargs.get('langdetect_model_name') == MODEL_NAME.YOLO_V11_LangDetect: - atom_model = langdetect_model_init( - kwargs.get('langdetect_model_weight'), - kwargs.get('device') - ) - else: - logger.error('langdetect model name not allow') - exit(1) - else: - logger.error('model name not allow') - exit(1) - - if atom_model is None: - logger.error('model init failed') - exit(1) - else: - return atom_model diff --git a/magic_pdf/model/sub_modules/model_utils.py b/magic_pdf/model/sub_modules/model_utils.py deleted file mode 100644 index 04d0fbbd028290e46cab1adc5911674fe0541ef0..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/model_utils.py +++ /dev/null @@ -1,309 +0,0 @@ -import time -import torch -from loguru import logger -import numpy as np - -from magic_pdf.libs.boxbase import get_minbox_if_overlap_by_ratio -from magic_pdf.libs.clean_memory import clean_memory - - -def crop_img(input_res, input_np_img, crop_paste_x=0, crop_paste_y=0): - - crop_xmin, crop_ymin = int(input_res['poly'][0]), int(input_res['poly'][1]) - crop_xmax, crop_ymax = int(input_res['poly'][4]), int(input_res['poly'][5]) - - # Calculate new dimensions - crop_new_width = crop_xmax - crop_xmin + crop_paste_x * 2 - crop_new_height = crop_ymax - crop_ymin + crop_paste_y * 2 - - # Create a white background array - return_image = np.ones((crop_new_height, crop_new_width, 3), dtype=np.uint8) * 255 - - # Crop the original image using numpy slicing - cropped_img = input_np_img[crop_ymin:crop_ymax, crop_xmin:crop_xmax] - - # Paste the cropped image onto the white background - return_image[crop_paste_y:crop_paste_y + (crop_ymax - crop_ymin), - crop_paste_x:crop_paste_x + 
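get_atom_model above memoizes each constructed model under a key that folds in the language (OCR, table) or sub-model name (layout), so repeated requests reuse one instance per configuration. A stripped-down sketch of the same caching pattern with a dummy factory (the names and kwargs here are illustrative, not the real ones):

class ModelCacheDemo:
    _instance = None
    _models = {}

    def __new__(cls):
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def get(self, name, **kwargs):
        # OCR-style models are cached per language, everything else by name only.
        key = (name, kwargs.get('lang')) if name == 'ocr' else name
        if key not in self._models:
            self._models[key] = object()           # stand-in for atom_model_init(...)
        return self._models[key]

cache = ModelCacheDemo()
assert cache.get('ocr', lang='en') is cache.get('ocr', lang='en')      # same instance reused
assert cache.get('ocr', lang='en') is not cache.get('ocr', lang='ch')  # new language, new model
assert ModelCacheDemo() is cache                                       # singleton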
(crop_xmax - crop_xmin)] = cropped_img - - return_list = [crop_paste_x, crop_paste_y, crop_xmin, crop_ymin, crop_xmax, crop_ymax, crop_new_width, - crop_new_height] - return return_image, return_list - - -def get_coords_and_area(block_with_poly): - """Extract coordinates and area from a table.""" - xmin, ymin = int(block_with_poly['poly'][0]), int(block_with_poly['poly'][1]) - xmax, ymax = int(block_with_poly['poly'][4]), int(block_with_poly['poly'][5]) - area = (xmax - xmin) * (ymax - ymin) - return xmin, ymin, xmax, ymax, area - - -def calculate_intersection(box1, box2): - """Calculate intersection coordinates between two boxes.""" - intersection_xmin = max(box1[0], box2[0]) - intersection_ymin = max(box1[1], box2[1]) - intersection_xmax = min(box1[2], box2[2]) - intersection_ymax = min(box1[3], box2[3]) - - # Check if intersection is valid - if intersection_xmax <= intersection_xmin or intersection_ymax <= intersection_ymin: - return None - - return intersection_xmin, intersection_ymin, intersection_xmax, intersection_ymax - - -def calculate_iou(box1, box2): - """Calculate IoU between two boxes.""" - intersection = calculate_intersection(box1[:4], box2[:4]) - - if not intersection: - return 0 - - intersection_xmin, intersection_ymin, intersection_xmax, intersection_ymax = intersection - intersection_area = (intersection_xmax - intersection_xmin) * (intersection_ymax - intersection_ymin) - - area1, area2 = box1[4], box2[4] - union_area = area1 + area2 - intersection_area - - return intersection_area / union_area if union_area > 0 else 0 - - -def is_inside(small_box, big_box, overlap_threshold=0.8): - """Check if small_box is inside big_box by at least overlap_threshold.""" - intersection = calculate_intersection(small_box[:4], big_box[:4]) - - if not intersection: - return False - - intersection_xmin, intersection_ymin, intersection_xmax, intersection_ymax = intersection - intersection_area = (intersection_xmax - intersection_xmin) * (intersection_ymax - intersection_ymin) - - # Check if overlap exceeds threshold - return intersection_area >= overlap_threshold * small_box[4] - - -def do_overlap(box1, box2): - """Check if two boxes overlap.""" - return calculate_intersection(box1[:4], box2[:4]) is not None - - -def merge_high_iou_tables(table_res_list, layout_res, table_indices, iou_threshold=0.7): - """Merge tables with IoU > threshold.""" - if len(table_res_list) < 2: - return table_res_list, table_indices - - table_info = [get_coords_and_area(table) for table in table_res_list] - merged = True - - while merged: - merged = False - i = 0 - while i < len(table_res_list) - 1: - j = i + 1 - while j < len(table_res_list): - iou = calculate_iou(table_info[i], table_info[j]) - - if iou > iou_threshold: - # Merge tables by taking their union - x1_min, y1_min, x1_max, y1_max, _ = table_info[i] - x2_min, y2_min, x2_max, y2_max, _ = table_info[j] - - union_xmin = min(x1_min, x2_min) - union_ymin = min(y1_min, y2_min) - union_xmax = max(x1_max, x2_max) - union_ymax = max(y1_max, y2_max) - - # Create merged table - merged_table = table_res_list[i].copy() - merged_table['poly'][0] = union_xmin - merged_table['poly'][1] = union_ymin - merged_table['poly'][2] = union_xmax - merged_table['poly'][3] = union_ymin - merged_table['poly'][4] = union_xmax - merged_table['poly'][5] = union_ymax - merged_table['poly'][6] = union_xmin - merged_table['poly'][7] = union_ymax - - # Update layout_res - to_remove = [table_indices[j], table_indices[i]] - for idx in sorted(to_remove, reverse=True): - del 
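calculate_iou above is the usual intersection-over-union on (xmin, ymin, xmax, ymax) boxes. A quick numeric check of the same arithmetic:

def iou(box1, box2):
    ix0, iy0 = max(box1[0], box2[0]), max(box1[1], box2[1])
    ix1, iy1 = min(box1[2], box2[2]), min(box1[3], box2[3])
    if ix1 <= ix0 or iy1 <= iy0:                 # no valid intersection
        return 0.0
    inter = (ix1 - ix0) * (iy1 - iy0)
    a1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    a2 = (box2[2] - box2[0]) * (box2[3] - box2[1])
    return inter / (a1 + a2 - inter)

print(iou((0, 0, 10, 10), (5, 0, 15, 10)))       # 0.333...: 50 px overlap over a 150 px union
print(iou((0, 0, 10, 10), (20, 20, 30, 30)))     # 0.0: disjoint boxes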
layout_res[idx] - layout_res.append(merged_table) - - # Update tracking lists - table_indices = [k if k < min(to_remove) else - k - 1 if k < max(to_remove) else - k - 2 if k > max(to_remove) else - len(layout_res) - 1 - for k in table_indices - if k not in to_remove] - table_indices.append(len(layout_res) - 1) - - # Update table lists - table_res_list.pop(j) - table_res_list.pop(i) - table_res_list.append(merged_table) - - # Update table_info - table_info = [get_coords_and_area(table) for table in table_res_list] - - merged = True - break - j += 1 - - if merged: - break - i += 1 - - return table_res_list, table_indices - - -def filter_nested_tables(table_res_list, overlap_threshold=0.8, area_threshold=0.8): - """Remove big tables containing multiple smaller tables within them.""" - if len(table_res_list) < 3: - return table_res_list - - table_info = [get_coords_and_area(table) for table in table_res_list] - big_tables_idx = [] - - for i in range(len(table_res_list)): - # Find tables inside this one - tables_inside = [j for j in range(len(table_res_list)) - if i != j and is_inside(table_info[j], table_info[i], overlap_threshold)] - - # Continue if there are at least 3 tables inside - if len(tables_inside) >= 3: - # Check if inside tables overlap with each other - tables_overlap = any(do_overlap(table_info[tables_inside[idx1]], table_info[tables_inside[idx2]]) - for idx1 in range(len(tables_inside)) - for idx2 in range(idx1 + 1, len(tables_inside))) - - # If no overlaps, check area condition - if not tables_overlap: - total_inside_area = sum(table_info[j][4] for j in tables_inside) - big_table_area = table_info[i][4] - - if total_inside_area > area_threshold * big_table_area: - big_tables_idx.append(i) - - return [table for i, table in enumerate(table_res_list) if i not in big_tables_idx] - - -def remove_overlaps_min_blocks(res_list): - # 重叠block,小的不能直接删除,需要和大的那个合并成一个更大的。 - # 删除重叠blocks中较小的那些 - need_remove = [] - for res1 in res_list: - for res2 in res_list: - if res1 != res2: - overlap_box = get_minbox_if_overlap_by_ratio( - res1['bbox'], res2['bbox'], 0.8 - ) - if overlap_box is not None: - res_to_remove = next( - (res for res in res_list if res['bbox'] == overlap_box), - None, - ) - if ( - res_to_remove is not None - and res_to_remove not in need_remove - ): - large_res = res1 if res1 != res_to_remove else res2 - x1, y1, x2, y2 = large_res['bbox'] - sx1, sy1, sx2, sy2 = res_to_remove['bbox'] - x1 = min(x1, sx1) - y1 = min(y1, sy1) - x2 = max(x2, sx2) - y2 = max(y2, sy2) - large_res['bbox'] = [x1, y1, x2, y2] - need_remove.append(res_to_remove) - - if len(need_remove) > 0: - for res in need_remove: - res_list.remove(res) - - return res_list, need_remove - - -def get_res_list_from_layout_res(layout_res, iou_threshold=0.7, overlap_threshold=0.8, area_threshold=0.8): - """Extract OCR, table and other regions from layout results.""" - ocr_res_list = [] - text_res_list = [] - table_res_list = [] - table_indices = [] - single_page_mfdetrec_res = [] - - # Categorize regions - for i, res in enumerate(layout_res): - category_id = int(res['category_id']) - - if category_id in [13, 14]: # Formula regions - single_page_mfdetrec_res.append({ - "bbox": [int(res['poly'][0]), int(res['poly'][1]), - int(res['poly'][4]), int(res['poly'][5])], - }) - elif category_id in [0, 2, 4, 6, 7, 3]: # OCR regions - ocr_res_list.append(res) - elif category_id == 5: # Table regions - table_res_list.append(res) - table_indices.append(i) - elif category_id in [1]: # Text regions - res['bbox'] = [int(res['poly'][0]), 
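remove_overlaps_min_blocks above resolves heavy overlaps by growing the larger block to the union of the two bboxes and dropping the smaller one. A tiny numeric illustration of the union step:

def union_bbox(large, small):
    # The big block absorbs the small one, as in the expansion above.
    x1, y1, x2, y2 = large
    sx1, sy1, sx2, sy2 = small
    return [min(x1, sx1), min(y1, sy1), max(x2, sx2), max(y2, sy2)]

large = [100, 100, 400, 300]
small = [380, 120, 450, 200]        # overlaps the right edge of the large block
print(union_bbox(large, small))     # [100, 100, 450, 300] -> one merged block; the small one is removed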
int(res['poly'][1]), int(res['poly'][4]), int(res['poly'][5])] - text_res_list.append(res) - - # Process tables: merge high IoU tables first, then filter nested tables - table_res_list, table_indices = merge_high_iou_tables( - table_res_list, layout_res, table_indices, iou_threshold) - - filtered_table_res_list = filter_nested_tables( - table_res_list, overlap_threshold, area_threshold) - - # Remove filtered out tables from layout_res - if len(filtered_table_res_list) < len(table_res_list): - kept_tables = set(id(table) for table in filtered_table_res_list) - to_remove = [table_indices[i] for i, table in enumerate(table_res_list) - if id(table) not in kept_tables] - - for idx in sorted(to_remove, reverse=True): - del layout_res[idx] - - # Remove overlaps in OCR and text regions - text_res_list, need_remove = remove_overlaps_min_blocks(text_res_list) - for res in text_res_list: - # 将res的poly使用bbox重构 - res['poly'] = [res['bbox'][0], res['bbox'][1], res['bbox'][2], res['bbox'][1], - res['bbox'][2], res['bbox'][3], res['bbox'][0], res['bbox'][3]] - # 删除res的bbox - del res['bbox'] - - ocr_res_list.extend(text_res_list) - - if len(need_remove) > 0: - for res in need_remove: - del res['bbox'] - layout_res.remove(res) - - return ocr_res_list, filtered_table_res_list, single_page_mfdetrec_res - - -def clean_vram(device, vram_threshold=8): - total_memory = get_vram(device) - if total_memory and total_memory <= vram_threshold: - gc_start = time.time() - clean_memory(device) - gc_time = round(time.time() - gc_start, 2) - logger.info(f"gc time: {gc_time}") - - -def get_vram(device): - if torch.cuda.is_available() and str(device).startswith("cuda"): - total_memory = torch.cuda.get_device_properties(device).total_memory / (1024 ** 3) # 将字节转换为 GB - return total_memory - elif str(device).startswith("npu"): - import torch_npu - if torch_npu.npu.is_available(): - total_memory = torch_npu.npu.get_device_properties(device).total_memory / (1024 ** 3) # 转为 GB - return total_memory - else: - return None \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/ocr/__init__.py b/magic_pdf/model/sub_modules/ocr/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/ocr_utils.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/ocr_utils.py deleted file mode 100644 index 70989fdf3958646b5778e4000d3750220a1d5c7a..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/ocr_utils.py +++ /dev/null @@ -1,368 +0,0 @@ -# Copyright (c) Opendatalab. All rights reserved. 
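Both this file and the OCR utilities that follow share the same conventions: a 'poly' is eight numbers, the four corners in top-left, top-right, bottom-right, bottom-left order, and a 'bbox' is the axis-aligned [x0, y0, x1, y1]. A small sketch of the round trip used when text blocks are rebuilt above:

def bbox_to_poly(bbox):
    x0, y0, x1, y1 = bbox
    return [x0, y0, x1, y0, x1, y1, x0, y1]       # corner order assumed by get_coords_and_area

def poly_to_bbox(poly):
    return [poly[0], poly[1], poly[4], poly[5]]   # indices 0,1 (top-left) and 4,5 (bottom-right)

bbox = [12, 34, 56, 78]
assert poly_to_bbox(bbox_to_poly(bbox)) == bbox   # lossless for axis-aligned boxes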
-import copy - -import cv2 -import numpy as np -from magic_pdf.pre_proc.ocr_dict_merge import merge_spans_to_line -from magic_pdf.libs.boxbase import __is_overlaps_y_exceeds_threshold - - -def img_decode(content: bytes): - np_arr = np.frombuffer(content, dtype=np.uint8) - return cv2.imdecode(np_arr, cv2.IMREAD_UNCHANGED) - -def check_img(img): - if isinstance(img, bytes): - img = img_decode(img) - if isinstance(img, np.ndarray) and len(img.shape) == 2: - img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - return img - - -def alpha_to_color(img, alpha_color=(255, 255, 255)): - if len(img.shape) == 3 and img.shape[2] == 4: - B, G, R, A = cv2.split(img) - alpha = A / 255 - - R = (alpha_color[0] * (1 - alpha) + R * alpha).astype(np.uint8) - G = (alpha_color[1] * (1 - alpha) + G * alpha).astype(np.uint8) - B = (alpha_color[2] * (1 - alpha) + B * alpha).astype(np.uint8) - - img = cv2.merge((B, G, R)) - return img - - -def preprocess_image(_image): - alpha_color = (255, 255, 255) - _image = alpha_to_color(_image, alpha_color) - return _image - - -def sorted_boxes(dt_boxes): - """ - Sort text boxes in order from top to bottom, left to right - args: - dt_boxes(array):detected text boxes with shape [4, 2] - return: - sorted boxes(array) with shape [4, 2] - """ - num_boxes = dt_boxes.shape[0] - sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) - _boxes = list(sorted_boxes) - - for i in range(num_boxes - 1): - for j in range(i, -1, -1): - if abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10 and \ - (_boxes[j + 1][0][0] < _boxes[j][0][0]): - tmp = _boxes[j] - _boxes[j] = _boxes[j + 1] - _boxes[j + 1] = tmp - else: - break - return _boxes - - -def bbox_to_points(bbox): - """ 将bbox格式转换为四个顶点的数组 """ - x0, y0, x1, y1 = bbox - return np.array([[x0, y0], [x1, y0], [x1, y1], [x0, y1]]).astype('float32') - - -def points_to_bbox(points): - """ 将四个顶点的数组转换为bbox格式 """ - x0, y0 = points[0] - x1, _ = points[1] - _, y1 = points[2] - return [x0, y0, x1, y1] - - -def merge_intervals(intervals): - # Sort the intervals based on the start value - intervals.sort(key=lambda x: x[0]) - - merged = [] - for interval in intervals: - # If the list of merged intervals is empty or if the current - # interval does not overlap with the previous, simply append it. - if not merged or merged[-1][1] < interval[0]: - merged.append(interval) - else: - # Otherwise, there is overlap, so we merge the current and previous intervals. 
- merged[-1][1] = max(merged[-1][1], interval[1]) - - return merged - - -def remove_intervals(original, masks): - # Merge all mask intervals - merged_masks = merge_intervals(masks) - - result = [] - original_start, original_end = original - - for mask in merged_masks: - mask_start, mask_end = mask - - # If the mask starts after the original range, ignore it - if mask_start > original_end: - continue - - # If the mask ends before the original range starts, ignore it - if mask_end < original_start: - continue - - # Remove the masked part from the original range - if original_start < mask_start: - result.append([original_start, mask_start - 1]) - - original_start = max(mask_end + 1, original_start) - - # Add the remaining part of the original range, if any - if original_start <= original_end: - result.append([original_start, original_end]) - - return result - - -def update_det_boxes(dt_boxes, mfd_res): - new_dt_boxes = [] - angle_boxes_list = [] - for text_box in dt_boxes: - - if calculate_is_angle(text_box): - angle_boxes_list.append(text_box) - continue - - text_bbox = points_to_bbox(text_box) - masks_list = [] - for mf_box in mfd_res: - mf_bbox = mf_box['bbox'] - if __is_overlaps_y_exceeds_threshold(text_bbox, mf_bbox): - masks_list.append([mf_bbox[0], mf_bbox[2]]) - text_x_range = [text_bbox[0], text_bbox[2]] - text_remove_mask_range = remove_intervals(text_x_range, masks_list) - temp_dt_box = [] - for text_remove_mask in text_remove_mask_range: - temp_dt_box.append(bbox_to_points([text_remove_mask[0], text_bbox[1], text_remove_mask[1], text_bbox[3]])) - if len(temp_dt_box) > 0: - new_dt_boxes.extend(temp_dt_box) - - new_dt_boxes.extend(angle_boxes_list) - - return new_dt_boxes - - -def merge_overlapping_spans(spans): - """ - Merges overlapping spans on the same line. - - :param spans: A list of span coordinates [(x1, y1, x2, y2), ...] - :return: A list of merged spans - """ - # Return an empty list if the input spans list is empty - if not spans: - return [] - - # Sort spans by their starting x-coordinate - spans.sort(key=lambda x: x[0]) - - # Initialize the list of merged spans - merged = [] - for span in spans: - # Unpack span coordinates - x1, y1, x2, y2 = span - # If the merged list is empty or there's no horizontal overlap, add the span directly - if not merged or merged[-1][2] < x1: - merged.append(span) - else: - # If there is horizontal overlap, merge the current span with the previous one - last_span = merged.pop() - # Update the merged span's top-left corner to the smaller (x1, y1) and bottom-right to the larger (x2, y2) - x1 = min(last_span[0], x1) - y1 = min(last_span[1], y1) - x2 = max(last_span[2], x2) - y2 = max(last_span[3], y2) - # Add the merged span back to the list - merged.append((x1, y1, x2, y2)) - - # Return the list of merged spans - return merged - - -def merge_det_boxes(dt_boxes): - """ - Merge detection boxes. - - This function takes a list of detected bounding boxes, each represented by four corner points. - The goal is to merge these bounding boxes into larger text regions. - - Parameters: - dt_boxes (list): A list containing multiple text detection boxes, where each box is defined by four corner points. - - Returns: - list: A list containing the merged text regions, where each region is represented by four corner points. 
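update_det_boxes above carves inline-formula regions out of a text-detection box by treating everything as x-intervals: the formula intervals are merged, then subtracted from the text span. A numeric sketch of that subtraction:

def merge(intervals):
    intervals = sorted(intervals, key=lambda x: x[0])
    out = []
    for s, e in intervals:
        if out and out[-1][1] >= s:
            out[-1][1] = max(out[-1][1], e)       # overlapping masks are fused
        else:
            out.append([s, e])
    return out

def subtract(original, masks):
    start, end = original
    result = []
    for ms, me in merge(masks):
        if me < start or ms > end:                # mask outside the text span
            continue
        if start < ms:
            result.append([start, ms - 1])
        start = max(me + 1, start)
    if start <= end:
        result.append([start, end])
    return result

# A text line spanning x = 100..500 with two inline formulas at 180..220 and 300..360:
print(subtract([100, 500], [[180, 220], [300, 360]]))
# [[100, 179], [221, 299], [361, 500]] -> the text box is split into three pieces around the formulas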
- """ - # Convert the detection boxes into a dictionary format with bounding boxes and type - dt_boxes_dict_list = [] - angle_boxes_list = [] - for text_box in dt_boxes: - text_bbox = points_to_bbox(text_box) - - if calculate_is_angle(text_box): - angle_boxes_list.append(text_box) - continue - - text_box_dict = { - 'bbox': text_bbox, - 'type': 'text', - } - dt_boxes_dict_list.append(text_box_dict) - - # Merge adjacent text regions into lines - lines = merge_spans_to_line(dt_boxes_dict_list) - - # Initialize a new list for storing the merged text regions - new_dt_boxes = [] - for line in lines: - line_bbox_list = [] - for span in line: - line_bbox_list.append(span['bbox']) - - # Merge overlapping text regions within the same line - merged_spans = merge_overlapping_spans(line_bbox_list) - - # Convert the merged text regions back to point format and add them to the new detection box list - for span in merged_spans: - new_dt_boxes.append(bbox_to_points(span)) - - new_dt_boxes.extend(angle_boxes_list) - - return new_dt_boxes - - -def get_adjusted_mfdetrec_res(single_page_mfdetrec_res, useful_list): - paste_x, paste_y, xmin, ymin, xmax, ymax, new_width, new_height = useful_list - # Adjust the coordinates of the formula area - adjusted_mfdetrec_res = [] - for mf_res in single_page_mfdetrec_res: - mf_xmin, mf_ymin, mf_xmax, mf_ymax = mf_res["bbox"] - # Adjust the coordinates of the formula area to the coordinates relative to the cropping area - x0 = mf_xmin - xmin + paste_x - y0 = mf_ymin - ymin + paste_y - x1 = mf_xmax - xmin + paste_x - y1 = mf_ymax - ymin + paste_y - # Filter formula blocks outside the graph - if any([x1 < 0, y1 < 0]) or any([x0 > new_width, y0 > new_height]): - continue - else: - adjusted_mfdetrec_res.append({ - "bbox": [x0, y0, x1, y1], - }) - return adjusted_mfdetrec_res - - -def get_ocr_result_list(ocr_res, useful_list, ocr_enable, new_image, lang): - paste_x, paste_y, xmin, ymin, xmax, ymax, new_width, new_height = useful_list - ocr_result_list = [] - ori_im = new_image.copy() - for box_ocr_res in ocr_res: - - if len(box_ocr_res) == 2: - p1, p2, p3, p4 = box_ocr_res[0] - text, score = box_ocr_res[1] - # logger.info(f"text: {text}, score: {score}") - if score < 0.6: # 过滤低置信度的结果 - continue - else: - p1, p2, p3, p4 = box_ocr_res - text, score = "", 1 - - if ocr_enable: - tmp_box = copy.deepcopy(np.array([p1, p2, p3, p4]).astype('float32')) - img_crop = get_rotate_crop_image(ori_im, tmp_box) - - # average_angle_degrees = calculate_angle_degrees(box_ocr_res[0]) - # if average_angle_degrees > 0.5: - poly = [p1, p2, p3, p4] - if calculate_is_angle(poly): - # logger.info(f"average_angle_degrees: {average_angle_degrees}, text: {text}") - # 与x轴的夹角超过0.5度,对边界做一下矫正 - # 计算几何中心 - x_center = sum(point[0] for point in poly) / 4 - y_center = sum(point[1] for point in poly) / 4 - new_height = ((p4[1] - p1[1]) + (p3[1] - p2[1])) / 2 - new_width = p3[0] - p1[0] - p1 = [x_center - new_width / 2, y_center - new_height / 2] - p2 = [x_center + new_width / 2, y_center - new_height / 2] - p3 = [x_center + new_width / 2, y_center + new_height / 2] - p4 = [x_center - new_width / 2, y_center + new_height / 2] - - # Convert the coordinates back to the original coordinate system - p1 = [p1[0] - paste_x + xmin, p1[1] - paste_y + ymin] - p2 = [p2[0] - paste_x + xmin, p2[1] - paste_y + ymin] - p3 = [p3[0] - paste_x + xmin, p3[1] - paste_y + ymin] - p4 = [p4[0] - paste_x + xmin, p4[1] - paste_y + ymin] - - if ocr_enable: - ocr_result_list.append({ - 'category_id': 15, - 'poly': p1 + p2 + p3 + p4, - 'score': 
1, - 'text': text, - 'np_img': img_crop, - 'lang': lang, - }) - else: - ocr_result_list.append({ - 'category_id': 15, - 'poly': p1 + p2 + p3 + p4, - 'score': float(round(score, 2)), - 'text': text, - }) - - return ocr_result_list - - -def calculate_is_angle(poly): - p1, p2, p3, p4 = poly - height = ((p4[1] - p1[1]) + (p3[1] - p2[1])) / 2 - if 0.8 * height <= (p3[1] - p1[1]) <= 1.2 * height: - return False - else: - # logger.info((p3[1] - p1[1])/height) - return True - - -def get_rotate_crop_image(img, points): - ''' - img_height, img_width = img.shape[0:2] - left = int(np.min(points[:, 0])) - right = int(np.max(points[:, 0])) - top = int(np.min(points[:, 1])) - bottom = int(np.max(points[:, 1])) - img_crop = img[top:bottom, left:right, :].copy() - points[:, 0] = points[:, 0] - left - points[:, 1] = points[:, 1] - top - ''' - assert len(points) == 4, "shape of points must be 4*2" - img_crop_width = int( - max( - np.linalg.norm(points[0] - points[1]), - np.linalg.norm(points[2] - points[3]))) - img_crop_height = int( - max( - np.linalg.norm(points[0] - points[3]), - np.linalg.norm(points[1] - points[2]))) - pts_std = np.float32([[0, 0], [img_crop_width, 0], - [img_crop_width, img_crop_height], - [0, img_crop_height]]) - M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective( - img, - M, (img_crop_width, img_crop_height), - borderMode=cv2.BORDER_REPLICATE, - flags=cv2.INTER_CUBIC) - dst_img_height, dst_img_width = dst_img.shape[0:2] - if dst_img_height * 1.0 / dst_img_width >= 1.5: - dst_img = np.rot90(dst_img) - return dst_img \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py deleted file mode 100644 index 448bfda9287fb2ac63ba0ef92b6552fe21bb7680..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorch_paddle.py +++ /dev/null @@ -1,199 +0,0 @@ -# Copyright (c) Opendatalab. All rights reserved. 
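Before moving on to the torch OCR pipeline below, here is a minimal, self-contained sketch of the perspective warp that `get_rotate_crop_image` above performs on each detected quadrilateral before recognition. The image and box here are synthetic and the snippet only mirrors the math of the function; it is not part of the original module.

import cv2
import numpy as np

# Synthetic page with one slightly rotated text box (clockwise quad: tl, tr, br, bl).
page = np.full((200, 300, 3), 255, dtype=np.uint8)
box = np.float32([[60, 80], [220, 95], [216, 135], [56, 120]])
cv2.polylines(page, [box.astype(np.int32)], True, (0, 0, 0), 2)

# Same idea as get_rotate_crop_image: target size from the quad's edge lengths,
# then a perspective warp so the text line ends up axis-aligned.
w = int(max(np.linalg.norm(box[0] - box[1]), np.linalg.norm(box[2] - box[3])))
h = int(max(np.linalg.norm(box[0] - box[3]), np.linalg.norm(box[1] - box[2])))
dst = np.float32([[0, 0], [w, 0], [w, h], [0, h]])
M = cv2.getPerspectiveTransform(box, dst)
crop = cv2.warpPerspective(page, M, (w, h),
                           borderMode=cv2.BORDER_REPLICATE, flags=cv2.INTER_CUBIC)
if h / w >= 1.5:  # tall crops are treated as vertical text and rotated upright
    crop = np.rot90(crop)
print(crop.shape)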
-import copy -import os.path -import warnings -from pathlib import Path - -import cv2 -import numpy as np -import yaml -from loguru import logger - -from magic_pdf.libs.config_reader import get_device, get_local_models_dir -from .ocr_utils import check_img, preprocess_image, sorted_boxes, merge_det_boxes, update_det_boxes, get_rotate_crop_image -from .tools.infer.predict_system import TextSystem -from .tools.infer import pytorchocr_utility as utility -import argparse - - -latin_lang = [ - 'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr', # noqa: E126 - 'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl', - 'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv', - 'sw', 'tl', 'tr', 'uz', 'vi', 'french', 'german' -] -arabic_lang = ['ar', 'fa', 'ug', 'ur'] -cyrillic_lang = [ - 'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava', # noqa: E126 - 'dar', 'inh', 'che', 'lbe', 'lez', 'tab' -] -devanagari_lang = [ - 'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom', # noqa: E126 - 'sa', 'bgc' -] - - -def get_model_params(lang, config): - if lang in config['lang']: - params = config['lang'][lang] - det = params.get('det') - rec = params.get('rec') - dict_file = params.get('dict') - return det, rec, dict_file - else: - raise Exception (f'Language {lang} not supported') - - -root_dir = Path(__file__).resolve().parent - - -class PytorchPaddleOCR(TextSystem): - def __init__(self, *args, **kwargs): - parser = utility.init_args() - args = parser.parse_args(args) - - self.lang = kwargs.get('lang', 'ch') - - device = get_device() - if device == 'cpu' and self.lang in ['ch', 'ch_server']: - logger.warning("The current device in use is CPU. To ensure the speed of parsing, the language is automatically switched to ch_lite.") - self.lang = 'ch_lite' - - if self.lang in latin_lang: - self.lang = 'latin' - elif self.lang in arabic_lang: - self.lang = 'arabic' - elif self.lang in cyrillic_lang: - self.lang = 'cyrillic' - elif self.lang in devanagari_lang: - self.lang = 'devanagari' - else: - pass - - models_config_path = os.path.join(root_dir, 'pytorchocr', 'utils', 'resources', 'models_config.yml') - with open(models_config_path) as file: - config = yaml.safe_load(file) - det, rec, dict_file = get_model_params(self.lang, config) - ocr_models_dir = os.path.join(get_local_models_dir(), 'OCR', 'paddleocr_torch') - kwargs['det_model_path'] = os.path.join(ocr_models_dir, det) - kwargs['rec_model_path'] = os.path.join(ocr_models_dir, rec) - kwargs['rec_char_dict_path'] = os.path.join(root_dir, 'pytorchocr', 'utils', 'resources', 'dict', dict_file) - # kwargs['rec_batch_num'] = 8 - - kwargs['device'] = device - - default_args = vars(args) - default_args.update(kwargs) - args = argparse.Namespace(**default_args) - - super().__init__(args) - - def ocr(self, - img, - det=True, - rec=True, - mfd_res=None, - tqdm_enable=False, - ): - assert isinstance(img, (np.ndarray, list, str, bytes)) - if isinstance(img, list) and det == True: - logger.error('When input a list of images, det must be false') - exit(0) - img = check_img(img) - imgs = [img] - with warnings.catch_warnings(): - warnings.simplefilter("ignore", category=RuntimeWarning) - if det and rec: - ocr_res = [] - for img in imgs: - img = preprocess_image(img) - dt_boxes, rec_res = self.__call__(img, mfd_res=mfd_res) - if not dt_boxes and not rec_res: - ocr_res.append(None) - continue - tmp_res = [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)] - 
ocr_res.append(tmp_res)
-                return ocr_res
-            elif det and not rec:
-                ocr_res = []
-                for img in imgs:
-                    img = preprocess_image(img)
-                    dt_boxes, elapse = self.text_detector(img)
-                    # logger.debug("dt_boxes num : {}, elapsed : {}".format(len(dt_boxes), elapse))
-                    if dt_boxes is None:
-                        ocr_res.append(None)
-                        continue
-                    dt_boxes = sorted_boxes(dt_boxes)
-                    # merge_det_boxes and update_det_boxes both convert each poly to a bbox and back, so heavily tilted text boxes have to be filtered out first
-                    dt_boxes = merge_det_boxes(dt_boxes)
-                    if mfd_res:
-                        dt_boxes = update_det_boxes(dt_boxes, mfd_res)
-                    tmp_res = [box.tolist() for box in dt_boxes]
-                    ocr_res.append(tmp_res)
-                return ocr_res
-            elif not det and rec:
-                ocr_res = []
-                for img in imgs:
-                    if not isinstance(img, list):
-                        img = preprocess_image(img)
-                        img = [img]
-                    rec_res, elapse = self.text_recognizer(img, tqdm_enable=tqdm_enable)
-                    # logger.debug("rec_res num : {}, elapsed : {}".format(len(rec_res), elapse))
-                    ocr_res.append(rec_res)
-                return ocr_res
-
-    def __call__(self, img, mfd_res=None):
-
-        if img is None:
-            logger.debug("no valid image provided")
-            return None, None
-
-        ori_im = img.copy()
-        dt_boxes, elapse = self.text_detector(img)
-
-        if dt_boxes is None:
-            logger.debug("no dt_boxes found, elapsed : {}".format(elapse))
-            return None, None
-        else:
-            pass
-            # logger.debug("dt_boxes num : {}, elapsed : {}".format(len(dt_boxes), elapse))
-        img_crop_list = []
-
-        dt_boxes = sorted_boxes(dt_boxes)
-
-        # merge_det_boxes and update_det_boxes both convert each poly to a bbox and back, so heavily tilted text boxes have to be filtered out first
-        dt_boxes = merge_det_boxes(dt_boxes)
-
-        if mfd_res:
-            dt_boxes = update_det_boxes(dt_boxes, mfd_res)
-
-        for bno in range(len(dt_boxes)):
-            tmp_box = copy.deepcopy(dt_boxes[bno])
-            img_crop = get_rotate_crop_image(ori_im, tmp_box)
-            img_crop_list.append(img_crop)
-
-        rec_res, elapse = self.text_recognizer(img_crop_list)
-        # logger.debug("rec_res num : {}, elapsed : {}".format(len(rec_res), elapse))
-
-        filter_boxes, filter_rec_res = [], []
-        for box, rec_result in zip(dt_boxes, rec_res):
-            text, score = rec_result
-            if score >= self.drop_score:
-                filter_boxes.append(box)
-                filter_rec_res.append(rec_result)
-
-        return filter_boxes, filter_rec_res
-
-if __name__ == '__main__':
-    pytorch_paddle_ocr = PytorchPaddleOCR()
-    img = cv2.imread("/Users/myhloli/Downloads/screenshot-20250326-194348.png")
-    dt_boxes, rec_res = pytorch_paddle_ocr(img)
-    ocr_res = []
-    if not dt_boxes and not rec_res:
-        ocr_res.append(None)
-    else:
-        tmp_res = [[box.tolist(), res] for box, res in zip(dt_boxes, rec_res)]
-        ocr_res.append(tmp_res)
-    print(ocr_res)
-
-
diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/__init__.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/__init__.py
deleted file mode 100755
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py
deleted file mode 100755
index c169d20db9d3b3ea799e1c304ce8684cd8f12362..0000000000000000000000000000000000000000
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/base_ocr_v20.py
+++ /dev/null
@@ -1,39 +0,0 @@
-import os
-import torch
-from .modeling.architectures.base_model import BaseModel
-
-class BaseOCRV20:
-    def __init__(self, config, **kwargs):
-        self.config = config
-        self.build_net(**kwargs)
-        self.net.eval()
-
-
-    def build_net(self, **kwargs):
-        self.net = BaseModel(self.config, **kwargs)
-
-    def read_pytorch_weights(self, weights_path):
-        if not os.path.exists(weights_path):
-            raise FileNotFoundError('{} does not exist.'.format(weights_path))
-        weights = torch.load(weights_path)
-        return weights
-
-    def get_out_channels(self, weights):
-        if list(weights.keys())[-1].endswith('.weight') and len(list(weights.values())[-1].shape) == 2:
-            out_channels = list(weights.values())[-1].numpy().shape[1]
-        else:
-            out_channels = list(weights.values())[-1].numpy().shape[0]
-        return out_channels
-
-    def load_state_dict(self, weights):
-        self.net.load_state_dict(weights)
-        # print('weights is loaded.')
-
-    def load_pytorch_weights(self, weights_path):
-        self.net.load_state_dict(torch.load(weights_path, weights_only=True))
-        # print('model is loaded: {}'.format(weights_path))
-
-    def inference(self, inputs):
-        with torch.no_grad():
-            infer = self.net(inputs)
-        return infer
diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py
deleted file mode 100755
index 9eef2969a0854c6fc295c3696ba153d300e7c2f1..0000000000000000000000000000000000000000
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/__init__.py
+++ /dev/null
@@ -1,8 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-from .imaug import transform, create_operators
-
-
diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py
deleted file mode 100755
index 13abd6741c581fcb6d042854404f65c49213e9d9..0000000000000000000000000000000000000000
--- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/__init__.py
+++ /dev/null
@@ -1,48 +0,0 @@
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from __future__ import unicode_literals
-
-# from .iaa_augment import IaaAugment
-# from .make_border_map import MakeBorderMap
-# from .make_shrink_map import MakeShrinkMap
-# from .random_crop_data import EastRandomCropData, PSERandomCrop
-
-# from .rec_img_aug import RecAug, RecResizeImg, ClsResizeImg
-# from .randaugment import RandAugment
-from .operators import *
-# from .label_ops import *
-
-# from .east_process import *
-# from .sast_process import *
-# from .gen_table_mask import *
-
-def transform(data, ops=None):
-    """ transform """
-    if ops is None:
-        ops = []
-    for op in ops:
-        data = op(data)
-        if data is None:
-            return None
-    return data
-
-
-def create_operators(op_param_list, global_config=None):
-    """
-    create operators based on the config
-    Args:
-        params(list): a dict list, used to create some operators
-    """
-    assert isinstance(op_param_list, list), ('operator config should be a list')
-    ops = []
-    for operator in op_param_list:
-        assert isinstance(operator, dict) and len(operator) == 1, "yaml format error"
-        op_name = list(operator)[0]
-        param = {} if operator[op_name] is None else operator[op_name]
-        if global_config is not None:
-            param.update(global_config)
-        op = eval(op_name)(**param)
-        ops.append(op)
-    return ops
\ No newline at end of file
diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py
deleted file mode 100755 index 
daa67a25dae93dde74fc0b92aad4aa6ef4d4c003..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/data/imaug/operators.py +++ /dev/null @@ -1,418 +0,0 @@ -""" -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - -import sys -import six -import cv2 -import numpy as np - - -class DecodeImage(object): - """ decode image """ - - def __init__(self, img_mode='RGB', channel_first=False, **kwargs): - self.img_mode = img_mode - self.channel_first = channel_first - - def __call__(self, data): - img = data['image'] - if six.PY2: - assert type(img) is str and len( - img) > 0, "invalid input 'img' in DecodeImage" - else: - assert type(img) is bytes and len( - img) > 0, "invalid input 'img' in DecodeImage" - img = np.frombuffer(img, dtype='uint8') - img = cv2.imdecode(img, 1) - if img is None: - return None - if self.img_mode == 'GRAY': - img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - elif self.img_mode == 'RGB': - assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) - img = img[:, :, ::-1] - - if self.channel_first: - img = img.transpose((2, 0, 1)) - - data['image'] = img - return data - - -class NRTRDecodeImage(object): - """ decode image """ - - def __init__(self, img_mode='RGB', channel_first=False, **kwargs): - self.img_mode = img_mode - self.channel_first = channel_first - - def __call__(self, data): - img = data['image'] - if six.PY2: - assert type(img) is str and len( - img) > 0, "invalid input 'img' in DecodeImage" - else: - assert type(img) is bytes and len( - img) > 0, "invalid input 'img' in DecodeImage" - img = np.frombuffer(img, dtype='uint8') - - img = cv2.imdecode(img, 1) - - if img is None: - return None - if self.img_mode == 'GRAY': - img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - elif self.img_mode == 'RGB': - assert img.shape[2] == 3, 'invalid shape of image[%s]' % (img.shape) - img = img[:, :, ::-1] - img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - if self.channel_first: - img = img.transpose((2, 0, 1)) - data['image'] = img - return data - - -class NormalizeImage(object): - """ normalize image such as substract mean, divide std - """ - - def __init__(self, scale=None, mean=None, std=None, order='chw', **kwargs): - if isinstance(scale, str): - scale = eval(scale) - self.scale = np.float32(scale if scale is not None else 1.0 / 255.0) - mean = mean if mean is not None else [0.485, 0.456, 0.406] - std = std if std is not None else [0.229, 0.224, 0.225] - - shape = (3, 1, 1) if order == 'chw' else (1, 1, 3) - self.mean = np.array(mean).reshape(shape).astype('float32') - self.std = np.array(std).reshape(shape).astype('float32') - - def __call__(self, data): - img = data['image'] - from PIL import Image - if isinstance(img, Image.Image): - img = np.array(img) - assert isinstance(img, - np.ndarray), "invalid input 
'img' in NormalizeImage" - data['image'] = ( - img.astype('float32') * self.scale - self.mean) / self.std - return data - - -class ToCHWImage(object): - """ convert hwc image to chw image - """ - - def __init__(self, **kwargs): - pass - - def __call__(self, data): - img = data['image'] - from PIL import Image - if isinstance(img, Image.Image): - img = np.array(img) - data['image'] = img.transpose((2, 0, 1)) - return data - - -class Fasttext(object): - def __init__(self, path="None", **kwargs): - import fasttext - self.fast_model = fasttext.load_model(path) - - def __call__(self, data): - label = data['label'] - fast_label = self.fast_model[label] - data['fast_label'] = fast_label - return data - - -class KeepKeys(object): - def __init__(self, keep_keys, **kwargs): - self.keep_keys = keep_keys - - def __call__(self, data): - data_list = [] - for key in self.keep_keys: - data_list.append(data[key]) - return data_list - - -class Resize(object): - def __init__(self, size=(640, 640), **kwargs): - self.size = size - - def resize_image(self, img): - resize_h, resize_w = self.size - ori_h, ori_w = img.shape[:2] # (h, w, c) - ratio_h = float(resize_h) / ori_h - ratio_w = float(resize_w) / ori_w - img = cv2.resize(img, (int(resize_w), int(resize_h))) - return img, [ratio_h, ratio_w] - - def __call__(self, data): - img = data['image'] - text_polys = data['polys'] - - img_resize, [ratio_h, ratio_w] = self.resize_image(img) - new_boxes = [] - for box in text_polys: - new_box = [] - for cord in box: - new_box.append([cord[0] * ratio_w, cord[1] * ratio_h]) - new_boxes.append(new_box) - data['image'] = img_resize - data['polys'] = np.array(new_boxes, dtype=np.float32) - return data - - -class DetResizeForTest(object): - def __init__(self, **kwargs): - super(DetResizeForTest, self).__init__() - self.resize_type = 0 - if 'image_shape' in kwargs: - self.image_shape = kwargs['image_shape'] - self.resize_type = 1 - elif 'limit_side_len' in kwargs: - self.limit_side_len = kwargs['limit_side_len'] - self.limit_type = kwargs.get('limit_type', 'min') - elif 'resize_long' in kwargs: - self.resize_type = 2 - self.resize_long = kwargs.get('resize_long', 960) - else: - self.limit_side_len = 736 - self.limit_type = 'min' - - def __call__(self, data): - img = data['image'] - src_h, src_w, _ = img.shape - - if self.resize_type == 0: - # img, shape = self.resize_image_type0(img) - img, [ratio_h, ratio_w] = self.resize_image_type0(img) - elif self.resize_type == 2: - img, [ratio_h, ratio_w] = self.resize_image_type2(img) - else: - # img, shape = self.resize_image_type1(img) - img, [ratio_h, ratio_w] = self.resize_image_type1(img) - data['image'] = img - data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) - return data - - def resize_image_type1(self, img): - resize_h, resize_w = self.image_shape - ori_h, ori_w = img.shape[:2] # (h, w, c) - ratio_h = float(resize_h) / ori_h - ratio_w = float(resize_w) / ori_w - img = cv2.resize(img, (int(resize_w), int(resize_h))) - # return img, np.array([ori_h, ori_w]) - return img, [ratio_h, ratio_w] - - def resize_image_type0(self, img): - """ - resize image to a size multiple of 32 which is required by the network - args: - img(array): array with shape [h, w, c] - return(tuple): - img, (ratio_h, ratio_w) - """ - limit_side_len = self.limit_side_len - h, w, c = img.shape - - # limit the max side - if self.limit_type == 'max': - if max(h, w) > limit_side_len: - if h > w: - ratio = float(limit_side_len) / h - else: - ratio = float(limit_side_len) / w - else: - ratio = 1. 
- elif self.limit_type == 'min': - if min(h, w) < limit_side_len: - if h < w: - ratio = float(limit_side_len) / h - else: - ratio = float(limit_side_len) / w - else: - ratio = 1. - elif self.limit_type == 'resize_long': - ratio = float(limit_side_len) / max(h, w) - else: - raise Exception('not support limit type, image ') - resize_h = int(h * ratio) - resize_w = int(w * ratio) - - resize_h = max(int(round(resize_h / 32) * 32), 32) - resize_w = max(int(round(resize_w / 32) * 32), 32) - - try: - if int(resize_w) <= 0 or int(resize_h) <= 0: - return None, (None, None) - img = cv2.resize(img, (int(resize_w), int(resize_h))) - except: - print(img.shape, resize_w, resize_h) - sys.exit(0) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return img, [ratio_h, ratio_w] - - def resize_image_type2(self, img): - h, w, _ = img.shape - - resize_w = w - resize_h = h - - if resize_h > resize_w: - ratio = float(self.resize_long) / resize_h - else: - ratio = float(self.resize_long) / resize_w - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - img = cv2.resize(img, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - - return img, [ratio_h, ratio_w] - - -class E2EResizeForTest(object): - def __init__(self, **kwargs): - super(E2EResizeForTest, self).__init__() - self.max_side_len = kwargs['max_side_len'] - self.valid_set = kwargs['valid_set'] - - def __call__(self, data): - img = data['image'] - src_h, src_w, _ = img.shape - if self.valid_set == 'totaltext': - im_resized, [ratio_h, ratio_w] = self.resize_image_for_totaltext( - img, max_side_len=self.max_side_len) - else: - im_resized, (ratio_h, ratio_w) = self.resize_image( - img, max_side_len=self.max_side_len) - data['image'] = im_resized - data['shape'] = np.array([src_h, src_w, ratio_h, ratio_w]) - return data - - def resize_image_for_totaltext(self, im, max_side_len=512): - - h, w, _ = im.shape - resize_w = w - resize_h = h - ratio = 1.25 - if h * ratio > max_side_len: - ratio = float(max_side_len) / resize_h - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - return im, (ratio_h, ratio_w) - - def resize_image(self, im, max_side_len=512): - """ - resize image to a size multiple of max_stride which is required by the network - :param im: the resized image - :param max_side_len: limit of max image size to avoid out of memory in gpu - :return: the resized image and the resize ratio - """ - h, w, _ = im.shape - - resize_w = w - resize_h = h - - # Fix the longer side - if resize_h > resize_w: - ratio = float(max_side_len) / resize_h - else: - ratio = float(max_side_len) / resize_w - - resize_h = int(resize_h * ratio) - resize_w = int(resize_w * ratio) - - max_stride = 128 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(im, (int(resize_w), int(resize_h))) - ratio_h = resize_h / float(h) - ratio_w = resize_w / float(w) - - return im, (ratio_h, ratio_w) - - -class KieResize(object): - def __init__(self, **kwargs): - 
super(KieResize, self).__init__() - self.max_side, self.min_side = kwargs['img_scale'][0], kwargs[ - 'img_scale'][1] - - def __call__(self, data): - img = data['image'] - points = data['points'] - src_h, src_w, _ = img.shape - im_resized, scale_factor, [ratio_h, ratio_w - ], [new_h, new_w] = self.resize_image(img) - resize_points = self.resize_boxes(img, points, scale_factor) - data['ori_image'] = img - data['ori_boxes'] = points - data['points'] = resize_points - data['image'] = im_resized - data['shape'] = np.array([new_h, new_w]) - return data - - def resize_image(self, img): - norm_img = np.zeros([1024, 1024, 3], dtype='float32') - scale = [512, 1024] - h, w = img.shape[:2] - max_long_edge = max(scale) - max_short_edge = min(scale) - scale_factor = min(max_long_edge / max(h, w), - max_short_edge / min(h, w)) - resize_w, resize_h = int(w * float(scale_factor) + 0.5), int(h * float( - scale_factor) + 0.5) - max_stride = 32 - resize_h = (resize_h + max_stride - 1) // max_stride * max_stride - resize_w = (resize_w + max_stride - 1) // max_stride * max_stride - im = cv2.resize(img, (resize_w, resize_h)) - new_h, new_w = im.shape[:2] - w_scale = new_w / w - h_scale = new_h / h - scale_factor = np.array( - [w_scale, h_scale, w_scale, h_scale], dtype=np.float32) - norm_img[:new_h, :new_w, :] = im - return norm_img, scale_factor, [h_scale, w_scale], [new_h, new_w] - - def resize_boxes(self, im, points, scale_factor): - points = points * scale_factor - img_shape = im.shape[:2] - points[:, 0::2] = np.clip(points[:, 0::2], 0, img_shape[1]) - points[:, 1::2] = np.clip(points[:, 1::2], 0, img_shape[0]) - return points diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py deleted file mode 100644 index 7ad5eb47c2efb04ef0b1ecdea9e2173acdf6706d..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/__init__.py +++ /dev/null @@ -1,25 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
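The preprocessing classes above (DecodeImage, NormalizeImage, DetResizeForTest, ToCHWImage, KeepKeys, ...) are meant to be chained through `create_operators` and `transform` from the imaug package. A rough usage sketch is shown below; it assumes the (here-deleted) package is importable and the parameter values are illustrative, not the project's shipped detection config.

import numpy as np

from magic_pdf.model.sub_modules.ocr.paddleocr2pytorch.pytorchocr.data.imaug import (
    create_operators, transform)

# Illustrative detection-style preprocessing chain built from the operators above.
ops_config = [
    {'DetResizeForTest': {'limit_side_len': 960, 'limit_type': 'max'}},
    {'NormalizeImage': {'scale': '1./255.',
                        'mean': [0.485, 0.456, 0.406],
                        'std': [0.229, 0.224, 0.225],
                        'order': 'hwc'}},
    {'ToCHWImage': None},
    {'KeepKeys': {'keep_keys': ['image', 'shape']}},
]
ops = create_operators(ops_config)                  # instantiates each operator by name
data = {'image': np.zeros((720, 1280, 3), dtype=np.uint8)}
image, shape = transform(data, ops)                 # runs the operators in order
print(image.shape)                                  # CHW float32, sides rounded to multiples of 32
print(shape)                                        # [src_h, src_w, ratio_h, ratio_w]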
- -import copy - -__all__ = ["build_model"] - - -def build_model(config, **kwargs): - from .base_model import BaseModel - - config = copy.deepcopy(config) - module_class = BaseModel(config, **kwargs) - return module_class diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py deleted file mode 100644 index e7f7ce49b7201f99e050cb8d83b3eb0fb318760d..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/architectures/base_model.py +++ /dev/null @@ -1,105 +0,0 @@ -from torch import nn - -from ..backbones import build_backbone -from ..heads import build_head -from ..necks import build_neck - - -class BaseModel(nn.Module): - def __init__(self, config, **kwargs): - """ - the module for OCR. - args: - config (dict): the super parameters for module. - """ - super(BaseModel, self).__init__() - - in_channels = config.get("in_channels", 3) - model_type = config["model_type"] - # build backbone, backbone is need for del, rec and cls - if "Backbone" not in config or config["Backbone"] is None: - self.use_backbone = False - else: - self.use_backbone = True - config["Backbone"]["in_channels"] = in_channels - self.backbone = build_backbone(config["Backbone"], model_type) - in_channels = self.backbone.out_channels - - # build neck - # for rec, neck can be cnn,rnn or reshape(None) - # for det, neck can be FPN, BIFPN and so on. - # for cls, neck should be none - if "Neck" not in config or config["Neck"] is None: - self.use_neck = False - else: - self.use_neck = True - config["Neck"]["in_channels"] = in_channels - self.neck = build_neck(config["Neck"]) - in_channels = self.neck.out_channels - - # # build head, head is need for det, rec and cls - if "Head" not in config or config["Head"] is None: - self.use_head = False - else: - self.use_head = True - config["Head"]["in_channels"] = in_channels - self.head = build_head(config["Head"], **kwargs) - - self.return_all_feats = config.get("return_all_feats", False) - - self._initialize_weights() - - def _initialize_weights(self): - # weight initialization - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out") - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Linear): - nn.init.normal_(m.weight, 0, 0.01) - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.ConvTranspose2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out") - if m.bias is not None: - nn.init.zeros_(m.bias) - - def forward(self, x): - y = dict() - if self.use_backbone: - x = self.backbone(x) - if isinstance(x, dict): - y.update(x) - else: - y["backbone_out"] = x - final_name = "backbone_out" - if self.use_neck: - x = self.neck(x) - if isinstance(x, dict): - y.update(x) - else: - y["neck_out"] = x - final_name = "neck_out" - if self.use_head: - x = self.head(x) - # for multi head, save ctc neck out for udml - if isinstance(x, dict) and "ctc_nect" in x.keys(): - y["neck_out"] = x["ctc_neck"] - y["head_out"] = x - elif isinstance(x, dict): - y.update(x) - else: - y["head_out"] = x - if self.return_all_feats: - if self.training: - return y - elif isinstance(x, dict): - return x - else: - return {final_name: x} - else: - return x diff --git 
a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py deleted file mode 100644 index 7f437a2388b1640995e0909595fcb1eaf6544dff..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/__init__.py +++ /dev/null @@ -1,63 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -__all__ = ["build_backbone"] - - -def build_backbone(config, model_type): - if model_type == "det": - from .det_mobilenet_v3 import MobileNetV3 - from .rec_hgnet import PPHGNet_small - from .rec_lcnetv3 import PPLCNetV3 - - support_dict = [ - "MobileNetV3", - "ResNet", - "ResNet_vd", - "ResNet_SAST", - "PPLCNetV3", - "PPHGNet_small", - ] - elif model_type == "rec" or model_type == "cls": - from .rec_hgnet import PPHGNet_small - from .rec_lcnetv3 import PPLCNetV3 - from .rec_mobilenet_v3 import MobileNetV3 - from .rec_svtrnet import SVTRNet - from .rec_mv1_enhance import MobileNetV1Enhance - from .rec_pphgnetv2 import PPHGNetV2_B4 - support_dict = [ - "MobileNetV1Enhance", - "MobileNetV3", - "ResNet", - "ResNetFPN", - "MTB", - "ResNet31", - "SVTRNet", - "ViTSTR", - "DenseNet", - "PPLCNetV3", - "PPHGNet_small", - "PPHGNetV2_B4", - ] - else: - raise NotImplementedError - - module_name = config.pop("name") - assert module_name in support_dict, Exception( - "when model typs is {}, backbone only support {}".format( - model_type, support_dict - ) - ) - module_class = eval(module_name)(**config) - return module_class diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py deleted file mode 100644 index 03511599a0fb6d0d18940e9cd2fef19d217ec6ea..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/det_mobilenet_v3.py +++ /dev/null @@ -1,269 +0,0 @@ -from torch import nn - -from ..common import Activation - - -def make_divisible(v, divisor=8, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class ConvBNLayer(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride, - padding, - groups=1, - if_act=True, - act=None, - name=None, - ): - super(ConvBNLayer, self).__init__() - self.if_act = if_act - self.conv = nn.Conv2d( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - bias=False, - ) - - self.bn = nn.BatchNorm2d( - out_channels, - ) - if self.if_act: - self.act = Activation(act_type=act, inplace=True) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - if self.if_act: - 
x = self.act(x) - return x - - -class SEModule(nn.Module): - def __init__(self, in_channels, reduction=4, name=""): - super(SEModule, self).__init__() - self.avg_pool = nn.AdaptiveAvgPool2d(1) - self.conv1 = nn.Conv2d( - in_channels=in_channels, - out_channels=in_channels // reduction, - kernel_size=1, - stride=1, - padding=0, - bias=True, - ) - self.relu1 = Activation(act_type="relu", inplace=True) - self.conv2 = nn.Conv2d( - in_channels=in_channels // reduction, - out_channels=in_channels, - kernel_size=1, - stride=1, - padding=0, - bias=True, - ) - self.hard_sigmoid = Activation(act_type="hard_sigmoid", inplace=True) - - def forward(self, inputs): - outputs = self.avg_pool(inputs) - outputs = self.conv1(outputs) - outputs = self.relu1(outputs) - outputs = self.conv2(outputs) - outputs = self.hard_sigmoid(outputs) - outputs = inputs * outputs - return outputs - - -class ResidualUnit(nn.Module): - def __init__( - self, - in_channels, - mid_channels, - out_channels, - kernel_size, - stride, - use_se, - act=None, - name="", - ): - super(ResidualUnit, self).__init__() - self.if_shortcut = stride == 1 and in_channels == out_channels - self.if_se = use_se - - self.expand_conv = ConvBNLayer( - in_channels=in_channels, - out_channels=mid_channels, - kernel_size=1, - stride=1, - padding=0, - if_act=True, - act=act, - name=name + "_expand", - ) - self.bottleneck_conv = ConvBNLayer( - in_channels=mid_channels, - out_channels=mid_channels, - kernel_size=kernel_size, - stride=stride, - padding=int((kernel_size - 1) // 2), - groups=mid_channels, - if_act=True, - act=act, - name=name + "_depthwise", - ) - if self.if_se: - self.mid_se = SEModule(mid_channels, name=name + "_se") - self.linear_conv = ConvBNLayer( - in_channels=mid_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - padding=0, - if_act=False, - act=None, - name=name + "_linear", - ) - - def forward(self, inputs): - x = self.expand_conv(inputs) - x = self.bottleneck_conv(x) - if self.if_se: - x = self.mid_se(x) - x = self.linear_conv(x) - if self.if_shortcut: - x = inputs + x - return x - - -class MobileNetV3(nn.Module): - def __init__( - self, in_channels=3, model_name="large", scale=0.5, disable_se=False, **kwargs - ): - """ - the MobilenetV3 backbone network for detection module. 
- Args: - params(dict): the super parameters for build network - """ - super(MobileNetV3, self).__init__() - - self.disable_se = disable_se - - if model_name == "large": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, "relu", 1], - [3, 64, 24, False, "relu", 2], - [3, 72, 24, False, "relu", 1], - [5, 72, 40, True, "relu", 2], - [5, 120, 40, True, "relu", 1], - [5, 120, 40, True, "relu", 1], - [3, 240, 80, False, "hard_swish", 2], - [3, 200, 80, False, "hard_swish", 1], - [3, 184, 80, False, "hard_swish", 1], - [3, 184, 80, False, "hard_swish", 1], - [3, 480, 112, True, "hard_swish", 1], - [3, 672, 112, True, "hard_swish", 1], - [5, 672, 160, True, "hard_swish", 2], - [5, 960, 160, True, "hard_swish", 1], - [5, 960, 160, True, "hard_swish", 1], - ] - cls_ch_squeeze = 960 - elif model_name == "small": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, True, "relu", 2], - [3, 72, 24, False, "relu", 2], - [3, 88, 24, False, "relu", 1], - [5, 96, 40, True, "hard_swish", 2], - [5, 240, 40, True, "hard_swish", 1], - [5, 240, 40, True, "hard_swish", 1], - [5, 120, 48, True, "hard_swish", 1], - [5, 144, 48, True, "hard_swish", 1], - [5, 288, 96, True, "hard_swish", 2], - [5, 576, 96, True, "hard_swish", 1], - [5, 576, 96, True, "hard_swish", 1], - ] - cls_ch_squeeze = 576 - else: - raise NotImplementedError( - "mode[" + model_name + "_model] is not implemented!" - ) - - supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25] - assert ( - scale in supported_scale - ), "supported scale are {} but input scale is {}".format(supported_scale, scale) - inplanes = 16 - # conv1 - self.conv = ConvBNLayer( - in_channels=in_channels, - out_channels=make_divisible(inplanes * scale), - kernel_size=3, - stride=2, - padding=1, - groups=1, - if_act=True, - act="hard_swish", - name="conv1", - ) - - self.stages = nn.ModuleList() - self.out_channels = [] - block_list = [] - i = 0 - inplanes = make_divisible(inplanes * scale) - for k, exp, c, se, nl, s in cfg: - se = se and not self.disable_se - if s == 2 and i > 2: - self.out_channels.append(inplanes) - self.stages.append(nn.Sequential(*block_list)) - block_list = [] - block_list.append( - ResidualUnit( - in_channels=inplanes, - mid_channels=make_divisible(scale * exp), - out_channels=make_divisible(scale * c), - kernel_size=k, - stride=s, - use_se=se, - act=nl, - name="conv" + str(i + 2), - ) - ) - inplanes = make_divisible(scale * c) - i += 1 - block_list.append( - ConvBNLayer( - in_channels=inplanes, - out_channels=make_divisible(scale * cls_ch_squeeze), - kernel_size=1, - stride=1, - padding=0, - groups=1, - if_act=True, - act="hard_swish", - name="conv_last", - ) - ) - self.stages.append(nn.Sequential(*block_list)) - self.out_channels.append(make_divisible(scale * cls_ch_squeeze)) - # for i, stage in enumerate(self.stages): - # self.add_sublayer(sublayer=stage, name="stage{}".format(i)) - - def forward(self, x): - x = self.conv(x) - out_list = [] - for stage in self.stages: - x = stage(x) - out_list.append(x) - return out_list diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py deleted file mode 100644 index c1515a712a10c3c925d54d53a99c0f7e67453c9f..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_hgnet.py +++ /dev/null @@ -1,290 +0,0 @@ -import torch -import torch.nn.functional as F -from torch import nn - - -class ConvBNAct(nn.Module): 
- def __init__( - self, in_channels, out_channels, kernel_size, stride, groups=1, use_act=True - ): - super().__init__() - self.use_act = use_act - self.conv = nn.Conv2d( - in_channels, - out_channels, - kernel_size, - stride, - padding=(kernel_size - 1) // 2, - groups=groups, - bias=False, - ) - self.bn = nn.BatchNorm2d(out_channels) - if self.use_act: - self.act = nn.ReLU() - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - if self.use_act: - x = self.act(x) - return x - - -class ESEModule(nn.Module): - def __init__(self, channels): - super().__init__() - self.avg_pool = nn.AdaptiveAvgPool2d(1) - self.conv = nn.Conv2d( - in_channels=channels, - out_channels=channels, - kernel_size=1, - stride=1, - padding=0, - ) - self.sigmoid = nn.Sigmoid() - - def forward(self, x): - identity = x - x = self.avg_pool(x) - x = self.conv(x) - x = self.sigmoid(x) - return x * identity - - -class HG_Block(nn.Module): - def __init__( - self, - in_channels, - mid_channels, - out_channels, - layer_num, - identity=False, - ): - super().__init__() - self.identity = identity - - self.layers = nn.ModuleList() - self.layers.append( - ConvBNAct( - in_channels=in_channels, - out_channels=mid_channels, - kernel_size=3, - stride=1, - ) - ) - for _ in range(layer_num - 1): - self.layers.append( - ConvBNAct( - in_channels=mid_channels, - out_channels=mid_channels, - kernel_size=3, - stride=1, - ) - ) - - # feature aggregation - total_channels = in_channels + layer_num * mid_channels - self.aggregation_conv = ConvBNAct( - in_channels=total_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - ) - self.att = ESEModule(out_channels) - - def forward(self, x): - identity = x - output = [] - output.append(x) - for layer in self.layers: - x = layer(x) - output.append(x) - x = torch.cat(output, dim=1) - x = self.aggregation_conv(x) - x = self.att(x) - if self.identity: - x += identity - return x - - -class HG_Stage(nn.Module): - def __init__( - self, - in_channels, - mid_channels, - out_channels, - block_num, - layer_num, - downsample=True, - stride=[2, 1], - ): - super().__init__() - self.downsample = downsample - if downsample: - self.downsample = ConvBNAct( - in_channels=in_channels, - out_channels=in_channels, - kernel_size=3, - stride=stride, - groups=in_channels, - use_act=False, - ) - - blocks_list = [] - blocks_list.append( - HG_Block(in_channels, mid_channels, out_channels, layer_num, identity=False) - ) - for _ in range(block_num - 1): - blocks_list.append( - HG_Block( - out_channels, mid_channels, out_channels, layer_num, identity=True - ) - ) - self.blocks = nn.Sequential(*blocks_list) - - def forward(self, x): - if self.downsample: - x = self.downsample(x) - x = self.blocks(x) - return x - - -class PPHGNet(nn.Module): - """ - PPHGNet - Args: - stem_channels: list. Stem channel list of PPHGNet. - stage_config: dict. The configuration of each stage of PPHGNet. such as the number of channels, stride, etc. - layer_num: int. Number of layers of HG_Block. - use_last_conv: boolean. Whether to use a 1x1 convolutional layer before the classification layer. - class_expand: int=2048. Number of channels for the last 1x1 convolutional layer. - dropout_prob: float. Parameters of dropout, 0.0 means dropout is not used. - class_num: int=1000. The number of classes. - Returns: - model: nn.Layer. Specific PPHGNet model depends on args. 
- """ - - def __init__( - self, - stem_channels, - stage_config, - layer_num, - in_channels=3, - det=False, - out_indices=None, - ): - super().__init__() - self.det = det - self.out_indices = out_indices if out_indices is not None else [0, 1, 2, 3] - - # stem - stem_channels.insert(0, in_channels) - self.stem = nn.Sequential( - *[ - ConvBNAct( - in_channels=stem_channels[i], - out_channels=stem_channels[i + 1], - kernel_size=3, - stride=2 if i == 0 else 1, - ) - for i in range(len(stem_channels) - 1) - ] - ) - - if self.det: - self.pool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) - # stages - self.stages = nn.ModuleList() - self.out_channels = [] - for block_id, k in enumerate(stage_config): - ( - in_channels, - mid_channels, - out_channels, - block_num, - downsample, - stride, - ) = stage_config[k] - self.stages.append( - HG_Stage( - in_channels, - mid_channels, - out_channels, - block_num, - layer_num, - downsample, - stride, - ) - ) - if block_id in self.out_indices: - self.out_channels.append(out_channels) - - if not self.det: - self.out_channels = stage_config["stage4"][2] - - self._init_weights() - - def _init_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Linear): - nn.init.zeros_(m.bias) - - def forward(self, x): - x = self.stem(x) - if self.det: - x = self.pool(x) - - out = [] - for i, stage in enumerate(self.stages): - x = stage(x) - if self.det and i in self.out_indices: - out.append(x) - if self.det: - return out - - if self.training: - x = F.adaptive_avg_pool2d(x, [1, 40]) - else: - x = F.avg_pool2d(x, [3, 2]) - return x - - -def PPHGNet_small(pretrained=False, use_ssld=False, det=False, **kwargs): - """ - PPHGNet_small - Args: - pretrained: bool=False or str. If `True` load pretrained parameters, `False` otherwise. - If str, means the path of the pretrained model. - use_ssld: bool=False. Whether using distillation pretrained model when pretrained=True. - Returns: - model: nn.Layer. Specific `PPHGNet_small` model depends on args. - """ - stage_config_det = { - # in_channels, mid_channels, out_channels, blocks, downsample - "stage1": [128, 128, 256, 1, False, 2], - "stage2": [256, 160, 512, 1, True, 2], - "stage3": [512, 192, 768, 2, True, 2], - "stage4": [768, 224, 1024, 1, True, 2], - } - - stage_config_rec = { - # in_channels, mid_channels, out_channels, blocks, downsample - "stage1": [128, 128, 256, 1, True, [2, 1]], - "stage2": [256, 160, 512, 1, True, [1, 2]], - "stage3": [512, 192, 768, 2, True, [2, 1]], - "stage4": [768, 224, 1024, 1, True, [2, 1]], - } - - model = PPHGNet( - stem_channels=[64, 64, 128], - stage_config=stage_config_det if det else stage_config_rec, - layer_num=6, - det=det, - **kwargs - ) - return model diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py deleted file mode 100644 index e2bd4572a767560c0a0250aec64fae0c9bdaee2c..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_lcnetv3.py +++ /dev/null @@ -1,516 +0,0 @@ -# copyright (c) 2021 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import, division, print_function - -import torch -import torch.nn.functional as F -from torch import nn - -from ..common import Activation - -NET_CONFIG_det = { - "blocks2": - # k, in_c, out_c, s, use_se - [[3, 16, 32, 1, False]], - "blocks3": [[3, 32, 64, 2, False], [3, 64, 64, 1, False]], - "blocks4": [[3, 64, 128, 2, False], [3, 128, 128, 1, False]], - "blocks5": [ - [3, 128, 256, 2, False], - [5, 256, 256, 1, False], - [5, 256, 256, 1, False], - [5, 256, 256, 1, False], - [5, 256, 256, 1, False], - ], - "blocks6": [ - [5, 256, 512, 2, True], - [5, 512, 512, 1, True], - [5, 512, 512, 1, False], - [5, 512, 512, 1, False], - ], -} - -NET_CONFIG_rec = { - "blocks2": - # k, in_c, out_c, s, use_se - [[3, 16, 32, 1, False]], - "blocks3": [[3, 32, 64, 1, False], [3, 64, 64, 1, False]], - "blocks4": [[3, 64, 128, (2, 1), False], [3, 128, 128, 1, False]], - "blocks5": [ - [3, 128, 256, (1, 2), False], - [5, 256, 256, 1, False], - [5, 256, 256, 1, False], - [5, 256, 256, 1, False], - [5, 256, 256, 1, False], - ], - "blocks6": [ - [5, 256, 512, (2, 1), True], - [5, 512, 512, 1, True], - [5, 512, 512, (2, 1), False], - [5, 512, 512, 1, False], - ], -} - - -def make_divisible(v, divisor=16, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -class LearnableAffineBlock(nn.Module): - def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.1): - super().__init__() - self.scale = nn.Parameter(torch.Tensor([scale_value])) - self.bias = nn.Parameter(torch.Tensor([bias_value])) - - def forward(self, x): - return self.scale * x + self.bias - - -class ConvBNLayer(nn.Module): - def __init__( - self, in_channels, out_channels, kernel_size, stride, groups=1, lr_mult=1.0 - ): - super().__init__() - self.conv = nn.Conv2d( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=(kernel_size - 1) // 2, - groups=groups, - bias=False, - ) - - self.bn = nn.BatchNorm2d( - out_channels, - ) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - return x - - -class Act(nn.Module): - def __init__(self, act="hswish", lr_mult=1.0, lab_lr=0.1): - super().__init__() - if act == "hswish": - self.act = nn.Hardswish(inplace=True) - else: - assert act == "relu" - self.act = Activation(act) - self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr) - - def forward(self, x): - return self.lab(self.act(x)) - - -class LearnableRepLayer(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - stride=1, - groups=1, - num_conv_branches=1, - lr_mult=1.0, - lab_lr=0.1, - ): - super().__init__() - self.is_repped = False - self.groups = groups - self.stride = stride - self.kernel_size = kernel_size - self.in_channels = in_channels - self.out_channels = out_channels - self.num_conv_branches = num_conv_branches - self.padding = (kernel_size - 1) // 2 - - self.identity = ( - nn.BatchNorm2d( - num_features=in_channels, - ) - if out_channels == 
in_channels and stride == 1 - else None - ) - - self.conv_kxk = nn.ModuleList( - [ - ConvBNLayer( - in_channels, - out_channels, - kernel_size, - stride, - groups=groups, - lr_mult=lr_mult, - ) - for _ in range(self.num_conv_branches) - ] - ) - - self.conv_1x1 = ( - ConvBNLayer( - in_channels, out_channels, 1, stride, groups=groups, lr_mult=lr_mult - ) - if kernel_size > 1 - else None - ) - - self.lab = LearnableAffineBlock(lr_mult=lr_mult, lab_lr=lab_lr) - self.act = Act(lr_mult=lr_mult, lab_lr=lab_lr) - - def forward(self, x): - # for export - if self.is_repped: - out = self.lab(self.reparam_conv(x)) - if self.stride != 2: - out = self.act(out) - return out - - out = 0 - if self.identity is not None: - out += self.identity(x) - - if self.conv_1x1 is not None: - out += self.conv_1x1(x) - - for conv in self.conv_kxk: - out += conv(x) - - out = self.lab(out) - if self.stride != 2: - out = self.act(out) - return out - - def rep(self): - if self.is_repped: - return - kernel, bias = self._get_kernel_bias() - self.reparam_conv = nn.Conv2d( - in_channels=self.in_channels, - out_channels=self.out_channels, - kernel_size=self.kernel_size, - stride=self.stride, - padding=self.padding, - groups=self.groups, - ) - self.reparam_conv.weight.data = kernel - self.reparam_conv.bias.data = bias - self.is_repped = True - - def _pad_kernel_1x1_to_kxk(self, kernel1x1, pad): - if not isinstance(kernel1x1, torch.Tensor): - return 0 - else: - return nn.functional.pad(kernel1x1, [pad, pad, pad, pad]) - - def _get_kernel_bias(self): - kernel_conv_1x1, bias_conv_1x1 = self._fuse_bn_tensor(self.conv_1x1) - kernel_conv_1x1 = self._pad_kernel_1x1_to_kxk( - kernel_conv_1x1, self.kernel_size // 2 - ) - - kernel_identity, bias_identity = self._fuse_bn_tensor(self.identity) - - kernel_conv_kxk = 0 - bias_conv_kxk = 0 - for conv in self.conv_kxk: - kernel, bias = self._fuse_bn_tensor(conv) - kernel_conv_kxk += kernel - bias_conv_kxk += bias - - kernel_reparam = kernel_conv_kxk + kernel_conv_1x1 + kernel_identity - bias_reparam = bias_conv_kxk + bias_conv_1x1 + bias_identity - return kernel_reparam, bias_reparam - - def _fuse_bn_tensor(self, branch): - if not branch: - return 0, 0 - elif isinstance(branch, ConvBNLayer): - kernel = branch.conv.weight - running_mean = branch.bn._mean - running_var = branch.bn._variance - gamma = branch.bn.weight - beta = branch.bn.bias - eps = branch.bn._epsilon - else: - assert isinstance(branch, nn.BatchNorm2d) - if not hasattr(self, "id_tensor"): - input_dim = self.in_channels // self.groups - kernel_value = torch.zeros( - (self.in_channels, input_dim, self.kernel_size, self.kernel_size), - dtype=branch.weight.dtype, - ) - for i in range(self.in_channels): - kernel_value[ - i, i % input_dim, self.kernel_size // 2, self.kernel_size // 2 - ] = 1 - self.id_tensor = kernel_value - kernel = self.id_tensor - running_mean = branch._mean - running_var = branch._variance - gamma = branch.weight - beta = branch.bias - eps = branch._epsilon - std = (running_var + eps).sqrt() - t = (gamma / std).reshape((-1, 1, 1, 1)) - return kernel * t, beta - running_mean * gamma / std - - -class SELayer(nn.Module): - def __init__(self, channel, reduction=4, lr_mult=1.0): - super().__init__() - self.avg_pool = nn.AdaptiveAvgPool2d(1) - self.conv1 = nn.Conv2d( - in_channels=channel, - out_channels=channel // reduction, - kernel_size=1, - stride=1, - padding=0, - ) - self.relu = nn.ReLU() - self.conv2 = nn.Conv2d( - in_channels=channel // reduction, - out_channels=channel, - kernel_size=1, - stride=1, - padding=0, - 
) - self.hardsigmoid = nn.Hardsigmoid(inplace=True) - - def forward(self, x): - identity = x - x = self.avg_pool(x) - x = self.conv1(x) - x = self.relu(x) - x = self.conv2(x) - x = self.hardsigmoid(x) - x = identity * x - return x - - -class LCNetV3Block(nn.Module): - def __init__( - self, - in_channels, - out_channels, - stride, - dw_size, - use_se=False, - conv_kxk_num=4, - lr_mult=1.0, - lab_lr=0.1, - ): - super().__init__() - self.use_se = use_se - self.dw_conv = LearnableRepLayer( - in_channels=in_channels, - out_channels=in_channels, - kernel_size=dw_size, - stride=stride, - groups=in_channels, - num_conv_branches=conv_kxk_num, - lr_mult=lr_mult, - lab_lr=lab_lr, - ) - if use_se: - self.se = SELayer(in_channels, lr_mult=lr_mult) - self.pw_conv = LearnableRepLayer( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - num_conv_branches=conv_kxk_num, - lr_mult=lr_mult, - lab_lr=lab_lr, - ) - - def forward(self, x): - x = self.dw_conv(x) - if self.use_se: - x = self.se(x) - x = self.pw_conv(x) - return x - - -class PPLCNetV3(nn.Module): - def __init__( - self, - scale=1.0, - conv_kxk_num=4, - lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - lab_lr=0.1, - det=False, - **kwargs - ): - super().__init__() - self.scale = scale - self.lr_mult_list = lr_mult_list - self.det = det - - self.net_config = NET_CONFIG_det if self.det else NET_CONFIG_rec - - assert isinstance( - self.lr_mult_list, (list, tuple) - ), "lr_mult_list should be in (list, tuple) but got {}".format( - type(self.lr_mult_list) - ) - assert ( - len(self.lr_mult_list) == 6 - ), "lr_mult_list length should be 6 but got {}".format(len(self.lr_mult_list)) - - self.conv1 = ConvBNLayer( - in_channels=3, - out_channels=make_divisible(16 * scale), - kernel_size=3, - stride=2, - lr_mult=self.lr_mult_list[0], - ) - - self.blocks2 = nn.Sequential( - *[ - LCNetV3Block( - in_channels=make_divisible(in_c * scale), - out_channels=make_divisible(out_c * scale), - dw_size=k, - stride=s, - use_se=se, - conv_kxk_num=conv_kxk_num, - lr_mult=self.lr_mult_list[1], - lab_lr=lab_lr, - ) - for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks2"]) - ] - ) - - self.blocks3 = nn.Sequential( - *[ - LCNetV3Block( - in_channels=make_divisible(in_c * scale), - out_channels=make_divisible(out_c * scale), - dw_size=k, - stride=s, - use_se=se, - conv_kxk_num=conv_kxk_num, - lr_mult=self.lr_mult_list[2], - lab_lr=lab_lr, - ) - for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks3"]) - ] - ) - - self.blocks4 = nn.Sequential( - *[ - LCNetV3Block( - in_channels=make_divisible(in_c * scale), - out_channels=make_divisible(out_c * scale), - dw_size=k, - stride=s, - use_se=se, - conv_kxk_num=conv_kxk_num, - lr_mult=self.lr_mult_list[3], - lab_lr=lab_lr, - ) - for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks4"]) - ] - ) - - self.blocks5 = nn.Sequential( - *[ - LCNetV3Block( - in_channels=make_divisible(in_c * scale), - out_channels=make_divisible(out_c * scale), - dw_size=k, - stride=s, - use_se=se, - conv_kxk_num=conv_kxk_num, - lr_mult=self.lr_mult_list[4], - lab_lr=lab_lr, - ) - for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks5"]) - ] - ) - - self.blocks6 = nn.Sequential( - *[ - LCNetV3Block( - in_channels=make_divisible(in_c * scale), - out_channels=make_divisible(out_c * scale), - dw_size=k, - stride=s, - use_se=se, - conv_kxk_num=conv_kxk_num, - lr_mult=self.lr_mult_list[5], - lab_lr=lab_lr, - ) - for i, (k, in_c, out_c, s, se) in enumerate(self.net_config["blocks6"]) 
- ] - ) - self.out_channels = make_divisible(512 * scale) - - if self.det: - mv_c = [16, 24, 56, 480] - self.out_channels = [ - make_divisible(self.net_config["blocks3"][-1][2] * scale), - make_divisible(self.net_config["blocks4"][-1][2] * scale), - make_divisible(self.net_config["blocks5"][-1][2] * scale), - make_divisible(self.net_config["blocks6"][-1][2] * scale), - ] - - self.layer_list = nn.ModuleList( - [ - nn.Conv2d(self.out_channels[0], int(mv_c[0] * scale), 1, 1, 0), - nn.Conv2d(self.out_channels[1], int(mv_c[1] * scale), 1, 1, 0), - nn.Conv2d(self.out_channels[2], int(mv_c[2] * scale), 1, 1, 0), - nn.Conv2d(self.out_channels[3], int(mv_c[3] * scale), 1, 1, 0), - ] - ) - self.out_channels = [ - int(mv_c[0] * scale), - int(mv_c[1] * scale), - int(mv_c[2] * scale), - int(mv_c[3] * scale), - ] - - def forward(self, x): - out_list = [] - x = self.conv1(x) - x = self.blocks2(x) - x = self.blocks3(x) - out_list.append(x) - x = self.blocks4(x) - out_list.append(x) - x = self.blocks5(x) - out_list.append(x) - x = self.blocks6(x) - out_list.append(x) - - if self.det: - out_list[0] = self.layer_list[0](out_list[0]) - out_list[1] = self.layer_list[1](out_list[1]) - out_list[2] = self.layer_list[2](out_list[2]) - out_list[3] = self.layer_list[3](out_list[3]) - return out_list - - if self.training: - x = F.adaptive_avg_pool2d(x, [1, 40]) - else: - x = F.avg_pool2d(x, [3, 2]) - return x diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py deleted file mode 100644 index d284a6d49a2b4abfab285643aa849b9e6bf2db37..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mobilenet_v3.py +++ /dev/null @@ -1,136 +0,0 @@ -from torch import nn - -from .det_mobilenet_v3 import ConvBNLayer, ResidualUnit, make_divisible - - -class MobileNetV3(nn.Module): - def __init__( - self, - in_channels=3, - model_name="small", - scale=0.5, - large_stride=None, - small_stride=None, - **kwargs - ): - super(MobileNetV3, self).__init__() - if small_stride is None: - small_stride = [2, 2, 2, 2] - if large_stride is None: - large_stride = [1, 2, 2, 2] - - assert isinstance( - large_stride, list - ), "large_stride type must " "be list but got {}".format(type(large_stride)) - assert isinstance( - small_stride, list - ), "small_stride type must " "be list but got {}".format(type(small_stride)) - assert ( - len(large_stride) == 4 - ), "large_stride length must be " "4 but got {}".format(len(large_stride)) - assert ( - len(small_stride) == 4 - ), "small_stride length must be " "4 but got {}".format(len(small_stride)) - - if model_name == "large": - cfg = [ - # k, exp, c, se, nl, s, - [3, 16, 16, False, "relu", large_stride[0]], - [3, 64, 24, False, "relu", (large_stride[1], 1)], - [3, 72, 24, False, "relu", 1], - [5, 72, 40, True, "relu", (large_stride[2], 1)], - [5, 120, 40, True, "relu", 1], - [5, 120, 40, True, "relu", 1], - [3, 240, 80, False, "hard_swish", 1], - [3, 200, 80, False, "hard_swish", 1], - [3, 184, 80, False, "hard_swish", 1], - [3, 184, 80, False, "hard_swish", 1], - [3, 480, 112, True, "hard_swish", 1], - [3, 672, 112, True, "hard_swish", 1], - [5, 672, 160, True, "hard_swish", (large_stride[3], 1)], - [5, 960, 160, True, "hard_swish", 1], - [5, 960, 160, True, "hard_swish", 1], - ] - cls_ch_squeeze = 960 - elif model_name == "small": - cfg = [ - # k, exp, c, se, nl, s, - [3, 
16, 16, True, "relu", (small_stride[0], 1)], - [3, 72, 24, False, "relu", (small_stride[1], 1)], - [3, 88, 24, False, "relu", 1], - [5, 96, 40, True, "hard_swish", (small_stride[2], 1)], - [5, 240, 40, True, "hard_swish", 1], - [5, 240, 40, True, "hard_swish", 1], - [5, 120, 48, True, "hard_swish", 1], - [5, 144, 48, True, "hard_swish", 1], - [5, 288, 96, True, "hard_swish", (small_stride[3], 1)], - [5, 576, 96, True, "hard_swish", 1], - [5, 576, 96, True, "hard_swish", 1], - ] - cls_ch_squeeze = 576 - else: - raise NotImplementedError( - "mode[" + model_name + "_model] is not implemented!" - ) - - supported_scale = [0.35, 0.5, 0.75, 1.0, 1.25] - assert ( - scale in supported_scale - ), "supported scales are {} but input scale is {}".format( - supported_scale, scale - ) - - inplanes = 16 - # conv1 - self.conv1 = ConvBNLayer( - in_channels=in_channels, - out_channels=make_divisible(inplanes * scale), - kernel_size=3, - stride=2, - padding=1, - groups=1, - if_act=True, - act="hard_swish", - name="conv1", - ) - i = 0 - block_list = [] - inplanes = make_divisible(inplanes * scale) - for k, exp, c, se, nl, s in cfg: - block_list.append( - ResidualUnit( - in_channels=inplanes, - mid_channels=make_divisible(scale * exp), - out_channels=make_divisible(scale * c), - kernel_size=k, - stride=s, - use_se=se, - act=nl, - name="conv" + str(i + 2), - ) - ) - inplanes = make_divisible(scale * c) - i += 1 - self.blocks = nn.Sequential(*block_list) - - self.conv2 = ConvBNLayer( - in_channels=inplanes, - out_channels=make_divisible(scale * cls_ch_squeeze), - kernel_size=1, - stride=1, - padding=0, - groups=1, - if_act=True, - act="hard_swish", - name="conv_last", - ) - - self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) - self.out_channels = make_divisible(scale * cls_ch_squeeze) - - def forward(self, x): - x = self.conv1(x) - x = self.blocks(x) - x = self.conv2(x) - x = self.pool(x) - return x diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py deleted file mode 100644 index 447c48f6554c69fec68b77de25e0386cba4aaca8..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_mv1_enhance.py +++ /dev/null @@ -1,234 +0,0 @@ -import os, sys -import torch -import torch.nn as nn -import torch.nn.functional as F - -from ..common import Activation - - -class ConvBNLayer(nn.Module): - def __init__(self, - num_channels, - filter_size, - num_filters, - stride, - padding, - channels=None, - num_groups=1, - act='hard_swish'): - super(ConvBNLayer, self).__init__() - self.act = act - self._conv = nn.Conv2d( - in_channels=num_channels, - out_channels=num_filters, - kernel_size=filter_size, - stride=stride, - padding=padding, - groups=num_groups, - bias=False) - - self._batch_norm = nn.BatchNorm2d( - num_filters, - ) - if self.act is not None: - self._act = Activation(act_type=act, inplace=True) - - def forward(self, inputs): - y = self._conv(inputs) - y = self._batch_norm(y) - if self.act is not None: - y = self._act(y) - return y - - -class DepthwiseSeparable(nn.Module): - def __init__(self, - num_channels, - num_filters1, - num_filters2, - num_groups, - stride, - scale, - dw_size=3, - padding=1, - use_se=False): - super(DepthwiseSeparable, self).__init__() - self.use_se = use_se - self._depthwise_conv = ConvBNLayer( - num_channels=num_channels, - num_filters=int(num_filters1 * 
scale), - filter_size=dw_size, - stride=stride, - padding=padding, - num_groups=int(num_groups * scale)) - if use_se: - self._se = SEModule(int(num_filters1 * scale)) - self._pointwise_conv = ConvBNLayer( - num_channels=int(num_filters1 * scale), - filter_size=1, - num_filters=int(num_filters2 * scale), - stride=1, - padding=0) - - def forward(self, inputs): - y = self._depthwise_conv(inputs) - if self.use_se: - y = self._se(y) - y = self._pointwise_conv(y) - return y - - -class MobileNetV1Enhance(nn.Module): - def __init__(self, - in_channels=3, - scale=0.5, - last_conv_stride=1, - last_pool_type='max', - **kwargs): - super().__init__() - self.scale = scale - self.block_list = [] - - self.conv1 = ConvBNLayer( - num_channels=in_channels, - filter_size=3, - channels=3, - num_filters=int(32 * scale), - stride=2, - padding=1) - - conv2_1 = DepthwiseSeparable( - num_channels=int(32 * scale), - num_filters1=32, - num_filters2=64, - num_groups=32, - stride=1, - scale=scale) - self.block_list.append(conv2_1) - - conv2_2 = DepthwiseSeparable( - num_channels=int(64 * scale), - num_filters1=64, - num_filters2=128, - num_groups=64, - stride=1, - scale=scale) - self.block_list.append(conv2_2) - - conv3_1 = DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=128, - num_groups=128, - stride=1, - scale=scale) - self.block_list.append(conv3_1) - - conv3_2 = DepthwiseSeparable( - num_channels=int(128 * scale), - num_filters1=128, - num_filters2=256, - num_groups=128, - stride=(2, 1), - scale=scale) - self.block_list.append(conv3_2) - - conv4_1 = DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=256, - num_groups=256, - stride=1, - scale=scale) - self.block_list.append(conv4_1) - - conv4_2 = DepthwiseSeparable( - num_channels=int(256 * scale), - num_filters1=256, - num_filters2=512, - num_groups=256, - stride=(2, 1), - scale=scale) - self.block_list.append(conv4_2) - - for _ in range(5): - conv5 = DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=512, - num_groups=512, - stride=1, - dw_size=5, - padding=2, - scale=scale, - use_se=False) - self.block_list.append(conv5) - - conv5_6 = DepthwiseSeparable( - num_channels=int(512 * scale), - num_filters1=512, - num_filters2=1024, - num_groups=512, - stride=(2, 1), - dw_size=5, - padding=2, - scale=scale, - use_se=True) - self.block_list.append(conv5_6) - - conv6 = DepthwiseSeparable( - num_channels=int(1024 * scale), - num_filters1=1024, - num_filters2=1024, - num_groups=1024, - stride=last_conv_stride, - dw_size=5, - padding=2, - use_se=True, - scale=scale) - self.block_list.append(conv6) - - self.block_list = nn.Sequential(*self.block_list) - if last_pool_type == 'avg': - self.pool = nn.AvgPool2d(kernel_size=2, stride=2, padding=0) - else: - self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0) - self.out_channels = int(1024 * scale) - - def forward(self, inputs): - y = self.conv1(inputs) - y = self.block_list(y) - y = self.pool(y) - return y - -def hardsigmoid(x): - return F.relu6(x + 3., inplace=True) / 6. 
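# Usage sketch (illustrative only; the input shape and scale below are assumptions,
# not taken from the original source). MobileNetV1Enhance is a text-recognition
# backbone: its (2, 1) strides halve the feature-map height while keeping the width,
# so a text-line crop comes out as a wide, one-row feature map for the CTC head.
#
#   backbone = MobileNetV1Enhance(in_channels=3, scale=0.5)
#   feats = backbone(torch.randn(1, 3, 32, 320))
#   # feats.shape is roughly (1, 512, 1, 80), and backbone.out_channels == 512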
- -class SEModule(nn.Module): - def __init__(self, channel, reduction=4): - super(SEModule, self).__init__() - self.avg_pool = nn.AdaptiveAvgPool2d(1) - self.conv1 = nn.Conv2d( - in_channels=channel, - out_channels=channel // reduction, - kernel_size=1, - stride=1, - padding=0, - bias=True) - self.conv2 = nn.Conv2d( - in_channels=channel // reduction, - out_channels=channel, - kernel_size=1, - stride=1, - padding=0, - bias=True) - - def forward(self, inputs): - outputs = self.avg_pool(inputs) - outputs = self.conv1(outputs) - outputs = F.relu(outputs) - outputs = self.conv2(outputs) - outputs = hardsigmoid(outputs) - x = torch.mul(inputs, outputs) - - return x diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_pphgnetv2.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_pphgnetv2.py deleted file mode 100644 index 390ca4c61b4b7fd1635d5229d5ef3d79fc3509fe..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_pphgnetv2.py +++ /dev/null @@ -1,810 +0,0 @@ -import math -import torch -import torch.nn as nn -import torch.nn.functional as F - - -class AdaptiveAvgPool2D(nn.AdaptiveAvgPool2d): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - if isinstance(self.output_size, int) and self.output_size == 1: - self._gap = True - elif ( - isinstance(self.output_size, tuple) - and self.output_size[0] == 1 - and self.output_size[1] == 1 - ): - self._gap = True - else: - self._gap = False - - def forward(self, x): - if self._gap: - # Global Average Pooling - N, C, _, _ = x.shape - x_mean = torch.mean(x, dim=[2, 3]) - x_mean = torch.reshape(x_mean, [N, C, 1, 1]) - return x_mean - else: - return F.adaptive_avg_pool2d( - x, - output_size=self.output_size - ) - -class LearnableAffineBlock(nn.Module): - """ - Create a learnable affine block module. This module can significantly improve accuracy on smaller models. - - Args: - scale_value (float): The initial value of the scale parameter, default is 1.0. - bias_value (float): The initial value of the bias parameter, default is 0.0. - lr_mult (float): The learning rate multiplier, default is 1.0. - lab_lr (float): The learning rate, default is 0.01. - """ - - def __init__(self, scale_value=1.0, bias_value=0.0, lr_mult=1.0, lab_lr=0.01): - super().__init__() - self.scale = nn.Parameter(torch.Tensor([scale_value])) - self.bias = nn.Parameter(torch.Tensor([bias_value])) - - def forward(self, x): - return self.scale * x + self.bias - - -class ConvBNAct(nn.Module): - """ - ConvBNAct is a combination of convolution and batchnorm layers. - - Args: - in_channels (int): Number of input channels. - out_channels (int): Number of output channels. - kernel_size (int): Size of the convolution kernel. Defaults to 3. - stride (int): Stride of the convolution. Defaults to 1. - padding (int/str): Padding or padding type for the convolution. Defaults to 1. - groups (int): Number of groups for the convolution. Defaults to 1. - use_act: (bool): Whether to use activation function. Defaults to True. - use_lab (bool): Whether to use the LAB operation. Defaults to False. - lr_mult (float): Learning rate multiplier for the layer. Defaults to 1.0. 
- """ - - def __init__( - self, - in_channels, - out_channels, - kernel_size=3, - stride=1, - padding=1, - groups=1, - use_act=True, - use_lab=False, - lr_mult=1.0, - ): - super().__init__() - self.use_act = use_act - self.use_lab = use_lab - - self.conv = nn.Conv2d( - in_channels, - out_channels, - kernel_size, - stride, - padding=padding if isinstance(padding, str) else (kernel_size - 1) // 2, - # padding=(kernel_size - 1) // 2, - groups=groups, - bias=False, - ) - self.bn = nn.BatchNorm2d( - out_channels, - ) - if self.use_act: - self.act = nn.ReLU() - if self.use_lab: - self.lab = LearnableAffineBlock(lr_mult=lr_mult) - - def forward(self, x): - x = self.conv(x) - x = self.bn(x) - if self.use_act: - x = self.act(x) - if self.use_lab: - x = self.lab(x) - return x - - -class LightConvBNAct(nn.Module): - """ - LightConvBNAct is a combination of pw and dw layers. - - Args: - in_channels (int): Number of input channels. - out_channels (int): Number of output channels. - kernel_size (int): Size of the depth-wise convolution kernel. - use_lab (bool): Whether to use the LAB operation. Defaults to False. - lr_mult (float): Learning rate multiplier for the layer. Defaults to 1.0. - """ - - def __init__( - self, - in_channels, - out_channels, - kernel_size, - use_lab=False, - lr_mult=1.0, - **kwargs, - ): - super().__init__() - self.conv1 = ConvBNAct( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - use_act=False, - use_lab=use_lab, - lr_mult=lr_mult, - ) - self.conv2 = ConvBNAct( - in_channels=out_channels, - out_channels=out_channels, - kernel_size=kernel_size, - groups=out_channels, - use_act=True, - use_lab=use_lab, - lr_mult=lr_mult, - ) - - def forward(self, x): - x = self.conv1(x) - x = self.conv2(x) - return x - - -class CustomMaxPool2d(nn.Module): - def __init__( - self, - kernel_size, - stride=None, - padding=0, - dilation=1, - return_indices=False, - ceil_mode=False, - data_format="NCHW", - ): - super(CustomMaxPool2d, self).__init__() - self.kernel_size = kernel_size if isinstance(kernel_size, (tuple, list)) else (kernel_size, kernel_size) - self.stride = stride if stride is not None else self.kernel_size - self.stride = self.stride if isinstance(self.stride, (tuple, list)) else (self.stride, self.stride) - self.dilation = dilation if isinstance(dilation, (tuple, list)) else (dilation, dilation) - self.return_indices = return_indices - self.ceil_mode = ceil_mode - self.padding_mode = padding - - # 当padding不是"same"时使用标准MaxPool2d - if padding != "same": - self.padding = padding if isinstance(padding, (tuple, list)) else (padding, padding) - self.pool = nn.MaxPool2d( - kernel_size=self.kernel_size, - stride=self.stride, - padding=self.padding, - dilation=self.dilation, - return_indices=self.return_indices, - ceil_mode=self.ceil_mode - ) - - def forward(self, x): - # 处理same padding - if self.padding_mode == "same": - input_height, input_width = x.size(2), x.size(3) - - # 计算期望的输出尺寸 - out_height = math.ceil(input_height / self.stride[0]) - out_width = math.ceil(input_width / self.stride[1]) - - # 计算需要的padding - pad_height = max((out_height - 1) * self.stride[0] + self.kernel_size[0] - input_height, 0) - pad_width = max((out_width - 1) * self.stride[1] + self.kernel_size[1] - input_width, 0) - - # 将padding分配到两边 - pad_top = pad_height // 2 - pad_bottom = pad_height - pad_top - pad_left = pad_width // 2 - pad_right = pad_width - pad_left - - # 应用padding - x = F.pad(x, (pad_left, pad_right, pad_top, pad_bottom)) - - # 使用标准max_pool2d函数 - if self.return_indices: - return 
F.max_pool2d_with_indices( - x, - kernel_size=self.kernel_size, - stride=self.stride, - padding=0, # 已经手动pad过了 - dilation=self.dilation, - ceil_mode=self.ceil_mode - ) - else: - return F.max_pool2d( - x, - kernel_size=self.kernel_size, - stride=self.stride, - padding=0, # 已经手动pad过了 - dilation=self.dilation, - ceil_mode=self.ceil_mode - ) - else: - # 使用预定义的MaxPool2d - return self.pool(x) - -class StemBlock(nn.Module): - """ - StemBlock for PP-HGNetV2. - - Args: - in_channels (int): Number of input channels. - mid_channels (int): Number of middle channels. - out_channels (int): Number of output channels. - use_lab (bool): Whether to use the LAB operation. Defaults to False. - lr_mult (float): Learning rate multiplier for the layer. Defaults to 1.0. - """ - - def __init__( - self, - in_channels, - mid_channels, - out_channels, - use_lab=False, - lr_mult=1.0, - text_rec=False, - ): - super().__init__() - self.stem1 = ConvBNAct( - in_channels=in_channels, - out_channels=mid_channels, - kernel_size=3, - stride=2, - use_lab=use_lab, - lr_mult=lr_mult, - ) - self.stem2a = ConvBNAct( - in_channels=mid_channels, - out_channels=mid_channels // 2, - kernel_size=2, - stride=1, - padding="same", - use_lab=use_lab, - lr_mult=lr_mult, - ) - self.stem2b = ConvBNAct( - in_channels=mid_channels // 2, - out_channels=mid_channels, - kernel_size=2, - stride=1, - padding="same", - use_lab=use_lab, - lr_mult=lr_mult, - ) - self.stem3 = ConvBNAct( - in_channels=mid_channels * 2, - out_channels=mid_channels, - kernel_size=3, - stride=1 if text_rec else 2, - use_lab=use_lab, - lr_mult=lr_mult, - ) - self.stem4 = ConvBNAct( - in_channels=mid_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - use_lab=use_lab, - lr_mult=lr_mult, - ) - self.pool = CustomMaxPool2d( - kernel_size=2, stride=1, ceil_mode=True, padding="same" - ) - # self.pool = nn.MaxPool2d( - # kernel_size=2, stride=1, ceil_mode=True, padding=1 - # ) - - def forward(self, x): - x = self.stem1(x) - x2 = self.stem2a(x) - x2 = self.stem2b(x2) - x1 = self.pool(x) - - # if x1.shape[2:] != x2.shape[2:]: - # x1 = F.interpolate(x1, size=x2.shape[2:], mode='bilinear', align_corners=False) - - x = torch.cat([x1, x2], 1) - x = self.stem3(x) - x = self.stem4(x) - - return x - - -class HGV2_Block(nn.Module): - """ - HGV2_Block, the basic unit that constitutes the HGV2_Stage. - - Args: - in_channels (int): Number of input channels. - mid_channels (int): Number of middle channels. - out_channels (int): Number of output channels. - kernel_size (int): Size of the convolution kernel. Defaults to 3. - layer_num (int): Number of layers in the HGV2 block. Defaults to 6. - stride (int): Stride of the convolution. Defaults to 1. - padding (int/str): Padding or padding type for the convolution. Defaults to 1. - groups (int): Number of groups for the convolution. Defaults to 1. - use_act (bool): Whether to use activation function. Defaults to True. - use_lab (bool): Whether to use the LAB operation. Defaults to False. - lr_mult (float): Learning rate multiplier for the layer. Defaults to 1.0. 
- """ - - def __init__( - self, - in_channels, - mid_channels, - out_channels, - kernel_size=3, - layer_num=6, - identity=False, - light_block=True, - use_lab=False, - lr_mult=1.0, - ): - super().__init__() - self.identity = identity - - self.layers = nn.ModuleList() - block_type = "LightConvBNAct" if light_block else "ConvBNAct" - for i in range(layer_num): - self.layers.append( - eval(block_type)( - in_channels=in_channels if i == 0 else mid_channels, - out_channels=mid_channels, - stride=1, - kernel_size=kernel_size, - use_lab=use_lab, - lr_mult=lr_mult, - ) - ) - # feature aggregation - total_channels = in_channels + layer_num * mid_channels - self.aggregation_squeeze_conv = ConvBNAct( - in_channels=total_channels, - out_channels=out_channels // 2, - kernel_size=1, - stride=1, - use_lab=use_lab, - lr_mult=lr_mult, - ) - self.aggregation_excitation_conv = ConvBNAct( - in_channels=out_channels // 2, - out_channels=out_channels, - kernel_size=1, - stride=1, - use_lab=use_lab, - lr_mult=lr_mult, - ) - - def forward(self, x): - identity = x - output = [] - output.append(x) - for layer in self.layers: - x = layer(x) - output.append(x) - x = torch.cat(output, dim=1) - x = self.aggregation_squeeze_conv(x) - x = self.aggregation_excitation_conv(x) - if self.identity: - x += identity - return x - - -class HGV2_Stage(nn.Module): - """ - HGV2_Stage, the basic unit that constitutes the PPHGNetV2. - - Args: - in_channels (int): Number of input channels. - mid_channels (int): Number of middle channels. - out_channels (int): Number of output channels. - block_num (int): Number of blocks in the HGV2 stage. - layer_num (int): Number of layers in the HGV2 block. Defaults to 6. - is_downsample (bool): Whether to use downsampling operation. Defaults to False. - light_block (bool): Whether to use light block. Defaults to True. - kernel_size (int): Size of the convolution kernel. Defaults to 3. - use_lab (bool, optional): Whether to use the LAB operation. Defaults to False. - lr_mult (float, optional): Learning rate multiplier for the layer. Defaults to 1.0. 
- """ - - def __init__( - self, - in_channels, - mid_channels, - out_channels, - block_num, - layer_num=6, - is_downsample=True, - light_block=True, - kernel_size=3, - use_lab=False, - stride=2, - lr_mult=1.0, - ): - - super().__init__() - self.is_downsample = is_downsample - if self.is_downsample: - self.downsample = ConvBNAct( - in_channels=in_channels, - out_channels=in_channels, - kernel_size=3, - stride=stride, - groups=in_channels, - use_act=False, - use_lab=use_lab, - lr_mult=lr_mult, - ) - - blocks_list = [] - for i in range(block_num): - blocks_list.append( - HGV2_Block( - in_channels=in_channels if i == 0 else out_channels, - mid_channels=mid_channels, - out_channels=out_channels, - kernel_size=kernel_size, - layer_num=layer_num, - identity=False if i == 0 else True, - light_block=light_block, - use_lab=use_lab, - lr_mult=lr_mult, - ) - ) - self.blocks = nn.Sequential(*blocks_list) - - def forward(self, x): - if self.is_downsample: - x = self.downsample(x) - x = self.blocks(x) - return x - - -class DropoutInferDownscale(nn.Module): - """ - 实现与Paddle的mode="downscale_in_infer"等效的Dropout - 训练模式:out = input * mask(直接应用掩码,不进行放大) - 推理模式:out = input * (1.0 - p)(在推理时按概率缩小) - """ - - def __init__(self, p=0.5): - super().__init__() - self.p = p - - def forward(self, x): - if self.training: - # 训练时:应用随机mask但不放大 - return F.dropout(x, self.p, training=True) * (1.0 - self.p) - else: - # 推理时:按照dropout概率缩小输出 - return x * (1.0 - self.p) - -class PPHGNetV2(nn.Module): - """ - PPHGNetV2 - - Args: - stage_config (dict): Config for PPHGNetV2 stages. such as the number of channels, stride, etc. - stem_channels: (list): Number of channels of the stem of the PPHGNetV2. - use_lab (bool): Whether to use the LAB operation. Defaults to False. - use_last_conv (bool): Whether to use the last conv layer as the output channel. Defaults to True. - class_expand (int): Number of channels for the last 1x1 convolutional layer. - drop_prob (float): Dropout probability for the last 1x1 convolutional layer. Defaults to 0.0. - class_num (int): The number of classes for the classification layer. Defaults to 1000. - lr_mult_list (list): Learning rate multiplier for the stages. Defaults to [1.0, 1.0, 1.0, 1.0, 1.0]. - Returns: - model: nn.Layer. Specific PPHGNetV2 model depends on args. 
- """ - - def __init__( - self, - stage_config, - stem_channels=[3, 32, 64], - use_lab=False, - use_last_conv=True, - class_expand=2048, - dropout_prob=0.0, - class_num=1000, - lr_mult_list=[1.0, 1.0, 1.0, 1.0, 1.0], - det=False, - text_rec=False, - out_indices=None, - **kwargs, - ): - super().__init__() - self.det = det - self.text_rec = text_rec - self.use_lab = use_lab - self.use_last_conv = use_last_conv - self.class_expand = class_expand - self.class_num = class_num - self.out_indices = out_indices if out_indices is not None else [0, 1, 2, 3] - self.out_channels = [] - - # stem - self.stem = StemBlock( - in_channels=stem_channels[0], - mid_channels=stem_channels[1], - out_channels=stem_channels[2], - use_lab=use_lab, - lr_mult=lr_mult_list[0], - text_rec=text_rec, - ) - - # stages - self.stages = nn.ModuleList() - for i, k in enumerate(stage_config): - ( - in_channels, - mid_channels, - out_channels, - block_num, - is_downsample, - light_block, - kernel_size, - layer_num, - stride, - ) = stage_config[k] - self.stages.append( - HGV2_Stage( - in_channels, - mid_channels, - out_channels, - block_num, - layer_num, - is_downsample, - light_block, - kernel_size, - use_lab, - stride, - lr_mult=lr_mult_list[i + 1], - ) - ) - if i in self.out_indices: - self.out_channels.append(out_channels) - if not self.det: - self.out_channels = stage_config["stage4"][2] - - self.avg_pool = AdaptiveAvgPool2D(1) - - if self.use_last_conv: - self.last_conv = nn.Conv2d( - in_channels=out_channels, - out_channels=self.class_expand, - kernel_size=1, - stride=1, - padding=0, - bias=False, - ) - self.act = nn.ReLU() - if self.use_lab: - self.lab = LearnableAffineBlock() - self.dropout = DropoutInferDownscale(p=dropout_prob) - - self.flatten = nn.Flatten(start_dim=1, end_dim=-1) - if not self.det: - self.fc = nn.Linear( - self.class_expand if self.use_last_conv else out_channels, - self.class_num, - ) - - self._init_weights() - - def _init_weights(self): - for m in self.modules(): - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Linear): - nn.init.zeros_(m.bias) - - def forward(self, x): - x = self.stem(x) - out = [] - for i, stage in enumerate(self.stages): - x = stage(x) - if self.det and i in self.out_indices: - out.append(x) - if self.det: - return out - - if self.text_rec: - if self.training: - x = F.adaptive_avg_pool2d(x, [1, 40]) - else: - x = F.avg_pool2d(x, [3, 2]) - return x - - -def PPHGNetV2_B0(pretrained=False, use_ssld=False, **kwargs): - """ - PPHGNetV2_B0 - Args: - pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. - If str, means the path of the pretrained model. - use_ssld (bool) Whether using ssld pretrained model when pretrained is True. - Returns: - model: nn.Layer. Specific `PPHGNetV2_B0` model depends on args. - """ - stage_config = { - # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num - "stage1": [16, 16, 64, 1, False, False, 3, 3], - "stage2": [64, 32, 256, 1, True, False, 3, 3], - "stage3": [256, 64, 512, 2, True, True, 5, 3], - "stage4": [512, 128, 1024, 1, True, True, 5, 3], - } - - model = PPHGNetV2( - stem_channels=[3, 16, 16], stage_config=stage_config, use_lab=True, **kwargs - ) - return model - - -def PPHGNetV2_B1(pretrained=False, use_ssld=False, **kwargs): - """ - PPHGNetV2_B1 - Args: - pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. 
- If str, means the path of the pretrained model. - use_ssld (bool) Whether using ssld pretrained model when pretrained is True. - Returns: - model: nn.Layer. Specific `PPHGNetV2_B1` model depends on args. - """ - stage_config = { - # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num - "stage1": [32, 32, 64, 1, False, False, 3, 3], - "stage2": [64, 48, 256, 1, True, False, 3, 3], - "stage3": [256, 96, 512, 2, True, True, 5, 3], - "stage4": [512, 192, 1024, 1, True, True, 5, 3], - } - - model = PPHGNetV2( - stem_channels=[3, 24, 32], stage_config=stage_config, use_lab=True, **kwargs - ) - return model - - -def PPHGNetV2_B2(pretrained=False, use_ssld=False, **kwargs): - """ - PPHGNetV2_B2 - Args: - pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. - If str, means the path of the pretrained model. - use_ssld (bool) Whether using ssld pretrained model when pretrained is True. - Returns: - model: nn.Layer. Specific `PPHGNetV2_B2` model depends on args. - """ - stage_config = { - # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num - "stage1": [32, 32, 96, 1, False, False, 3, 4], - "stage2": [96, 64, 384, 1, True, False, 3, 4], - "stage3": [384, 128, 768, 3, True, True, 5, 4], - "stage4": [768, 256, 1536, 1, True, True, 5, 4], - } - - model = PPHGNetV2( - stem_channels=[3, 24, 32], stage_config=stage_config, use_lab=True, **kwargs - ) - return model - - -def PPHGNetV2_B3(pretrained=False, use_ssld=False, **kwargs): - """ - PPHGNetV2_B3 - Args: - pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. - If str, means the path of the pretrained model. - use_ssld (bool) Whether using ssld pretrained model when pretrained is True. - Returns: - model: nn.Layer. Specific `PPHGNetV2_B3` model depends on args. - """ - stage_config = { - # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num - "stage1": [32, 32, 128, 1, False, False, 3, 5], - "stage2": [128, 64, 512, 1, True, False, 3, 5], - "stage3": [512, 128, 1024, 3, True, True, 5, 5], - "stage4": [1024, 256, 2048, 1, True, True, 5, 5], - } - - model = PPHGNetV2( - stem_channels=[3, 24, 32], stage_config=stage_config, use_lab=True, **kwargs - ) - return model - - -def PPHGNetV2_B4(pretrained=False, use_ssld=False, det=False, text_rec=False, **kwargs): - """ - PPHGNetV2_B4 - Args: - pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. - If str, means the path of the pretrained model. - use_ssld (bool) Whether using ssld pretrained model when pretrained is True. - Returns: - model: nn.Layer. Specific `PPHGNetV2_B4` model depends on args. 
- """ - stage_config_rec = { - # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num, stride - "stage1": [48, 48, 128, 1, True, False, 3, 6, [2, 1]], - "stage2": [128, 96, 512, 1, True, False, 3, 6, [1, 2]], - "stage3": [512, 192, 1024, 3, True, True, 5, 6, [2, 1]], - "stage4": [1024, 384, 2048, 1, True, True, 5, 6, [2, 1]], - } - - stage_config_det = { - # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num - "stage1": [48, 48, 128, 1, False, False, 3, 6, 2], - "stage2": [128, 96, 512, 1, True, False, 3, 6, 2], - "stage3": [512, 192, 1024, 3, True, True, 5, 6, 2], - "stage4": [1024, 384, 2048, 1, True, True, 5, 6, 2], - } - model = PPHGNetV2( - stem_channels=[3, 32, 48], - stage_config=stage_config_det if det else stage_config_rec, - use_lab=False, - det=det, - text_rec=text_rec, - **kwargs, - ) - return model - - -def PPHGNetV2_B5(pretrained=False, use_ssld=False, **kwargs): - """ - PPHGNetV2_B5 - Args: - pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. - If str, means the path of the pretrained model. - use_ssld (bool) Whether using ssld pretrained model when pretrained is True. - Returns: - model: nn.Layer. Specific `PPHGNetV2_B5` model depends on args. - """ - stage_config = { - # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num - "stage1": [64, 64, 128, 1, False, False, 3, 6], - "stage2": [128, 128, 512, 2, True, False, 3, 6], - "stage3": [512, 256, 1024, 5, True, True, 5, 6], - "stage4": [1024, 512, 2048, 2, True, True, 5, 6], - } - - model = PPHGNetV2( - stem_channels=[3, 32, 64], stage_config=stage_config, use_lab=False, **kwargs - ) - return model - - -def PPHGNetV2_B6(pretrained=False, use_ssld=False, **kwargs): - """ - PPHGNetV2_B6 - Args: - pretrained (bool/str): If `True` load pretrained parameters, `False` otherwise. - If str, means the path of the pretrained model. - use_ssld (bool) Whether using ssld pretrained model when pretrained is True. - Returns: - model: nn.Layer. Specific `PPHGNetV2_B6` model depends on args. - """ - stage_config = { - # in_channels, mid_channels, out_channels, num_blocks, is_downsample, light_block, kernel_size, layer_num - "stage1": [96, 96, 192, 2, False, False, 3, 6], - "stage2": [192, 192, 512, 3, True, False, 3, 6], - "stage3": [512, 384, 1024, 6, True, True, 5, 6], - "stage4": [1024, 768, 2048, 3, True, True, 5, 6], - } - - model = PPHGNetV2( - stem_channels=[3, 48, 96], stage_config=stage_config, use_lab=False, **kwargs - ) - return model diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py deleted file mode 100644 index 3a117736d9456723055a83e5e0195267d1be513a..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/backbones/rec_svtrnet.py +++ /dev/null @@ -1,638 +0,0 @@ -import numpy as np -import torch -from torch import nn - -from ..common import Activation - - -def drop_path(x, drop_prob=0.0, training=False): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). - the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper... - See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ... 
- """ - if drop_prob == 0.0 or not training: - return x - keep_prob = torch.as_tensor(1 - drop_prob) - shape = (x.shape[0],) + (1,) * (x.ndim - 1) - random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype) - random_tensor = torch.floor(random_tensor) # binarize - output = x.divide(keep_prob) * random_tensor - return output - - -class ConvBNLayer(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size=3, - stride=1, - padding=0, - bias_attr=False, - groups=1, - act="gelu", - ): - super().__init__() - self.conv = nn.Conv2d( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - bias=bias_attr, - ) - self.norm = nn.BatchNorm2d(out_channels) - self.act = Activation(act_type=act, inplace=True) - - def forward(self, inputs): - out = self.conv(inputs) - out = self.norm(out) - out = self.act(out) - return out - - -class DropPath(nn.Module): - """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).""" - - def __init__(self, drop_prob=None): - super(DropPath, self).__init__() - self.drop_prob = drop_prob - - def forward(self, x): - return drop_path(x, self.drop_prob, self.training) - - -class Identity(nn.Module): - def __init__(self): - super(Identity, self).__init__() - - def forward(self, input): - return input - - -class Mlp(nn.Module): - def __init__( - self, - in_features, - hidden_features=None, - out_features=None, - act_layer="gelu", - drop=0.0, - ): - super().__init__() - out_features = out_features or in_features - hidden_features = hidden_features or in_features - self.fc1 = nn.Linear(in_features, hidden_features) - self.act = Activation(act_type=act_layer, inplace=True) - self.fc2 = nn.Linear(hidden_features, out_features) - self.drop = nn.Dropout(drop) - - def forward(self, x): - x = self.fc1(x) - x = self.act(x) - x = self.drop(x) - x = self.fc2(x) - x = self.drop(x) - return x - - -class ConvMixer(nn.Module): - def __init__( - self, - dim, - num_heads=8, - HW=[8, 25], - local_k=[3, 3], - ): - super().__init__() - self.HW = HW - self.dim = dim - self.local_mixer = nn.Conv2d( - dim, - dim, - local_k, - 1, - [local_k[0] // 2, local_k[1] // 2], - groups=num_heads, - ) - - def forward(self, x): - h = self.HW[0] - w = self.HW[1] - x = x.transpose([0, 2, 1]).reshape([0, self.dim, h, w]) - x = self.local_mixer(x) - x = x.flatten(2).permute(0, 2, 1) - return x - - -class Attention(nn.Module): - def __init__( - self, - dim, - num_heads=8, - mixer="Global", - HW=[8, 25], - local_k=[7, 11], - qkv_bias=False, - qk_scale=None, - attn_drop=0.0, - proj_drop=0.0, - ): - super().__init__() - self.num_heads = num_heads - head_dim = dim // num_heads - self.scale = qk_scale or head_dim**-0.5 - self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) - self.attn_drop = nn.Dropout(attn_drop) - self.proj = nn.Linear(dim, dim) - self.proj_drop = nn.Dropout(proj_drop) - self.HW = HW - if HW is not None: - H = HW[0] - W = HW[1] - self.N = H * W - self.C = dim - if mixer == "Local" and HW is not None: - hk = local_k[0] - wk = local_k[1] - mask = torch.ones(H * W, H + hk - 1, W + wk - 1, dtype=torch.float32) - for h in range(0, H): - for w in range(0, W): - mask[h * W + w, h : h + hk, w : w + wk] = 0.0 - mask_paddle = mask[:, hk // 2 : H + hk // 2, wk // 2 : W + wk // 2].flatten( - 1 - ) - mask_inf = torch.full( - [H * W, H * W], fill_value=float("-Inf"), dtype=torch.float32 - ) - mask = torch.where(mask_paddle < 1, mask_paddle, mask_inf) - self.mask = 
mask.unsqueeze(0).unsqueeze(1) - # self.mask = mask[None, None, :] - self.mixer = mixer - - def forward(self, x): - if self.HW is not None: - N = self.N - C = self.C - else: - _, N, C = x.shape - qkv = self.qkv(x) - qkv = qkv.reshape((-1, N, 3, self.num_heads, C // self.num_heads)).permute( - 2, 0, 3, 1, 4 - ) - q, k, v = qkv[0] * self.scale, qkv[1], qkv[2] - - attn = q.matmul(k.permute(0, 1, 3, 2)) - if self.mixer == "Local": - attn += self.mask - attn = nn.functional.softmax(attn, dim=-1) - attn = self.attn_drop(attn) - - x = (attn.matmul(v)).permute(0, 2, 1, 3).reshape((-1, N, C)) - x = self.proj(x) - x = self.proj_drop(x) - return x - - -class Block(nn.Module): - def __init__( - self, - dim, - num_heads, - mixer="Global", - local_mixer=[7, 11], - HW=None, - mlp_ratio=4.0, - qkv_bias=False, - qk_scale=None, - drop=0.0, - attn_drop=0.0, - drop_path=0.0, - act_layer="gelu", - norm_layer="nn.LayerNorm", - epsilon=1e-6, - prenorm=True, - ): - super().__init__() - if isinstance(norm_layer, str): - self.norm1 = eval(norm_layer)(dim, eps=epsilon) - else: - self.norm1 = norm_layer(dim) - if mixer == "Global" or mixer == "Local": - self.mixer = Attention( - dim, - num_heads=num_heads, - mixer=mixer, - HW=HW, - local_k=local_mixer, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - attn_drop=attn_drop, - proj_drop=drop, - ) - elif mixer == "Conv": - self.mixer = ConvMixer(dim, num_heads=num_heads, HW=HW, local_k=local_mixer) - else: - raise TypeError("The mixer must be one of [Global, Local, Conv]") - - self.drop_path = DropPath(drop_path) if drop_path > 0.0 else Identity() - if isinstance(norm_layer, str): - self.norm2 = eval(norm_layer)(dim, eps=epsilon) - else: - self.norm2 = norm_layer(dim) - mlp_hidden_dim = int(dim * mlp_ratio) - self.mlp_ratio = mlp_ratio - self.mlp = Mlp( - in_features=dim, - hidden_features=mlp_hidden_dim, - act_layer=act_layer, - drop=drop, - ) - self.prenorm = prenorm - - def forward(self, x): - if self.prenorm: - x = self.norm1(x + self.drop_path(self.mixer(x))) - x = self.norm2(x + self.drop_path(self.mlp(x))) - else: - x = x + self.drop_path(self.mixer(self.norm1(x))) - x = x + self.drop_path(self.mlp(self.norm2(x))) - return x - - -class PatchEmbed(nn.Module): - """Image to Patch Embedding""" - - def __init__( - self, - img_size=[32, 100], - in_channels=3, - embed_dim=768, - sub_num=2, - patch_size=[4, 4], - mode="pope", - ): - super().__init__() - num_patches = (img_size[1] // (2**sub_num)) * (img_size[0] // (2**sub_num)) - self.img_size = img_size - self.num_patches = num_patches - self.embed_dim = embed_dim - self.norm = None - if mode == "pope": - if sub_num == 2: - self.proj = nn.Sequential( - ConvBNLayer( - in_channels=in_channels, - out_channels=embed_dim // 2, - kernel_size=3, - stride=2, - padding=1, - act="gelu", - bias_attr=True, - ), - ConvBNLayer( - in_channels=embed_dim // 2, - out_channels=embed_dim, - kernel_size=3, - stride=2, - padding=1, - act="gelu", - bias_attr=True, - ), - ) - if sub_num == 3: - self.proj = nn.Sequential( - ConvBNLayer( - in_channels=in_channels, - out_channels=embed_dim // 4, - kernel_size=3, - stride=2, - padding=1, - act="gelu", - bias_attr=True, - ), - ConvBNLayer( - in_channels=embed_dim // 4, - out_channels=embed_dim // 2, - kernel_size=3, - stride=2, - padding=1, - act="gelu", - bias_attr=True, - ), - ConvBNLayer( - in_channels=embed_dim // 2, - out_channels=embed_dim, - kernel_size=3, - stride=2, - padding=1, - act="gelu", - bias_attr=True, - ), - ) - elif mode == "linear": - self.proj = nn.Conv2d( - 1, embed_dim, 
kernel_size=patch_size, stride=patch_size - ) - self.num_patches = ( - img_size[0] // patch_size[0] * img_size[1] // patch_size[1] - ) - - def forward(self, x): - B, C, H, W = x.shape - assert ( - H == self.img_size[0] and W == self.img_size[1] - ), "Input image size ({}*{}) doesn't match model ({}*{}).".format( - H, W, self.img_size[0], self.img_size[1] - ) - x = self.proj(x).flatten(2).permute(0, 2, 1) - return x - - -class SubSample(nn.Module): - def __init__( - self, - in_channels, - out_channels, - types="Pool", - stride=[2, 1], - sub_norm="nn.LayerNorm", - act=None, - ): - super().__init__() - self.types = types - if types == "Pool": - self.avgpool = nn.AvgPool2d( - kernel_size=[3, 5], stride=stride, padding=[1, 2] - ) - self.maxpool = nn.MaxPool2d( - kernel_size=[3, 5], stride=stride, padding=[1, 2] - ) - self.proj = nn.Linear(in_channels, out_channels) - else: - self.conv = nn.Conv2d( - in_channels, - out_channels, - kernel_size=3, - stride=stride, - padding=1, - ) - self.norm = eval(sub_norm)(out_channels) - if act is not None: - self.act = act() - else: - self.act = None - - def forward(self, x): - if self.types == "Pool": - x1 = self.avgpool(x) - x2 = self.maxpool(x) - x = (x1 + x2) * 0.5 - out = self.proj(x.flatten(2).permute(0, 2, 1)) - else: - x = self.conv(x) - out = x.flatten(2).permute(0, 2, 1) - out = self.norm(out) - if self.act is not None: - out = self.act(out) - - return out - - -class SVTRNet(nn.Module): - def __init__( - self, - img_size=[32, 100], - in_channels=3, - embed_dim=[64, 128, 256], - depth=[3, 6, 3], - num_heads=[2, 4, 8], - mixer=["Local"] * 6 + ["Global"] * 6, # Local atten, Global atten, Conv - local_mixer=[[7, 11], [7, 11], [7, 11]], - patch_merging="Conv", # Conv, Pool, None - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - drop_rate=0.0, - last_drop=0.0, - attn_drop_rate=0.0, - drop_path_rate=0.1, - norm_layer="nn.LayerNorm", - sub_norm="nn.LayerNorm", - epsilon=1e-6, - out_channels=192, - out_char_num=25, - block_unit="Block", - act="gelu", - last_stage=True, - sub_num=2, - prenorm=True, - use_lenhead=False, - **kwargs - ): - super().__init__() - self.img_size = img_size - self.embed_dim = embed_dim - self.out_channels = out_channels - self.prenorm = prenorm - patch_merging = ( - None - if patch_merging != "Conv" and patch_merging != "Pool" - else patch_merging - ) - self.patch_embed = PatchEmbed( - img_size=img_size, - in_channels=in_channels, - embed_dim=embed_dim[0], - sub_num=sub_num, - ) - num_patches = self.patch_embed.num_patches - self.HW = [img_size[0] // (2**sub_num), img_size[1] // (2**sub_num)] - self.pos_embed = nn.Parameter(torch.zeros(1, num_patches, embed_dim[0])) - self.pos_drop = nn.Dropout(p=drop_rate) - Block_unit = eval(block_unit) - - dpr = np.linspace(0, drop_path_rate, sum(depth)) - self.blocks1 = nn.ModuleList( - [ - Block_unit( - dim=embed_dim[0], - num_heads=num_heads[0], - mixer=mixer[0 : depth[0]][i], - HW=self.HW, - local_mixer=local_mixer[0], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - act_layer=act, - attn_drop=attn_drop_rate, - drop_path=dpr[0 : depth[0]][i], - norm_layer=norm_layer, - epsilon=epsilon, - prenorm=prenorm, - ) - for i in range(depth[0]) - ] - ) - if patch_merging is not None: - self.sub_sample1 = SubSample( - embed_dim[0], - embed_dim[1], - sub_norm=sub_norm, - stride=[2, 1], - types=patch_merging, - ) - HW = [self.HW[0] // 2, self.HW[1]] - else: - HW = self.HW - self.patch_merging = patch_merging - self.blocks2 = nn.ModuleList( - [ - Block_unit( - 
dim=embed_dim[1], - num_heads=num_heads[1], - mixer=mixer[depth[0] : depth[0] + depth[1]][i], - HW=HW, - local_mixer=local_mixer[1], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - act_layer=act, - attn_drop=attn_drop_rate, - drop_path=dpr[depth[0] : depth[0] + depth[1]][i], - norm_layer=norm_layer, - epsilon=epsilon, - prenorm=prenorm, - ) - for i in range(depth[1]) - ] - ) - if patch_merging is not None: - self.sub_sample2 = SubSample( - embed_dim[1], - embed_dim[2], - sub_norm=sub_norm, - stride=[2, 1], - types=patch_merging, - ) - HW = [self.HW[0] // 4, self.HW[1]] - else: - HW = self.HW - self.blocks3 = nn.ModuleList( - [ - Block_unit( - dim=embed_dim[2], - num_heads=num_heads[2], - mixer=mixer[depth[0] + depth[1] :][i], - HW=HW, - local_mixer=local_mixer[2], - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - act_layer=act, - attn_drop=attn_drop_rate, - drop_path=dpr[depth[0] + depth[1] :][i], - norm_layer=norm_layer, - epsilon=epsilon, - prenorm=prenorm, - ) - for i in range(depth[2]) - ] - ) - self.last_stage = last_stage - if last_stage: - self.avg_pool = nn.AdaptiveAvgPool2d([1, out_char_num]) - self.last_conv = nn.Conv2d( - in_channels=embed_dim[2], - out_channels=self.out_channels, - kernel_size=1, - stride=1, - padding=0, - bias=False, - ) - self.hardswish = Activation("hard_swish", inplace=True) # nn.Hardswish() - # self.dropout = nn.Dropout(p=last_drop, mode="downscale_in_infer") - self.dropout = nn.Dropout(p=last_drop) - if not prenorm: - self.norm = eval(norm_layer)(embed_dim[-1], eps=epsilon) - self.use_lenhead = use_lenhead - if use_lenhead: - self.len_conv = nn.Linear(embed_dim[2], self.out_channels) - self.hardswish_len = Activation( - "hard_swish", inplace=True - ) # nn.Hardswish() - self.dropout_len = nn.Dropout(p=last_drop) - - torch.nn.init.xavier_normal_(self.pos_embed) - self.apply(self._init_weights) - - def _init_weights(self, m): - # weight initialization - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out") - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Linear): - nn.init.normal_(m.weight, 0, 0.01) - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.ConvTranspose2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out") - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.LayerNorm): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - - def forward_features(self, x): - x = self.patch_embed(x) - x = x + self.pos_embed - x = self.pos_drop(x) - for blk in self.blocks1: - x = blk(x) - if self.patch_merging is not None: - x = self.sub_sample1( - x.permute(0, 2, 1).reshape( - [-1, self.embed_dim[0], self.HW[0], self.HW[1]] - ) - ) - for blk in self.blocks2: - x = blk(x) - if self.patch_merging is not None: - x = self.sub_sample2( - x.permute(0, 2, 1).reshape( - [-1, self.embed_dim[1], self.HW[0] // 2, self.HW[1]] - ) - ) - for blk in self.blocks3: - x = blk(x) - if not self.prenorm: - x = self.norm(x) - return x - - def forward(self, x): - x = self.forward_features(x) - if self.use_lenhead: - len_x = self.len_conv(x.mean(1)) - len_x = self.dropout_len(self.hardswish_len(len_x)) - if self.last_stage: - if self.patch_merging is not None: - h = self.HW[0] // 4 - else: - h = self.HW[0] - x = self.avg_pool( - x.permute(0, 2, 1).reshape([-1, self.embed_dim[2], h, self.HW[1]]) - ) - x = self.last_conv(x) - x = 
self.hardswish(x) - x = self.dropout(x) - if self.use_lenhead: - return x, len_x - return x diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py deleted file mode 100644 index ec1b30ccb0a04888562a0207bbdfbed1d8da0add..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/common.py +++ /dev/null @@ -1,76 +0,0 @@ -import torch -import torch.nn.functional as F -from torch import nn - - -class Hswish(nn.Module): - def __init__(self, inplace=True): - super(Hswish, self).__init__() - self.inplace = inplace - - def forward(self, x): - return x * F.relu6(x + 3.0, inplace=self.inplace) / 6.0 - - -# out = max(0, min(1, slop*x+offset)) -# paddle.fluid.layers.hard_sigmoid(x, slope=0.2, offset=0.5, name=None) -class Hsigmoid(nn.Module): - def __init__(self, inplace=True): - super(Hsigmoid, self).__init__() - self.inplace = inplace - - def forward(self, x): - # torch: F.relu6(x + 3., inplace=self.inplace) / 6. - # paddle: F.relu6(1.2 * x + 3., inplace=self.inplace) / 6. - return F.relu6(1.2 * x + 3.0, inplace=self.inplace) / 6.0 - - -class GELU(nn.Module): - def __init__(self, inplace=True): - super(GELU, self).__init__() - self.inplace = inplace - - def forward(self, x): - return torch.nn.functional.gelu(x) - - -class Swish(nn.Module): - def __init__(self, inplace=True): - super(Swish, self).__init__() - self.inplace = inplace - - def forward(self, x): - if self.inplace: - x.mul_(torch.sigmoid(x)) - return x - else: - return x * torch.sigmoid(x) - - -class Activation(nn.Module): - def __init__(self, act_type, inplace=True): - super(Activation, self).__init__() - act_type = act_type.lower() - if act_type == "relu": - self.act = nn.ReLU(inplace=inplace) - elif act_type == "relu6": - self.act = nn.ReLU6(inplace=inplace) - elif act_type == "sigmoid": - raise NotImplementedError - elif act_type == "hard_sigmoid": - self.act = Hsigmoid( - inplace - ) # nn.Hardsigmoid(inplace=inplace)#Hsigmoid(inplace)# - elif act_type == "hard_swish" or act_type == "hswish": - self.act = Hswish(inplace=inplace) - elif act_type == "leakyrelu": - self.act = nn.LeakyReLU(inplace=inplace) - elif act_type == "gelu": - self.act = GELU(inplace=inplace) - elif act_type == "swish": - self.act = Swish(inplace=inplace) - else: - raise NotImplementedError - - def forward(self, inputs): - return self.act(inputs) diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py deleted file mode 100644 index 00428c4374f8d69f8b59b40406bbb56cdf904dd3..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/__init__.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
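# Usage sketch for the build_head factory below (illustrative; the channel numbers
# are assumptions). The config dict names the head class and carries its keyword
# arguments, while backbone/neck-dependent values are passed through **kwargs.
#
#   det_head = build_head({"name": "DBHead", "k": 50}, in_channels=256)
#   rec_head = build_head({"name": "CTCHead", "out_channels": 6625}, in_channels=192)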
- -__all__ = ["build_head"] - - -def build_head(config, **kwargs): - # det head - from .det_db_head import DBHead, PFHeadLocal - - # rec head - from .rec_ctc_head import CTCHead - from .rec_multi_head import MultiHead - - # cls head - from .cls_head import ClsHead - - support_dict = [ - "DBHead", - "CTCHead", - "ClsHead", - "MultiHead", - "PFHeadLocal", - ] - - module_name = config.pop("name") - char_num = config.pop("char_num", 6625) - assert module_name in support_dict, Exception( - "head only support {}".format(support_dict) - ) - module_class = eval(module_name)(**config, **kwargs) - return module_class diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py deleted file mode 100644 index 9353b9ebb88c043ab31eedb4219b191eb88417da..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/cls_head.py +++ /dev/null @@ -1,23 +0,0 @@ -import torch -import torch.nn.functional as F -from torch import nn - - -class ClsHead(nn.Module): - """ - Class orientation - Args: - params(dict): super parameters for build Class network - """ - - def __init__(self, in_channels, class_dim, **kwargs): - super(ClsHead, self).__init__() - self.pool = nn.AdaptiveAvgPool2d(1) - self.fc = nn.Linear(in_channels, class_dim, bias=True) - - def forward(self, x): - x = self.pool(x) - x = torch.reshape(x, shape=[x.shape[0], x.shape[1]]) - x = self.fc(x) - x = F.softmax(x, dim=1) - return x diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py deleted file mode 100644 index 7c1196830829e6c788e5864861471977cdb47e25..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/det_db_head.py +++ /dev/null @@ -1,109 +0,0 @@ -import torch -import torch.nn as nn -import torch.nn.functional as F -from ..common import Activation -from ..backbones.det_mobilenet_v3 import ConvBNLayer - -class Head(nn.Module): - def __init__(self, in_channels, **kwargs): - super(Head, self).__init__() - self.conv1 = nn.Conv2d( - in_channels=in_channels, - out_channels=in_channels // 4, - kernel_size=3, - padding=1, - bias=False) - self.conv_bn1 = nn.BatchNorm2d( - in_channels // 4) - self.relu1 = Activation(act_type='relu') - - self.conv2 = nn.ConvTranspose2d( - in_channels=in_channels // 4, - out_channels=in_channels // 4, - kernel_size=2, - stride=2) - self.conv_bn2 = nn.BatchNorm2d( - in_channels // 4) - self.relu2 = Activation(act_type='relu') - - self.conv3 = nn.ConvTranspose2d( - in_channels=in_channels // 4, - out_channels=1, - kernel_size=2, - stride=2) - - def forward(self, x, return_f=False): - x = self.conv1(x) - x = self.conv_bn1(x) - x = self.relu1(x) - x = self.conv2(x) - x = self.conv_bn2(x) - x = self.relu2(x) - if return_f is True: - f = x - x = self.conv3(x) - x = torch.sigmoid(x) - if return_f is True: - return x, f - return x - - -class DBHead(nn.Module): - """ - Differentiable Binarization (DB) for text detection: - see https://arxiv.org/abs/1911.08947 - args: - params(dict): super parameters for build DB network - """ - - def __init__(self, in_channels, k=50, **kwargs): - super(DBHead, self).__init__() - self.k = k - binarize_name_list = [ - 'conv2d_56', 'batch_norm_47', 'conv2d_transpose_0', 'batch_norm_48', - 
'conv2d_transpose_1', 'binarize' - ] - thresh_name_list = [ - 'conv2d_57', 'batch_norm_49', 'conv2d_transpose_2', 'batch_norm_50', - 'conv2d_transpose_3', 'thresh' - ] - self.binarize = Head(in_channels, **kwargs)# binarize_name_list) - self.thresh = Head(in_channels, **kwargs)#thresh_name_list) - - def step_function(self, x, y): - return torch.reciprocal(1 + torch.exp(-self.k * (x - y))) - - def forward(self, x): - shrink_maps = self.binarize(x) - return {'maps': shrink_maps} - - -class LocalModule(nn.Module): - def __init__(self, in_c, mid_c, use_distance=True): - super(self.__class__, self).__init__() - self.last_3 = ConvBNLayer(in_c + 1, mid_c, 3, 1, 1, act='relu') - self.last_1 = nn.Conv2d(mid_c, 1, 1, 1, 0) - - def forward(self, x, init_map, distance_map): - outf = torch.cat([init_map, x], dim=1) - # last Conv - out = self.last_1(self.last_3(outf)) - return out - -class PFHeadLocal(DBHead): - def __init__(self, in_channels, k=50, mode='small', **kwargs): - super(PFHeadLocal, self).__init__(in_channels, k, **kwargs) - self.mode = mode - - self.up_conv = nn.Upsample(scale_factor=2, mode="nearest") - if self.mode == 'large': - self.cbn_layer = LocalModule(in_channels // 4, in_channels // 4) - elif self.mode == 'small': - self.cbn_layer = LocalModule(in_channels // 4, in_channels // 8) - - def forward(self, x, targets=None): - shrink_maps, f = self.binarize(x, return_f=True) - base_maps = shrink_maps - cbn_maps = self.cbn_layer(self.up_conv(f), shrink_maps, None) - cbn_maps = F.sigmoid(cbn_maps) - return {'maps': 0.5 * (base_maps + cbn_maps), 'cbn_maps': cbn_maps} \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py deleted file mode 100644 index 42e2fabba48ce813b7736b2242eb117761a242bc..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_ctc_head.py +++ /dev/null @@ -1,54 +0,0 @@ -import torch.nn.functional as F -from torch import nn - - -class CTCHead(nn.Module): - def __init__( - self, - in_channels, - out_channels=6625, - fc_decay=0.0004, - mid_channels=None, - return_feats=False, - **kwargs - ): - super(CTCHead, self).__init__() - if mid_channels is None: - self.fc = nn.Linear( - in_channels, - out_channels, - bias=True, - ) - else: - self.fc1 = nn.Linear( - in_channels, - mid_channels, - bias=True, - ) - self.fc2 = nn.Linear( - mid_channels, - out_channels, - bias=True, - ) - - self.out_channels = out_channels - self.mid_channels = mid_channels - self.return_feats = return_feats - - def forward(self, x, labels=None): - if self.mid_channels is None: - predicts = self.fc(x) - else: - x = self.fc1(x) - predicts = self.fc2(x) - - if self.return_feats: - result = (x, predicts) - else: - result = predicts - - if not self.training: - predicts = F.softmax(predicts, dim=2) - result = predicts - - return result diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py deleted file mode 100644 index a4807cbb0cde37024fac62a39c8dee7f75d6da1f..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/heads/rec_multi_head.py +++ /dev/null @@ -1,58 +0,0 @@ -from torch import nn - -from ..necks.rnn import Im2Seq, SequenceEncoder -from .rec_ctc_head 
import CTCHead - - -class FCTranspose(nn.Module): - def __init__(self, in_channels, out_channels, only_transpose=False): - super().__init__() - self.only_transpose = only_transpose - if not self.only_transpose: - self.fc = nn.Linear(in_channels, out_channels, bias=False) - - def forward(self, x): - if self.only_transpose: - return x.permute([0, 2, 1]) - else: - return self.fc(x.permute([0, 2, 1])) - - -class MultiHead(nn.Module): - def __init__(self, in_channels, out_channels_list, **kwargs): - super().__init__() - self.head_list = kwargs.pop("head_list") - - self.gtc_head = "sar" - assert len(self.head_list) >= 2 - for idx, head_name in enumerate(self.head_list): - name = list(head_name)[0] - if name == "SARHead": - pass - - elif name == "NRTRHead": - pass - elif name == "CTCHead": - # ctc neck - self.encoder_reshape = Im2Seq(in_channels) - neck_args = self.head_list[idx][name]["Neck"] - encoder_type = neck_args.pop("name") - self.ctc_encoder = SequenceEncoder( - in_channels=in_channels, encoder_type=encoder_type, **neck_args - ) - # ctc head - head_args = self.head_list[idx][name].get("Head", {}) - if head_args is None: - head_args = {} - - self.ctc_head = CTCHead( - in_channels=self.ctc_encoder.out_channels, - out_channels=out_channels_list["CTCLabelDecode"], - **head_args, - ) - else: - raise NotImplementedError(f"{name} is not supported in MultiHead yet") - - def forward(self, x, data=None): - ctc_encoder = self.ctc_encoder(x) - return self.ctc_head(ctc_encoder) diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py deleted file mode 100644 index bbe85bc6a59f8d03541cbeb0e7cff34c5ba6c2e5..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/__init__.py +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
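# Usage sketch for the build_neck factory below (illustrative; the channel numbers
# are assumptions). The config dict names the neck class and carries its keyword arguments.
#
#   neck = build_neck({"name": "DBFPN", "in_channels": [16, 24, 56, 480], "out_channels": 96})
#   # yields a DBFPN that fuses the four backbone feature maps into a single 96-channel map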
- -__all__ = ["build_neck"] - - -def build_neck(config): - from .db_fpn import DBFPN, LKPAN, RSEFPN - from .rnn import SequenceEncoder - - support_dict = ["DBFPN", "SequenceEncoder", "RSEFPN", "LKPAN"] - - module_name = config.pop("name") - assert module_name in support_dict, Exception( - "neck only support {}".format(support_dict) - ) - module_class = eval(module_name)(**config) - return module_class diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py deleted file mode 100644 index 9c8460a23a5816ba9ff8c6be6ed8fd31e4e697b2..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/db_fpn.py +++ /dev/null @@ -1,456 +0,0 @@ -import torch -import torch.nn.functional as F -from torch import nn - -from ..backbones.det_mobilenet_v3 import SEModule -from ..necks.intracl import IntraCLBlock - - -def hard_swish(x, inplace=True): - return x * F.relu6(x + 3.0, inplace=inplace) / 6.0 - - -class DSConv(nn.Module): - def __init__( - self, - in_channels, - out_channels, - kernel_size, - padding, - stride=1, - groups=None, - if_act=True, - act="relu", - **kwargs - ): - super(DSConv, self).__init__() - if groups == None: - groups = in_channels - self.if_act = if_act - self.act = act - self.conv1 = nn.Conv2d( - in_channels=in_channels, - out_channels=in_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=groups, - bias=False, - ) - - self.bn1 = nn.BatchNorm2d(in_channels) - - self.conv2 = nn.Conv2d( - in_channels=in_channels, - out_channels=int(in_channels * 4), - kernel_size=1, - stride=1, - bias=False, - ) - - self.bn2 = nn.BatchNorm2d(int(in_channels * 4)) - - self.conv3 = nn.Conv2d( - in_channels=int(in_channels * 4), - out_channels=out_channels, - kernel_size=1, - stride=1, - bias=False, - ) - self._c = [in_channels, out_channels] - if in_channels != out_channels: - self.conv_end = nn.Conv2d( - in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - bias=False, - ) - - def forward(self, inputs): - x = self.conv1(inputs) - x = self.bn1(x) - - x = self.conv2(x) - x = self.bn2(x) - if self.if_act: - if self.act == "relu": - x = F.relu(x) - elif self.act == "hardswish": - x = hard_swish(x) - else: - print( - "The activation function({}) is selected incorrectly.".format( - self.act - ) - ) - exit() - - x = self.conv3(x) - if self._c[0] != self._c[1]: - x = x + self.conv_end(inputs) - return x - - -class DBFPN(nn.Module): - def __init__(self, in_channels, out_channels, use_asf=False, **kwargs): - super(DBFPN, self).__init__() - self.out_channels = out_channels - self.use_asf = use_asf - - self.in2_conv = nn.Conv2d( - in_channels=in_channels[0], - out_channels=self.out_channels, - kernel_size=1, - bias=False, - ) - self.in3_conv = nn.Conv2d( - in_channels=in_channels[1], - out_channels=self.out_channels, - kernel_size=1, - bias=False, - ) - self.in4_conv = nn.Conv2d( - in_channels=in_channels[2], - out_channels=self.out_channels, - kernel_size=1, - bias=False, - ) - self.in5_conv = nn.Conv2d( - in_channels=in_channels[3], - out_channels=self.out_channels, - kernel_size=1, - bias=False, - ) - self.p5_conv = nn.Conv2d( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - bias=False, - ) - self.p4_conv = nn.Conv2d( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - 
padding=1, - bias=False, - ) - self.p3_conv = nn.Conv2d( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - bias=False, - ) - self.p2_conv = nn.Conv2d( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - bias=False, - ) - - if self.use_asf is True: - self.asf = ASFBlock(self.out_channels, self.out_channels // 4) - - def forward(self, x): - c2, c3, c4, c5 = x - - in5 = self.in5_conv(c5) - in4 = self.in4_conv(c4) - in3 = self.in3_conv(c3) - in2 = self.in2_conv(c2) - - out4 = in4 + F.interpolate( - in5, - scale_factor=2, - mode="nearest", - ) # align_mode=1) # 1/16 - out3 = in3 + F.interpolate( - out4, - scale_factor=2, - mode="nearest", - ) # align_mode=1) # 1/8 - out2 = in2 + F.interpolate( - out3, - scale_factor=2, - mode="nearest", - ) # align_mode=1) # 1/4 - - p5 = self.p5_conv(in5) - p4 = self.p4_conv(out4) - p3 = self.p3_conv(out3) - p2 = self.p2_conv(out2) - p5 = F.interpolate( - p5, - scale_factor=8, - mode="nearest", - ) # align_mode=1) - p4 = F.interpolate( - p4, - scale_factor=4, - mode="nearest", - ) # align_mode=1) - p3 = F.interpolate( - p3, - scale_factor=2, - mode="nearest", - ) # align_mode=1) - - fuse = torch.cat([p5, p4, p3, p2], dim=1) - - if self.use_asf is True: - fuse = self.asf(fuse, [p5, p4, p3, p2]) - - return fuse - - -class RSELayer(nn.Module): - def __init__(self, in_channels, out_channels, kernel_size, shortcut=True): - super(RSELayer, self).__init__() - self.out_channels = out_channels - self.in_conv = nn.Conv2d( - in_channels=in_channels, - out_channels=self.out_channels, - kernel_size=kernel_size, - padding=int(kernel_size // 2), - bias=False, - ) - self.se_block = SEModule(self.out_channels) - self.shortcut = shortcut - - def forward(self, ins): - x = self.in_conv(ins) - if self.shortcut: - out = x + self.se_block(x) - else: - out = self.se_block(x) - return out - - -class RSEFPN(nn.Module): - def __init__(self, in_channels, out_channels, shortcut=True, **kwargs): - super(RSEFPN, self).__init__() - self.out_channels = out_channels - self.ins_conv = nn.ModuleList() - self.inp_conv = nn.ModuleList() - self.intracl = False - if "intracl" in kwargs.keys() and kwargs["intracl"] is True: - self.intracl = kwargs["intracl"] - self.incl1 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) - self.incl2 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) - self.incl3 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) - self.incl4 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) - - for i in range(len(in_channels)): - self.ins_conv.append( - RSELayer(in_channels[i], out_channels, kernel_size=1, shortcut=shortcut) - ) - self.inp_conv.append( - RSELayer( - out_channels, out_channels // 4, kernel_size=3, shortcut=shortcut - ) - ) - - def forward(self, x): - c2, c3, c4, c5 = x - - in5 = self.ins_conv[3](c5) - in4 = self.ins_conv[2](c4) - in3 = self.ins_conv[1](c3) - in2 = self.ins_conv[0](c2) - - out4 = in4 + F.interpolate(in5, scale_factor=2, mode="nearest") # 1/16 - out3 = in3 + F.interpolate(out4, scale_factor=2, mode="nearest") # 1/8 - out2 = in2 + F.interpolate(out3, scale_factor=2, mode="nearest") # 1/4 - - p5 = self.inp_conv[3](in5) - p4 = self.inp_conv[2](out4) - p3 = self.inp_conv[1](out3) - p2 = self.inp_conv[0](out2) - - if self.intracl is True: - p5 = self.incl4(p5) - p4 = self.incl3(p4) - p3 = self.incl2(p3) - p2 = self.incl1(p2) - - p5 = F.interpolate(p5, scale_factor=8, mode="nearest") - p4 = F.interpolate(p4, scale_factor=4, 
mode="nearest") - p3 = F.interpolate(p3, scale_factor=2, mode="nearest") - - fuse = torch.cat([p5, p4, p3, p2], dim=1) - return fuse - - -class LKPAN(nn.Module): - def __init__(self, in_channels, out_channels, mode="large", **kwargs): - super(LKPAN, self).__init__() - self.out_channels = out_channels - - self.ins_conv = nn.ModuleList() - self.inp_conv = nn.ModuleList() - # pan head - self.pan_head_conv = nn.ModuleList() - self.pan_lat_conv = nn.ModuleList() - - if mode.lower() == "lite": - p_layer = DSConv - elif mode.lower() == "large": - p_layer = nn.Conv2d - else: - raise ValueError( - "mode can only be one of ['lite', 'large'], but received {}".format( - mode - ) - ) - - for i in range(len(in_channels)): - self.ins_conv.append( - nn.Conv2d( - in_channels=in_channels[i], - out_channels=self.out_channels, - kernel_size=1, - bias=False, - ) - ) - - self.inp_conv.append( - p_layer( - in_channels=self.out_channels, - out_channels=self.out_channels // 4, - kernel_size=9, - padding=4, - bias=False, - ) - ) - - if i > 0: - self.pan_head_conv.append( - nn.Conv2d( - in_channels=self.out_channels // 4, - out_channels=self.out_channels // 4, - kernel_size=3, - padding=1, - stride=2, - bias=False, - ) - ) - self.pan_lat_conv.append( - p_layer( - in_channels=self.out_channels // 4, - out_channels=self.out_channels // 4, - kernel_size=9, - padding=4, - bias=False, - ) - ) - self.intracl = False - if "intracl" in kwargs.keys() and kwargs["intracl"] is True: - self.intracl = kwargs["intracl"] - self.incl1 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) - self.incl2 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) - self.incl3 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) - self.incl4 = IntraCLBlock(self.out_channels // 4, reduce_factor=2) - - def forward(self, x): - c2, c3, c4, c5 = x - - in5 = self.ins_conv[3](c5) - in4 = self.ins_conv[2](c4) - in3 = self.ins_conv[1](c3) - in2 = self.ins_conv[0](c2) - - out4 = in4 + F.interpolate(in5, scale_factor=2, mode="nearest") # 1/16 - out3 = in3 + F.interpolate(out4, scale_factor=2, mode="nearest") # 1/8 - out2 = in2 + F.interpolate(out3, scale_factor=2, mode="nearest") # 1/4 - - f5 = self.inp_conv[3](in5) - f4 = self.inp_conv[2](out4) - f3 = self.inp_conv[1](out3) - f2 = self.inp_conv[0](out2) - - pan3 = f3 + self.pan_head_conv[0](f2) - pan4 = f4 + self.pan_head_conv[1](pan3) - pan5 = f5 + self.pan_head_conv[2](pan4) - - p2 = self.pan_lat_conv[0](f2) - p3 = self.pan_lat_conv[1](pan3) - p4 = self.pan_lat_conv[2](pan4) - p5 = self.pan_lat_conv[3](pan5) - - if self.intracl is True: - p5 = self.incl4(p5) - p4 = self.incl3(p4) - p3 = self.incl2(p3) - p2 = self.incl1(p2) - - p5 = F.interpolate(p5, scale_factor=8, mode="nearest") - p4 = F.interpolate(p4, scale_factor=4, mode="nearest") - p3 = F.interpolate(p3, scale_factor=2, mode="nearest") - - fuse = torch.cat([p5, p4, p3, p2], dim=1) - return fuse - - -class ASFBlock(nn.Module): - """ - This code is refered from: - https://github.com/MhLiao/DB/blob/master/decoders/feature_attention.py - """ - - def __init__(self, in_channels, inter_channels, out_features_num=4): - """ - Adaptive Scale Fusion (ASF) block of DBNet++ - Args: - in_channels: the number of channels in the input data - inter_channels: the number of middle channels - out_features_num: the number of fused stages - """ - super(ASFBlock, self).__init__() - self.in_channels = in_channels - self.inter_channels = inter_channels - self.out_features_num = out_features_num - self.conv = nn.Conv2d(in_channels, inter_channels, 3, padding=1) 
- - self.spatial_scale = nn.Sequential( - # Nx1xHxW - nn.Conv2d( - in_channels=1, - out_channels=1, - kernel_size=3, - bias=False, - padding=1, - ), - nn.ReLU(), - nn.Conv2d( - in_channels=1, - out_channels=1, - kernel_size=1, - bias=False, - ), - nn.Sigmoid(), - ) - - self.channel_scale = nn.Sequential( - nn.Conv2d( - in_channels=inter_channels, - out_channels=out_features_num, - kernel_size=1, - bias=False, - ), - nn.Sigmoid(), - ) - - def forward(self, fuse_features, features_list): - fuse_features = self.conv(fuse_features) - spatial_x = torch.mean(fuse_features, dim=1, keepdim=True) - attention_scores = self.spatial_scale(spatial_x) + fuse_features - attention_scores = self.channel_scale(attention_scores) - assert len(features_list) == self.out_features_num - - out_list = [] - for i in range(self.out_features_num): - out_list.append(attention_scores[:, i : i + 1] * features_list[i]) - return torch.cat(out_list, dim=1) diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py deleted file mode 100644 index 0ba85fa8086ff013491ef66beca49e0ee8475f2c..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/intracl.py +++ /dev/null @@ -1,117 +0,0 @@ -from torch import nn - - -class IntraCLBlock(nn.Module): - def __init__(self, in_channels=96, reduce_factor=4): - super(IntraCLBlock, self).__init__() - self.channels = in_channels - self.rf = reduce_factor - self.conv1x1_reduce_channel = nn.Conv2d( - self.channels, self.channels // self.rf, kernel_size=1, stride=1, padding=0 - ) - self.conv1x1_return_channel = nn.Conv2d( - self.channels // self.rf, self.channels, kernel_size=1, stride=1, padding=0 - ) - - self.v_layer_7x1 = nn.Conv2d( - self.channels // self.rf, - self.channels // self.rf, - kernel_size=(7, 1), - stride=(1, 1), - padding=(3, 0), - ) - self.v_layer_5x1 = nn.Conv2d( - self.channels // self.rf, - self.channels // self.rf, - kernel_size=(5, 1), - stride=(1, 1), - padding=(2, 0), - ) - self.v_layer_3x1 = nn.Conv2d( - self.channels // self.rf, - self.channels // self.rf, - kernel_size=(3, 1), - stride=(1, 1), - padding=(1, 0), - ) - - self.q_layer_1x7 = nn.Conv2d( - self.channels // self.rf, - self.channels // self.rf, - kernel_size=(1, 7), - stride=(1, 1), - padding=(0, 3), - ) - self.q_layer_1x5 = nn.Conv2d( - self.channels // self.rf, - self.channels // self.rf, - kernel_size=(1, 5), - stride=(1, 1), - padding=(0, 2), - ) - self.q_layer_1x3 = nn.Conv2d( - self.channels // self.rf, - self.channels // self.rf, - kernel_size=(1, 3), - stride=(1, 1), - padding=(0, 1), - ) - - # base - self.c_layer_7x7 = nn.Conv2d( - self.channels // self.rf, - self.channels // self.rf, - kernel_size=(7, 7), - stride=(1, 1), - padding=(3, 3), - ) - self.c_layer_5x5 = nn.Conv2d( - self.channels // self.rf, - self.channels // self.rf, - kernel_size=(5, 5), - stride=(1, 1), - padding=(2, 2), - ) - self.c_layer_3x3 = nn.Conv2d( - self.channels // self.rf, - self.channels // self.rf, - kernel_size=(3, 3), - stride=(1, 1), - padding=(1, 1), - ) - - self.bn = nn.BatchNorm2d(self.channels) - self.relu = nn.ReLU() - - def forward(self, x): - x_new = self.conv1x1_reduce_channel(x) - - x_7_c = self.c_layer_7x7(x_new) - x_7_v = self.v_layer_7x1(x_new) - x_7_q = self.q_layer_1x7(x_new) - x_7 = x_7_c + x_7_v + x_7_q - - x_5_c = self.c_layer_5x5(x_7) - x_5_v = self.v_layer_5x1(x_7) - x_5_q = self.q_layer_1x5(x_7) - x_5 = 
x_5_c + x_5_v + x_5_q - - x_3_c = self.c_layer_3x3(x_5) - x_3_v = self.v_layer_3x1(x_5) - x_3_q = self.q_layer_1x3(x_5) - x_3 = x_3_c + x_3_v + x_3_q - - x_relation = self.conv1x1_return_channel(x_3) - - x_relation = self.bn(x_relation) - x_relation = self.relu(x_relation) - - return x + x_relation - - -def build_intraclblock_list(num_block): - IntraCLBlock_list = nn.ModuleList() - for i in range(num_block): - IntraCLBlock_list.append(IntraCLBlock()) - - return IntraCLBlock_list diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py deleted file mode 100644 index 79c8af3028a02abbfbc31fec95d5088e3f59c506..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/modeling/necks/rnn.py +++ /dev/null @@ -1,241 +0,0 @@ -import torch -from torch import nn - -from ..backbones.rec_svtrnet import Block, ConvBNLayer - - -class Im2Seq(nn.Module): - def __init__(self, in_channels, **kwargs): - super().__init__() - self.out_channels = in_channels - - # def forward(self, x): - # B, C, H, W = x.shape - # # assert H == 1 - # x = x.squeeze(dim=2) - # # x = x.transpose([0, 2, 1]) # paddle (NTC)(batch, width, channels) - # x = x.permute(0, 2, 1) - # return x - - def forward(self, x): - B, C, H, W = x.shape - # 处理四维张量,将空间维度展平为序列 - if H == 1: - # 原来的处理逻辑,适用于H=1的情况 - x = x.squeeze(dim=2) - x = x.permute(0, 2, 1) # (B, W, C) - else: - # 处理H不为1的情况 - x = x.permute(0, 2, 3, 1) # (B, H, W, C) - x = x.reshape(B, H * W, C) # (B, H*W, C) - - return x - -class EncoderWithRNN_(nn.Module): - def __init__(self, in_channels, hidden_size): - super(EncoderWithRNN_, self).__init__() - self.out_channels = hidden_size * 2 - self.rnn1 = nn.LSTM( - in_channels, - hidden_size, - bidirectional=False, - batch_first=True, - num_layers=2, - ) - self.rnn2 = nn.LSTM( - in_channels, - hidden_size, - bidirectional=False, - batch_first=True, - num_layers=2, - ) - - def forward(self, x): - self.rnn1.flatten_parameters() - self.rnn2.flatten_parameters() - out1, h1 = self.rnn1(x) - out2, h2 = self.rnn2(torch.flip(x, [1])) - return torch.cat([out1, torch.flip(out2, [1])], 2) - - -class EncoderWithRNN(nn.Module): - def __init__(self, in_channels, hidden_size): - super(EncoderWithRNN, self).__init__() - self.out_channels = hidden_size * 2 - self.lstm = nn.LSTM( - in_channels, hidden_size, num_layers=2, batch_first=True, bidirectional=True - ) # batch_first:=True - - def forward(self, x): - x, _ = self.lstm(x) - return x - - -class EncoderWithFC(nn.Module): - def __init__(self, in_channels, hidden_size): - super(EncoderWithFC, self).__init__() - self.out_channels = hidden_size - self.fc = nn.Linear( - in_channels, - hidden_size, - bias=True, - ) - - def forward(self, x): - x = self.fc(x) - return x - - -class EncoderWithSVTR(nn.Module): - def __init__( - self, - in_channels, - dims=64, # XS - depth=2, - hidden_dims=120, - use_guide=False, - num_heads=8, - qkv_bias=True, - mlp_ratio=2.0, - drop_rate=0.1, - kernel_size=[3, 3], - attn_drop_rate=0.1, - drop_path=0.0, - qk_scale=None, - ): - super(EncoderWithSVTR, self).__init__() - self.depth = depth - self.use_guide = use_guide - self.conv1 = ConvBNLayer( - in_channels, - in_channels // 8, - kernel_size=kernel_size, - padding=[kernel_size[0] // 2, kernel_size[1] // 2], - act="swish", - ) - self.conv2 = ConvBNLayer( - in_channels // 8, hidden_dims, kernel_size=1, act="swish" - ) - - self.svtr_block = nn.ModuleList( - [ - Block( 
- dim=hidden_dims, - num_heads=num_heads, - mixer="Global", - HW=None, - mlp_ratio=mlp_ratio, - qkv_bias=qkv_bias, - qk_scale=qk_scale, - drop=drop_rate, - act_layer="swish", - attn_drop=attn_drop_rate, - drop_path=drop_path, - norm_layer="nn.LayerNorm", - epsilon=1e-05, - prenorm=False, - ) - for i in range(depth) - ] - ) - self.norm = nn.LayerNorm(hidden_dims, eps=1e-6) - self.conv3 = ConvBNLayer(hidden_dims, in_channels, kernel_size=1, act="swish") - # last conv-nxn, the input is concat of input tensor and conv3 output tensor - self.conv4 = ConvBNLayer( - 2 * in_channels, in_channels // 8, padding=1, act="swish" - ) - - self.conv1x1 = ConvBNLayer(in_channels // 8, dims, kernel_size=1, act="swish") - self.out_channels = dims - self.apply(self._init_weights) - - def _init_weights(self, m): - # weight initialization - if isinstance(m, nn.Conv2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out") - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.BatchNorm2d): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Linear): - nn.init.normal_(m.weight, 0, 0.01) - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.ConvTranspose2d): - nn.init.kaiming_normal_(m.weight, mode="fan_out") - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.LayerNorm): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - - def forward(self, x): - # for use guide - if self.use_guide: - z = x.clone() - z.stop_gradient = True - else: - z = x - # for short cut - h = z - # reduce dim - z = self.conv1(z) - z = self.conv2(z) - # SVTR global block - B, C, H, W = z.shape - z = z.flatten(2).permute(0, 2, 1) - - for blk in self.svtr_block: - z = blk(z) - - z = self.norm(z) - # last stage - z = z.reshape([-1, H, W, C]).permute(0, 3, 1, 2) - z = self.conv3(z) - z = torch.cat((h, z), dim=1) - z = self.conv1x1(self.conv4(z)) - - return z - - -class SequenceEncoder(nn.Module): - def __init__(self, in_channels, encoder_type, hidden_size=48, **kwargs): - super(SequenceEncoder, self).__init__() - self.encoder_reshape = Im2Seq(in_channels) - self.out_channels = self.encoder_reshape.out_channels - self.encoder_type = encoder_type - if encoder_type == "reshape": - self.only_reshape = True - else: - support_encoder_dict = { - "reshape": Im2Seq, - "fc": EncoderWithFC, - "rnn": EncoderWithRNN, - "svtr": EncoderWithSVTR, - } - assert encoder_type in support_encoder_dict, "{} must in {}".format( - encoder_type, support_encoder_dict.keys() - ) - - if encoder_type == "svtr": - self.encoder = support_encoder_dict[encoder_type]( - self.encoder_reshape.out_channels, **kwargs - ) - else: - self.encoder = support_encoder_dict[encoder_type]( - self.encoder_reshape.out_channels, hidden_size - ) - self.out_channels = self.encoder.out_channels - self.only_reshape = False - - def forward(self, x): - if self.encoder_type != "svtr": - x = self.encoder_reshape(x) - if not self.only_reshape: - x = self.encoder(x) - return x - else: - x = self.encoder(x) - x = self.encoder_reshape(x) - return x diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py deleted file mode 100755 index 40603ade8895fb995e97310ff75e7e67696bd52b..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/__init__.py +++ /dev/null @@ -1,33 +0,0 @@ - -from __future__ import absolute_import -from __future__ import 
division -from __future__ import print_function -from __future__ import unicode_literals - -import copy - -__all__ = ['build_post_process'] - - -def build_post_process(config, global_config=None): - from .db_postprocess import DBPostProcess - from .rec_postprocess import CTCLabelDecode, AttnLabelDecode, SRNLabelDecode, TableLabelDecode, \ - NRTRLabelDecode, SARLabelDecode, ViTSTRLabelDecode, RFLLabelDecode - from .cls_postprocess import ClsPostProcess - from .rec_postprocess import CANLabelDecode - - support_dict = [ - 'DBPostProcess', 'CTCLabelDecode', - 'AttnLabelDecode', 'ClsPostProcess', 'SRNLabelDecode', - 'TableLabelDecode', 'NRTRLabelDecode', 'SARLabelDecode', - 'ViTSTRLabelDecode','CANLabelDecode', 'RFLLabelDecode' - ] - - config = copy.deepcopy(config) - module_name = config.pop('name') - if global_config is not None: - config.update(global_config) - assert module_name in support_dict, Exception( - 'post process only support {}, but got {}'.format(support_dict, module_name)) - module_class = eval(module_name)(**config) - return module_class \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py deleted file mode 100755 index c9c6affce380d827090faf67b0e63cde1cdd00fd..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/cls_postprocess.py +++ /dev/null @@ -1,20 +0,0 @@ -import torch - - -class ClsPostProcess(object): - """ Convert between text-label and text-index """ - - def __init__(self, label_list, **kwargs): - super(ClsPostProcess, self).__init__() - self.label_list = label_list - - def __call__(self, preds, label=None, *args, **kwargs): - if isinstance(preds, torch.Tensor): - preds = preds.cpu().numpy() - pred_idxs = preds.argmax(axis=1) - decode_out = [(self.label_list[idx], preds[i, idx]) - for i, idx in enumerate(pred_idxs)] - if label is None: - return decode_out - label = [(self.label_list[idx], 1.0) for idx in label] - return decode_out, label \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py deleted file mode 100755 index 309f7f3fe4bbaf3e9b7a472fba3c4dc0b91d202c..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/db_postprocess.py +++ /dev/null @@ -1,179 +0,0 @@ -""" -This code is refered from: -https://github.com/WenmuZhou/DBNet.pytorch/blob/master/post_processing/seg_detector_representer.py -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import cv2 -import torch -from shapely.geometry import Polygon -import pyclipper - - -class DBPostProcess(object): - """ - The post process for Differentiable Binarization (DB). 
- """ - - def __init__(self, - thresh=0.3, - box_thresh=0.7, - max_candidates=1000, - unclip_ratio=2.0, - use_dilation=False, - score_mode="fast", - **kwargs): - self.thresh = thresh - self.box_thresh = box_thresh - self.max_candidates = max_candidates - self.unclip_ratio = unclip_ratio - self.min_size = 3 - self.score_mode = score_mode - assert score_mode in [ - "slow", "fast" - ], "Score mode must be in [slow, fast] but got: {}".format(score_mode) - - self.dilation_kernel = None if not use_dilation else np.array( - [[1, 1], [1, 1]]) - - def boxes_from_bitmap(self, pred, _bitmap, dest_width, dest_height): - ''' - _bitmap: single map with shape (1, H, W), - whose values are binarized as {0, 1} - ''' - - bitmap = _bitmap - height, width = bitmap.shape - - outs = cv2.findContours((bitmap * 255).astype(np.uint8), cv2.RETR_LIST, - cv2.CHAIN_APPROX_SIMPLE) - if len(outs) == 3: - img, contours, _ = outs[0], outs[1], outs[2] - elif len(outs) == 2: - contours, _ = outs[0], outs[1] - - num_contours = min(len(contours), self.max_candidates) - - boxes = [] - scores = [] - for index in range(num_contours): - contour = contours[index] - points, sside = self.get_mini_boxes(contour) - if sside < self.min_size: - continue - points = np.array(points) - if self.score_mode == "fast": - score = self.box_score_fast(pred, points.reshape(-1, 2)) - else: - score = self.box_score_slow(pred, contour) - if self.box_thresh > score: - continue - - box = self.unclip(points).reshape(-1, 1, 2) - box, sside = self.get_mini_boxes(box) - if sside < self.min_size + 2: - continue - box = np.array(box) - - box[:, 0] = np.clip( - np.round(box[:, 0] / width * dest_width), 0, dest_width) - box[:, 1] = np.clip( - np.round(box[:, 1] / height * dest_height), 0, dest_height) - boxes.append(box.astype(np.int16)) - scores.append(score) - return np.array(boxes, dtype=np.int16), scores - - def unclip(self, box): - unclip_ratio = self.unclip_ratio - poly = Polygon(box) - distance = poly.area * unclip_ratio / poly.length - offset = pyclipper.PyclipperOffset() - offset.AddPath(box, pyclipper.JT_ROUND, pyclipper.ET_CLOSEDPOLYGON) - expanded = np.array(offset.Execute(distance)) - return expanded - - def get_mini_boxes(self, contour): - bounding_box = cv2.minAreaRect(contour) - points = sorted(list(cv2.boxPoints(bounding_box)), key=lambda x: x[0]) - - index_1, index_2, index_3, index_4 = 0, 1, 2, 3 - if points[1][1] > points[0][1]: - index_1 = 0 - index_4 = 1 - else: - index_1 = 1 - index_4 = 0 - if points[3][1] > points[2][1]: - index_2 = 2 - index_3 = 3 - else: - index_2 = 3 - index_3 = 2 - - box = [ - points[index_1], points[index_2], points[index_3], points[index_4] - ] - return box, min(bounding_box[1]) - - def box_score_fast(self, bitmap, _box): - ''' - box_score_fast: use bbox mean score as the mean score - ''' - h, w = bitmap.shape[:2] - box = _box.copy() - xmin = np.clip(np.floor(box[:, 0].min()).astype(np.int64), 0, w - 1) - xmax = np.clip(np.ceil(box[:, 0].max()).astype(np.int64), 0, w - 1) - ymin = np.clip(np.floor(box[:, 1].min()).astype(np.int64), 0, h - 1) - ymax = np.clip(np.ceil(box[:, 1].max()).astype(np.int64), 0, h - 1) - - mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) - box[:, 0] = box[:, 0] - xmin - box[:, 1] = box[:, 1] - ymin - cv2.fillPoly(mask, box.reshape(1, -1, 2).astype(np.int32), 1) - return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] - - def box_score_slow(self, bitmap, contour): - ''' - box_score_slow: use polyon mean score as the mean score - ''' - h, w = bitmap.shape[:2] - 
contour = contour.copy() - contour = np.reshape(contour, (-1, 2)) - - xmin = np.clip(np.min(contour[:, 0]), 0, w - 1) - xmax = np.clip(np.max(contour[:, 0]), 0, w - 1) - ymin = np.clip(np.min(contour[:, 1]), 0, h - 1) - ymax = np.clip(np.max(contour[:, 1]), 0, h - 1) - - mask = np.zeros((ymax - ymin + 1, xmax - xmin + 1), dtype=np.uint8) - - contour[:, 0] = contour[:, 0] - xmin - contour[:, 1] = contour[:, 1] - ymin - - cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype(np.int32), 1) - return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0] - - def __call__(self, outs_dict, shape_list): - pred = outs_dict['maps'] - if isinstance(pred, torch.Tensor): - pred = pred.cpu().numpy() - pred = pred[:, 0, :, :] - segmentation = pred > self.thresh - - boxes_batch = [] - for batch_index in range(pred.shape[0]): - src_h, src_w, ratio_h, ratio_w = shape_list[batch_index] - if self.dilation_kernel is not None: - mask = cv2.dilate( - np.array(segmentation[batch_index]).astype(np.uint8), - self.dilation_kernel) - else: - mask = segmentation[batch_index] - boxes, scores = self.boxes_from_bitmap(pred[batch_index], mask, - src_w, src_h) - - boxes_batch.append({'points': boxes}) - return boxes_batch \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py deleted file mode 100755 index c83fe5c33dbee9be142880aa088f054131dac042..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/postprocess/rec_postprocess.py +++ /dev/null @@ -1,690 +0,0 @@ -# copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -import numpy as np -import torch - - -class BaseRecLabelDecode(object): - """ Convert between text-label and text-index """ - - def __init__(self, - character_dict_path=None, - use_space_char=False): - - self.beg_str = "sos" - self.end_str = "eos" - - self.character_str = [] - if character_dict_path is None: - self.character_str = "0123456789abcdefghijklmnopqrstuvwxyz" - dict_character = list(self.character_str) - else: - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - for line in lines: - line = line.decode('utf-8').strip("\n").strip("\r\n") - self.character_str.append(line) - if use_space_char: - self.character_str.append(" ") - dict_character = list(self.character_str) - - dict_character = self.add_special_char(dict_character) - self.dict = {} - for i, char in enumerate(dict_character): - self.dict[char] = i - self.character = dict_character - - def add_special_char(self, dict_character): - return dict_character - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. 
""" - result_list = [] - ignored_tokens = self.get_ignored_tokens() - batch_size = len(text_index) - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] in ignored_tokens: - continue - if is_remove_duplicate: - # only for predict - if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ - batch_idx][idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - text = ''.join(char_list) - result_list.append((text, np.mean(conf_list))) - return result_list - - def get_ignored_tokens(self): - return [0] # for ctc blank - - -class CTCLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, - character_dict_path=None, - use_space_char=False, - **kwargs): - super(CTCLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def __call__(self, preds, label=None, *args, **kwargs): - if isinstance(preds, torch.Tensor): - preds = preds.numpy() - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=True) - - if label is None: - return text - label = self.decode(label) - return text, label - - def add_special_char(self, dict_character): - dict_character = ['blank'] + dict_character - return dict_character - - -class NRTRLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=True, **kwargs): - super(NRTRLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def __call__(self, preds, label=None, *args, **kwargs): - - if len(preds) == 2: - preds_id = preds[0] - preds_prob = preds[1] - if isinstance(preds_id, torch.Tensor): - preds_id = preds_id.numpy() - if isinstance(preds_prob, torch.Tensor): - preds_prob = preds_prob.numpy() - if preds_id[0][0] == 2: - preds_idx = preds_id[:, 1:] - preds_prob = preds_prob[:, 1:] - else: - preds_idx = preds_id - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - if label is None: - return text - label = self.decode(label[:, 1:]) - else: - if isinstance(preds, torch.Tensor): - preds = preds.numpy() - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - if label is None: - return text - label = self.decode(label[:, 1:]) - return text, label - - def add_special_char(self, dict_character): - dict_character = ['blank', '', '', ''] + dict_character - return dict_character - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. 
""" - result_list = [] - batch_size = len(text_index) - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - try: - char_idx = self.character[int(text_index[batch_idx][idx])] - except: - continue - if char_idx == '': # end - break - char_list.append(char_idx) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - text = ''.join(char_list) - result_list.append((text.lower(), np.mean(conf_list).tolist())) - return result_list - -class ViTSTRLabelDecode(NRTRLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(ViTSTRLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def __call__(self, preds, label=None, *args, **kwargs): - if isinstance(preds, torch.Tensor): - preds = preds[:, 1:].numpy() - else: - preds = preds[:, 1:] - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - if label is None: - return text - label = self.decode(label[:, 1:]) - return text, label - - def add_special_char(self, dict_character): - dict_character = ['', ''] + dict_character - return dict_character - - -class AttnLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, - character_dict_path=None, - use_space_char=False, - **kwargs): - super(AttnLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def add_special_char(self, dict_character): - self.beg_str = "sos" - self.end_str = "eos" - dict_character = dict_character - dict_character = [self.beg_str] + dict_character + [self.end_str] - return dict_character - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. 
""" - result_list = [] - ignored_tokens = self.get_ignored_tokens() - [beg_idx, end_idx] = self.get_ignored_tokens() - batch_size = len(text_index) - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] in ignored_tokens: - continue - if int(text_index[batch_idx][idx]) == int(end_idx): - break - if is_remove_duplicate: - # only for predict - if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ - batch_idx][idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - text = ''.join(char_list) - result_list.append((text, np.mean(conf_list))) - return result_list - - def __call__(self, preds, label=None, *args, **kwargs): - """ - text = self.decode(text) - if label is None: - return text - else: - label = self.decode(label, is_remove_duplicate=False) - return text, label - """ - if isinstance(preds, torch.Tensor): - preds = preds.cpu().numpy() - - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - if label is None: - return text - label = self.decode(label, is_remove_duplicate=False) - return text, label - - def get_ignored_tokens(self): - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - return [beg_idx, end_idx] - - def get_beg_end_flag_idx(self, beg_or_end): - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "unsupport type %s in get_beg_end_flag_idx" \ - % beg_or_end - return idx - - -class RFLLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(RFLLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def add_special_char(self, dict_character): - self.beg_str = "sos" - self.end_str = "eos" - dict_character = dict_character - dict_character = [self.beg_str] + dict_character + [self.end_str] - return dict_character - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. 
""" - result_list = [] - ignored_tokens = self.get_ignored_tokens() - [beg_idx, end_idx] = self.get_ignored_tokens() - batch_size = len(text_index) - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] in ignored_tokens: - continue - if int(text_index[batch_idx][idx]) == int(end_idx): - break - if is_remove_duplicate: - # only for predict - if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ - batch_idx][idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - text = ''.join(char_list) - result_list.append((text, np.mean(conf_list).tolist())) - return result_list - - def __call__(self, preds, label=None, *args, **kwargs): - # if seq_outputs is not None: - if isinstance(preds, tuple) or isinstance(preds, list): - cnt_outputs, seq_outputs = preds - if isinstance(seq_outputs, torch.Tensor): - seq_outputs = seq_outputs.numpy() - preds_idx = seq_outputs.argmax(axis=2) - preds_prob = seq_outputs.max(axis=2) - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - - if label is None: - return text - label = self.decode(label, is_remove_duplicate=False) - return text, label - - else: - cnt_outputs = preds - if isinstance(cnt_outputs, torch.Tensor): - cnt_outputs = cnt_outputs.numpy() - cnt_length = [] - for lens in cnt_outputs: - length = round(np.sum(lens)) - cnt_length.append(length) - if label is None: - return cnt_length - label = self.decode(label, is_remove_duplicate=False) - length = [len(res[0]) for res in label] - return cnt_length, length - - def get_ignored_tokens(self): - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - return [beg_idx, end_idx] - - def get_beg_end_flag_idx(self, beg_or_end): - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "unsupport type %s in get_beg_end_flag_idx" \ - % beg_or_end - return idx - - -class SRNLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, - character_dict_path=None, - use_space_char=False, - **kwargs): - self.max_text_length = kwargs.get('max_text_length', 25) - super(SRNLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def __call__(self, preds, label=None, *args, **kwargs): - pred = preds['predict'] - char_num = len(self.character_str) + 2 - if isinstance(pred, torch.Tensor): - pred = pred.numpy() - pred = np.reshape(pred, [-1, char_num]) - - preds_idx = np.argmax(pred, axis=1) - preds_prob = np.max(pred, axis=1) - - preds_idx = np.reshape(preds_idx, [-1, self.max_text_length]) - - preds_prob = np.reshape(preds_prob, [-1, self.max_text_length]) - - text = self.decode(preds_idx, preds_prob) - - if label is None: - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - return text - label = self.decode(label) - return text, label - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. 
""" - result_list = [] - ignored_tokens = self.get_ignored_tokens() - batch_size = len(text_index) - - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] in ignored_tokens: - continue - if is_remove_duplicate: - # only for predict - if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ - batch_idx][idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - - text = ''.join(char_list) - result_list.append((text, np.mean(conf_list))) - return result_list - - def add_special_char(self, dict_character): - dict_character = dict_character + [self.beg_str, self.end_str] - return dict_character - - def get_ignored_tokens(self): - beg_idx = self.get_beg_end_flag_idx("beg") - end_idx = self.get_beg_end_flag_idx("end") - return [beg_idx, end_idx] - - def get_beg_end_flag_idx(self, beg_or_end): - if beg_or_end == "beg": - idx = np.array(self.dict[self.beg_str]) - elif beg_or_end == "end": - idx = np.array(self.dict[self.end_str]) - else: - assert False, "unsupport type %s in get_beg_end_flag_idx" \ - % beg_or_end - return idx - - -class TableLabelDecode(object): - """ """ - - def __init__(self, - character_dict_path, - **kwargs): - list_character, list_elem = self.load_char_elem_dict(character_dict_path) - list_character = self.add_special_char(list_character) - list_elem = self.add_special_char(list_elem) - self.dict_character = {} - self.dict_idx_character = {} - for i, char in enumerate(list_character): - self.dict_idx_character[i] = char - self.dict_character[char] = i - self.dict_elem = {} - self.dict_idx_elem = {} - for i, elem in enumerate(list_elem): - self.dict_idx_elem[i] = elem - self.dict_elem[elem] = i - - def load_char_elem_dict(self, character_dict_path): - list_character = [] - list_elem = [] - with open(character_dict_path, "rb") as fin: - lines = fin.readlines() - substr = lines[0].decode('utf-8').strip("\n").strip("\r\n").split("\t") - character_num = int(substr[0]) - elem_num = int(substr[1]) - for cno in range(1, 1 + character_num): - character = lines[cno].decode('utf-8').strip("\n").strip("\r\n") - list_character.append(character) - for eno in range(1 + character_num, 1 + character_num + elem_num): - elem = lines[eno].decode('utf-8').strip("\n").strip("\r\n") - list_elem.append(elem) - return list_character, list_elem - - def add_special_char(self, list_character): - self.beg_str = "sos" - self.end_str = "eos" - list_character = [self.beg_str] + list_character + [self.end_str] - return list_character - - def __call__(self, preds): - structure_probs = preds['structure_probs'] - loc_preds = preds['loc_preds'] - if isinstance(structure_probs,torch.Tensor): - structure_probs = structure_probs.numpy() - if isinstance(loc_preds,torch.Tensor): - loc_preds = loc_preds.numpy() - structure_idx = structure_probs.argmax(axis=2) - structure_probs = structure_probs.max(axis=2) - structure_str, structure_pos, result_score_list, result_elem_idx_list = self.decode(structure_idx, - structure_probs, 'elem') - res_html_code_list = [] - res_loc_list = [] - batch_num = len(structure_str) - for bno in range(batch_num): - res_loc = [] - for sno in range(len(structure_str[bno])): - text = structure_str[bno][sno] - if text in ['', ' 0 and tmp_elem_idx == end_idx: - break - if tmp_elem_idx in ignored_tokens: - continue - - char_list.append(current_dict[tmp_elem_idx]) - 
elem_pos_list.append(idx) - score_list.append(structure_probs[batch_idx, idx]) - elem_idx_list.append(tmp_elem_idx) - result_list.append(char_list) - result_pos_list.append(elem_pos_list) - result_score_list.append(score_list) - result_elem_idx_list.append(elem_idx_list) - return result_list, result_pos_list, result_score_list, result_elem_idx_list - - def get_ignored_tokens(self, char_or_elem): - beg_idx = self.get_beg_end_flag_idx("beg", char_or_elem) - end_idx = self.get_beg_end_flag_idx("end", char_or_elem) - return [beg_idx, end_idx] - - def get_beg_end_flag_idx(self, beg_or_end, char_or_elem): - if char_or_elem == "char": - if beg_or_end == "beg": - idx = self.dict_character[self.beg_str] - elif beg_or_end == "end": - idx = self.dict_character[self.end_str] - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx of char" \ - % beg_or_end - elif char_or_elem == "elem": - if beg_or_end == "beg": - idx = self.dict_elem[self.beg_str] - elif beg_or_end == "end": - idx = self.dict_elem[self.end_str] - else: - assert False, "Unsupport type %s in get_beg_end_flag_idx of elem" \ - % beg_or_end - else: - assert False, "Unsupport type %s in char_or_elem" \ - % char_or_elem - return idx - - -class SARLabelDecode(BaseRecLabelDecode): - """ Convert between text-label and text-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(SARLabelDecode, self).__init__(character_dict_path, - use_space_char) - - self.rm_symbol = kwargs.get('rm_symbol', False) - - def add_special_char(self, dict_character): - beg_end_str = "" - unknown_str = "" - padding_str = "" - dict_character = dict_character + [unknown_str] - self.unknown_idx = len(dict_character) - 1 - dict_character = dict_character + [beg_end_str] - self.start_idx = len(dict_character) - 1 - self.end_idx = len(dict_character) - 1 - dict_character = dict_character + [padding_str] - self.padding_idx = len(dict_character) - 1 - return dict_character - - def decode(self, text_index, text_prob=None, is_remove_duplicate=False): - """ convert text-index into text-label. 
""" - result_list = [] - ignored_tokens = self.get_ignored_tokens() - - batch_size = len(text_index) - for batch_idx in range(batch_size): - char_list = [] - conf_list = [] - for idx in range(len(text_index[batch_idx])): - if text_index[batch_idx][idx] in ignored_tokens: - continue - if int(text_index[batch_idx][idx]) == int(self.end_idx): - if text_prob is None and idx == 0: - continue - else: - break - if is_remove_duplicate: - # only for predict - if idx > 0 and text_index[batch_idx][idx - 1] == text_index[ - batch_idx][idx]: - continue - char_list.append(self.character[int(text_index[batch_idx][ - idx])]) - if text_prob is not None: - conf_list.append(text_prob[batch_idx][idx]) - else: - conf_list.append(1) - text = ''.join(char_list) - if self.rm_symbol: - comp = re.compile('[^A-Z^a-z^0-9^\u4e00-\u9fa5]') - text = text.lower() - text = comp.sub('', text) - result_list.append((text, np.mean(conf_list).tolist())) - return result_list - - def __call__(self, preds, label=None, *args, **kwargs): - if isinstance(preds, torch.Tensor): - preds = preds.cpu().numpy() - preds_idx = preds.argmax(axis=2) - preds_prob = preds.max(axis=2) - - text = self.decode(preds_idx, preds_prob, is_remove_duplicate=False) - - if label is None: - return text - label = self.decode(label, is_remove_duplicate=False) - return text, label - - def get_ignored_tokens(self): - return [self.padding_idx] - - -class CANLabelDecode(BaseRecLabelDecode): - """ Convert between latex-symbol and symbol-index """ - - def __init__(self, character_dict_path=None, use_space_char=False, - **kwargs): - super(CANLabelDecode, self).__init__(character_dict_path, - use_space_char) - - def decode(self, text_index, preds_prob=None): - result_list = [] - batch_size = len(text_index) - for batch_idx in range(batch_size): - seq_end = text_index[batch_idx].argmin(0) - idx_list = text_index[batch_idx][:seq_end].tolist() - symbol_list = [self.character[idx] for idx in idx_list] - probs = [] - if preds_prob is not None: - probs = preds_prob[batch_idx][:len(symbol_list)].tolist() - - result_list.append([' '.join(symbol_list), probs]) - return result_list - - def __call__(self, preds, label=None, *args, **kwargs): - pred_prob, _, _, _ = preds - preds_idx = pred_prob.argmax(axis=2) - - text = self.decode(preds_idx) - if label is None: - return text - label = self.decode(label) - return text, label \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/__init__.py deleted file mode 100755 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml deleted file mode 100644 index 2dd3b633a8c13d5c450ebc93f84c8f59ae5c8d93..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/arch_config.yaml +++ /dev/null @@ -1,476 +0,0 @@ -ch_ptocr_mobile_v2.0_cls_infer: - model_type: cls - algorithm: CLS - Transform: - Backbone: - name: MobileNetV3 - scale: 0.35 - model_name: small - Neck: - Head: - name: ClsHead - class_dim: 2 - -Multilingual_PP-OCRv3_det_infer: - model_type: det - algorithm: DB - Transform: - Backbone: - name: MobileNetV3 - scale: 0.5 - model_name: large - disable_se: True - Neck: - name: RSEFPN - out_channels: 96 - 
shortcut: True - Head: - name: DBHead - k: 50 - -en_PP-OCRv3_det_infer: - model_type: det - algorithm: DB - Transform: - Backbone: - name: MobileNetV3 - scale: 0.5 - model_name: large - disable_se: True - Neck: - name: RSEFPN - out_channels: 96 - shortcut: True - Head: - name: DBHead - k: 50 - -ch_PP-OCRv3_det_infer: - model_type: det - algorithm: DB - Transform: - Backbone: - name: MobileNetV3 - scale: 0.5 - model_name: large - disable_se: True - Neck: - name: RSEFPN - out_channels: 96 - shortcut: True - Head: - name: DBHead - k: 50 - -en_PP-OCRv4_rec_infer: - model_type: rec - algorithm: SVTR_LCNet - Transform: - Backbone: - name: PPLCNetV3 - scale: 0.95 - Head: - name: MultiHead - out_channels_list: - CTCLabelDecode: 97 #'blank' + ...(62) + ' ' - head_list: - - CTCHead: - Neck: - name: svtr - dims: 120 - depth: 2 - hidden_dims: 120 - kernel_size: [ 1, 3 ] - use_guide: True - Head: - fc_decay: 0.00001 - - NRTRHead: - nrtr_dim: 384 - max_text_length: 25 - -ch_PP-OCRv4_det_infer: - model_type: det - algorithm: DB - Transform: null - Backbone: - name: PPLCNetV3 - scale: 0.75 - det: True - Neck: - name: RSEFPN - out_channels: 96 - shortcut: True - Head: - name: DBHead - k: 50 - -ch_PP-OCRv5_det_infer: - model_type: det - algorithm: DB - Transform: null - Backbone: - name: PPLCNetV3 - scale: 0.75 - det: True - Neck: - name: RSEFPN - out_channels: 96 - shortcut: True - Head: - name: DBHead - k: 50 - -ch_PP-OCRv4_det_server_infer: - model_type: det - algorithm: DB - Transform: null - Backbone: - name: PPHGNet_small - det: True - Neck: - name: LKPAN - out_channels: 256 - intracl: true - Head: - name: PFHeadLocal - k: 50 - mode: "large" - -ch_PP-OCRv4_rec_infer: - model_type: rec - algorithm: SVTR_LCNet - Transform: - Backbone: - name: PPLCNetV3 - scale: 0.95 - Head: - name: MultiHead - out_channels_list: - CTCLabelDecode: 6625 #'blank' + ...(6623) + ' ' - head_list: - - CTCHead: - Neck: - name: svtr - dims: 120 - depth: 2 - hidden_dims: 120 - kernel_size: [ 1, 3 ] - use_guide: True - Head: - fc_decay: 0.00001 - - NRTRHead: - nrtr_dim: 384 - max_text_length: 25 - -ch_PP-OCRv4_rec_server_infer: - model_type: rec - algorithm: SVTR_HGNet - Transform: - Backbone: - name: PPHGNet_small - Head: - name: MultiHead - out_channels_list: - CTCLabelDecode: 6625 #'blank' + ...(6623) + ' ' - head_list: - - CTCHead: - Neck: - name: svtr - dims: 120 - depth: 2 - hidden_dims: 120 - kernel_size: [ 1, 3 ] - use_guide: True - Head: - fc_decay: 0.00001 - - NRTRHead: - nrtr_dim: 384 - max_text_length: 25 - -ch_PP-OCRv4_rec_server_doc_infer: - model_type: rec - algorithm: SVTR_HGNet - Transform: - Backbone: - name: PPHGNet_small - Head: - name: MultiHead - out_channels_list: - CTCLabelDecode: 15631 - head_list: - - CTCHead: - Neck: - name: svtr - dims: 120 - depth: 2 - hidden_dims: 120 - kernel_size: [ 1, 3 ] - use_guide: True - Head: - fc_decay: 0.00001 - - NRTRHead: - nrtr_dim: 384 - max_text_length: 25 - -ch_PP-OCRv5_rec_server_infer: - model_type: rec - algorithm: SVTR_HGNet - Transform: - Backbone: - name: PPHGNetV2_B4 - text_rec: True - Head: - name: MultiHead - out_channels_list: - CTCLabelDecode: 18385 - head_list: - - CTCHead: - Neck: - name: svtr - dims: 120 - depth: 2 - hidden_dims: 120 - kernel_size: [ 1, 3 ] - use_guide: True - Head: - fc_decay: 0.00001 - - NRTRHead: - nrtr_dim: 384 - max_text_length: 25 - -ch_PP-OCRv5_rec_infer: - model_type: rec - algorithm: SVTR_HGNet - Transform: - Backbone: - name: PPLCNetV3 - scale: 0.95 - Head: - name: MultiHead - out_channels_list: - CTCLabelDecode: 18385 - 
head_list: - - CTCHead: - Neck: - name: svtr - dims: 120 - depth: 2 - hidden_dims: 120 - kernel_size: [ 1, 3 ] - use_guide: True - Head: - fc_decay: 0.00001 - - NRTRHead: - nrtr_dim: 384 - max_text_length: 25 - -chinese_cht_PP-OCRv3_rec_infer: - model_type: rec - algorithm: SVTR - Transform: - Backbone: - name: MobileNetV1Enhance - scale: 0.5 - last_conv_stride: [1, 2] - last_pool_type: avg - Neck: - name: SequenceEncoder - encoder_type: svtr - dims: 64 - depth: 2 - hidden_dims: 120 - use_guide: True - Head: - name: CTCHead -# out_channels: 8423 - fc_decay: 0.00001 - -latin_PP-OCRv3_rec_infer: - model_type: rec - algorithm: SVTR - Transform: - Backbone: - name: MobileNetV1Enhance - scale: 0.5 - last_conv_stride: [ 1, 2 ] - last_pool_type: avg - Neck: - name: SequenceEncoder - encoder_type: svtr - dims: 64 - depth: 2 - hidden_dims: 120 - use_guide: True - Head: - name: CTCHead -# out_channels: 187 - fc_decay: 0.00001 - -cyrillic_PP-OCRv3_rec_infer: - model_type: rec - algorithm: SVTR - Transform: - Backbone: - name: MobileNetV1Enhance - scale: 0.5 - last_conv_stride: [ 1, 2 ] - last_pool_type: avg - Neck: - name: SequenceEncoder - encoder_type: svtr - dims: 64 - depth: 2 - hidden_dims: 120 - use_guide: True - Head: - name: CTCHead -# out_channels: 165 - fc_decay: 0.00001 - -arabic_PP-OCRv3_rec_infer: - model_type: rec - algorithm: SVTR - Transform: - Backbone: - name: MobileNetV1Enhance - scale: 0.5 - last_conv_stride: [ 1, 2 ] - last_pool_type: avg - Neck: - name: SequenceEncoder - encoder_type: svtr - dims: 64 - depth: 2 - hidden_dims: 120 - use_guide: True - Head: - name: CTCHead -# out_channels: 164 - fc_decay: 0.00001 - -korean_PP-OCRv3_rec_infer: - model_type: rec - algorithm: SVTR - Transform: - Backbone: - name: MobileNetV1Enhance - scale: 0.5 - last_conv_stride: [ 1, 2 ] - last_pool_type: avg - Neck: - name: SequenceEncoder - encoder_type: svtr - dims: 64 - depth: 2 - hidden_dims: 120 - use_guide: True - Head: - name: CTCHead -# out_channels: 3690 - fc_decay: 0.00001 - -japan_PP-OCRv3_rec_infer: - model_type: rec - algorithm: SVTR - Transform: - Backbone: - name: MobileNetV1Enhance - scale: 0.5 - last_conv_stride: [ 1, 2 ] - last_pool_type: avg - Neck: - name: SequenceEncoder - encoder_type: svtr - dims: 64 - depth: 2 - hidden_dims: 120 - use_guide: True - Head: - name: CTCHead -# out_channels: 4401 - fc_decay: 0.00001 - -ta_PP-OCRv3_rec_infer: - model_type: rec - algorithm: SVTR - Transform: - Backbone: - name: MobileNetV1Enhance - scale: 0.5 - last_conv_stride: [ 1, 2 ] - last_pool_type: avg - Neck: - name: SequenceEncoder - encoder_type: svtr - dims: 64 - depth: 2 - hidden_dims: 120 - use_guide: True - Head: - name: CTCHead -# out_channels: 130 - fc_decay: 0.00001 - -te_PP-OCRv3_rec_infer: - model_type: rec - algorithm: SVTR - Transform: - Backbone: - name: MobileNetV1Enhance - scale: 0.5 - last_conv_stride: [ 1, 2 ] - last_pool_type: avg - Neck: - name: SequenceEncoder - encoder_type: svtr - dims: 64 - depth: 2 - hidden_dims: 120 - use_guide: True - Head: - name: CTCHead -# out_channels: 153 - fc_decay: 0.00001 - -ka_PP-OCRv3_rec_infer: - model_type: rec - algorithm: SVTR - Transform: - Backbone: - name: MobileNetV1Enhance - scale: 0.5 - last_conv_stride: [ 1, 2 ] - last_pool_type: avg - Neck: - name: SequenceEncoder - encoder_type: svtr - dims: 64 - depth: 2 - hidden_dims: 120 - use_guide: True - Head: - name: CTCHead -# out_channels: 155 - fc_decay: 0.00001 - -devanagari_PP-OCRv3_rec_infer: - model_type: rec - algorithm: SVTR - Transform: - Backbone: - name: 
MobileNetV1Enhance - scale: 0.5 - last_conv_stride: [ 1, 2 ] - last_pool_type: avg - Neck: - name: SequenceEncoder - encoder_type: svtr - dims: 64 - depth: 2 - hidden_dims: 120 - use_guide: True - Head: - name: CTCHead -# out_channels: 169 - fc_decay: 0.00001 - diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt deleted file mode 100644 index e97abf39274df77fbad066ee4635aebc6743140c..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/arabic_dict.txt +++ /dev/null @@ -1,162 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -ء -آ -أ -ؤ -إ -ئ -ا -ب -ة -ت -ث -ج -ح -خ -د -ذ -ر -ز -س -ش -ص -ض -ط -ظ -ع -غ -ف -ق -ك -ل -م -ن -ه -و -ى -ي -ً -ٌ -ٍ -َ -ُ -ِ -ّ -ْ -ٓ -ٔ -ٰ -ٱ -ٹ -پ -چ -ڈ -ڑ -ژ -ک -ڭ -گ -ں -ھ -ۀ -ہ -ۂ -ۃ -ۆ -ۇ -ۈ -ۋ -ی -ې -ے -ۓ -ە -١ -٢ -٣ -٤ -٥ -٦ -٧ -٨ -٩ diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt deleted file mode 100644 index cc1aa4724b9a6f0e15275bcf61c91c26b6550c3e..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/chinese_cht_dict.txt +++ /dev/null @@ -1,8421 +0,0 @@ -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -¥ -® -° -± -² -´ -· -» -É -Ë -Ó -× -Ü -à -á -ä -è -é -ì -í -ò -ó -÷ -ú -ü -ā -ē -ī -ō -ū -ǐ -ǒ -ɔ -ɡ -ʌ -ˋ -Λ -Ο -Φ -Ω -α -β -ε -θ -μ -π -З -И -Й -П -Я -г -— -‖ -‘ -’ -“ -” -• -… -‧ -′ -″ -※ -℃ -№ -™ -Ⅱ -Ⅲ -Ⅳ -← -↑ -→ -↓ -⇋ -∈ -∑ -√ -∞ -∣ -∧ -∩ -∫ -∶ -≈ -≠ -≤ -≥ -⊙ -⊥ -① -② -③ -④ -⑧ -⑴ -⑵ -⑶ -─ -│ -┅ -┌ -├ -█ -▎ -▏ -▕ -■ -□ -▪ -▲ -△ -▼ -◆ -◇ -○ -◎ -● -◥ -★ -☆ -❋ -❤ -  -、 -。 -〇 -〉 -《 -》 -「 -」 -『 -』 -【 -】 -〔 -〕 -〖 -〗 -の -サ -シ -ジ -マ -ㄱ -ㆍ -㎏ -㎡ -㐂 -㐱 -㙟 -㴪 -㸃 -䖝 -䝉 -䰾 -䲁 -一 -丁 -七 -丄 -丈 -三 -上 -下 -丌 -不 -与 -丏 -丐 -丑 -且 -丕 -世 -丘 -丙 -丞 -丟 -両 -並 -丨 -丫 -中 -丰 -串 -丶 -丸 -丹 -主 -丼 -丿 -乂 -乃 -久 -么 -之 -乍 -乎 -乏 -乒 -乓 -乖 -乗 -乘 -乙 -乚 -乜 -九 -乞 -也 -乩 -乭 -乳 -乸 -乹 -乾 -亀 -亂 -亅 -了 -予 -亊 -事 -二 -亍 -云 -互 -亓 -五 -井 -亘 -些 -亜 -亞 -亟 -亠 -亡 -亢 -交 -亥 -亦 -亨 -享 -京 -亭 -亮 -亰 -亳 -亶 -亹 -人 -亻 -什 -仁 -仂 -仃 -仄 -仇 -仉 -今 -介 -仍 -仏 -仔 -仕 -他 -仗 -付 -仙 -仛 -仝 -仞 -仟 -仡 -代 -令 -以 -仨 -仫 -仮 -仰 -仲 -仳 -仵 -件 -仺 -任 -仼 -份 -仿 -企 -伃 -伈 -伉 -伊 -伋 -伍 -伎 -伏 -伐 -休 -伕 -伙 -伝 -伢 -伯 -估 -伱 -伴 -伶 -伷 -伸 -伺 -似 -伽 -伾 -佀 -佁 -佃 -但 -佇 -佈 -佉 -佋 -位 -低 -住 -佐 -佑 -体 -佔 -何 -佗 -佘 -余 -佚 -佛 -作 -佝 -佞 -佟 -你 -佣 -佤 -佧 -佩 -佬 -佯 -佰 -佳 -併 -佶 -佹 -佺 -佼 -佾 -使 -侁 -侃 -侄 -侅 -來 -侈 -侊 -例 -侍 -侏 -侑 -侖 -侗 -侘 -侚 -供 -依 -侞 -価 -侮 -侯 -侵 -侶 -侷 -侹 -便 -俁 -係 -促 -俄 -俅 -俊 -俋 -俌 -俍 -俎 -俏 -俐 -俑 -俗 -俘 -俚 -俛 -保 -俞 -俟 -俠 -信 -俬 -修 -俯 -俱 -俳 -俴 -俵 -俶 -俸 -俺 -俽 -俾 -倆 -倈 -倉 -個 -倌 -倍 -們 -倒 -倓 -倔 -倖 -倗 -倘 -候 -倚 -倜 -倞 -借 -倡 -倢 -倣 -値 -倦 -倧 -倩 -倪 -倫 -倬 -倭 -倮 -倻 -值 -偁 -偃 -假 -偈 -偉 -偊 -偌 -偍 -偎 -偏 -偓 -偕 -做 -停 -健 -偪 -偲 -側 -偵 -偶 -偷 -偸 -偽 -傀 -傃 -傅 -傈 -傉 -傍 -傑 -傒 -傕 -傖 -傘 -備 -傜 -傢 -傣 -催 -傭 -傲 -傳 -債 -傷 -傻 -傾 -僅 -僉 -僊 -働 -像 -僑 -僔 -僕 -僖 -僙 -僚 -僜 -僡 -僧 -僩 -僭 -僮 -僰 -僱 -僳 -僴 -僵 -價 -僻 -儀 -儁 -儂 -億 -儆 -儇 -儈 -儉 -儋 
-儐 -儒 -儔 -儕 -儘 -儚 -儞 -償 -儡 -儥 -儦 -優 -儫 -儱 -儲 -儷 -儺 -儻 -儼 -兀 -允 -元 -兄 -充 -兆 -先 -光 -克 -兌 -免 -児 -兒 -兔 -兕 -兗 -兜 -入 -內 -全 -兩 -兪 -八 -公 -六 -兮 -共 -兵 -其 -具 -典 -兼 -兿 -冀 -冂 -円 -冇 -冉 -冊 -再 -冏 -冑 -冒 -冕 -冖 -冗 -冚 -冠 -冢 -冤 -冥 -冧 -冨 -冪 -冫 -冬 -冮 -冰 -冴 -冶 -冷 -冼 -冽 -凃 -凄 -准 -凈 -凋 -凌 -凍 -凖 -凜 -凝 -凞 -几 -凡 -処 -凪 -凬 -凰 -凱 -凳 -凵 -凶 -凸 -凹 -出 -函 -刀 -刁 -刂 -刃 -刄 -分 -切 -刈 -刊 -刎 -刑 -划 -列 -初 -判 -別 -刦 -刧 -刨 -利 -刪 -刮 -到 -制 -刷 -券 -刺 -刻 -刼 -剁 -剃 -則 -削 -剋 -剌 -前 -剎 -剏 -剔 -剖 -剛 -剝 -剡 -剣 -剩 -剪 -剮 -副 -割 -創 -剿 -劃 -劄 -劇 -劈 -劉 -劊 -劌 -劍 -劑 -劔 -力 -功 -加 -劣 -助 -努 -劫 -劬 -劭 -劵 -効 -劼 -劾 -勁 -勃 -勅 -勇 -勉 -勐 -勑 -勒 -勔 -動 -勖 -勗 -勘 -務 -勛 -勝 -勞 -募 -勢 -勣 -勤 -勦 -勰 -勱 -勲 -勳 -勵 -勷 -勸 -勺 -勻 -勾 -勿 -匂 -匄 -包 -匆 -匈 -匋 -匍 -匏 -匐 -匕 -化 -北 -匙 -匚 -匝 -匠 -匡 -匣 -匪 -匯 -匱 -匸 -匹 -匾 -匿 -區 -十 -千 -卅 -升 -午 -卉 -半 -卋 -卍 -卐 -卑 -卒 -卓 -協 -南 -博 -卜 -卞 -卟 -占 -卡 -卣 -卦 -卧 -卩 -卬 -卮 -卯 -印 -危 -卲 -即 -卵 -卷 -卸 -卹 -卺 -卻 -卽 -卿 -厄 -厓 -厔 -厙 -厚 -厝 -原 -厥 -厭 -厰 -厲 -厴 -厶 -去 -參 -叄 -又 -叉 -及 -友 -反 -収 -叔 -叕 -取 -受 -叛 -叟 -叡 -叢 -口 -古 -句 -另 -叨 -叩 -只 -叫 -召 -叭 -叮 -可 -台 -叱 -史 -右 -叵 -司 -叻 -叼 -吁 -吃 -各 -吆 -合 -吉 -吊 -吋 -同 -名 -后 -吏 -吐 -向 -吒 -吔 -吖 -君 -吝 -吞 -吟 -吠 -吡 -吥 -否 -吧 -吩 -含 -吮 -吱 -吲 -吳 -吵 -吶 -吸 -吹 -吻 -吼 -吾 -呀 -呂 -呃 -呈 -呉 -告 -呋 -呎 -呢 -呤 -呦 -周 -呱 -味 -呵 -呷 -呸 -呼 -命 -呾 -咀 -咁 -咂 -咄 -咅 -咆 -咋 -和 -咎 -咑 -咒 -咔 -咕 -咖 -咗 -咘 -咚 -咟 -咤 -咥 -咧 -咨 -咩 -咪 -咫 -咬 -咭 -咯 -咱 -咲 -咳 -咸 -咻 -咼 -咽 -咾 -咿 -哀 -品 -哂 -哄 -哆 -哇 -哈 -哉 -哌 -哎 -哏 -哐 -哖 -哚 -哞 -員 -哥 -哦 -哨 -哩 -哪 -哭 -哮 -哱 -哲 -哺 -哼 -唃 -唄 -唆 -唇 -唉 -唏 -唐 -唑 -唔 -唘 -唧 -唫 -唬 -唭 -售 -唯 -唱 -唳 -唵 -唷 -唸 -唻 -唾 -啁 -啃 -啄 -商 -啉 -啊 -啍 -問 -啓 -啖 -啚 -啜 -啞 -啟 -啡 -啣 -啤 -啥 -啦 -啪 -啫 -啯 -啰 -啱 -啲 -啵 -啶 -啷 -啻 -啼 -啾 -喀 -喂 -喃 -善 -喆 -喇 -喈 -喉 -喊 -喋 -喏 -喔 -喘 -喙 -喚 -喜 -喝 -喢 -喦 -喧 -喪 -喫 -喬 -單 -喰 -喱 -喲 -喳 -喵 -喹 -喻 -喼 -嗄 -嗅 -嗆 -嗇 -嗊 -嗎 -嗑 -嗒 -嗓 -嗔 -嗖 -嗚 -嗜 -嗝 -嗞 -嗡 -嗢 -嗣 -嗦 -嗨 -嗩 -嗪 -嗮 -嗯 -嗲 -嗶 -嗹 -嗽 -嘀 -嘅 -嘆 -嘉 -嘌 -嘍 -嘎 -嘏 -嘔 -嘗 -嘚 -嘛 -嘜 -嘞 -嘟 -嘢 -嘣 -嘥 -嘧 -嘩 -嘬 -嘮 -嘯 -嘰 -嘲 -嘴 -嘶 -嘸 -嘹 -嘻 -嘿 -噁 -噌 -噍 -噏 -噓 -噗 -噝 -噠 -噢 -噤 -噥 -噦 -器 -噩 -噪 -噬 -噯 -噰 -噲 -噴 -噶 -噸 -噹 -噻 -嚇 -嚈 -嚎 -嚏 -嚐 -嚒 -嚓 -嚕 -嚗 -嚙 -嚞 -嚟 -嚤 -嚦 -嚧 -嚨 -嚩 -嚮 -嚳 -嚴 -嚶 -嚷 -嚼 -嚿 -囀 -囂 -囃 -囉 -囊 -囍 -囑 -囒 -囓 -囗 -囚 -四 -囝 -回 -因 -囡 -団 -囤 -囧 -囪 -囮 -囯 -困 -囲 -図 -囶 -囷 -囹 -固 -囿 -圂 -圃 -圄 -圈 -圉 -國 -圍 -圏 -園 -圓 -圖 -圗 -團 -圜 -土 -圧 -在 -圩 -圪 -圭 -圯 -地 -圳 -圻 -圾 -址 -均 -坊 -坋 -坌 -坍 -坎 -坐 -坑 -坖 -坡 -坣 -坤 -坦 -坨 -坩 -坪 -坫 -坬 -坭 -坮 -坯 -坳 -坵 -坶 -坷 -坻 -垂 -垃 -垈 -型 -垍 -垓 -垕 -垚 -垛 -垞 -垟 -垠 -垢 -垣 -垮 -垯 -垰 -垵 -垸 -垻 -垿 -埃 -埅 -埇 -埈 -埋 -埌 -城 -埏 -埒 -埔 -埕 -埗 -埜 -域 -埠 -埡 -埤 -埧 -埨 -埪 -埭 -埮 -埴 -埵 -執 -培 -基 -埻 -埼 -堀 -堂 -堃 -堅 -堆 -堇 -堈 -堉 -堊 -堍 -堖 -堝 -堡 -堤 -堦 -堪 -堮 -堯 -堰 -報 -場 -堵 -堷 -堺 -塀 -塅 -塆 -塊 -塋 -塌 -塍 -塏 -塑 -塔 -塗 -塘 -塙 -塜 -塞 -塡 -塢 -塤 -塨 -塩 -填 -塬 -塭 -塰 -塱 -塲 -塵 -塹 -塽 -塾 -墀 -境 -墅 -墉 -墊 -墎 -墓 -増 -墘 -墜 -增 -墟 -墡 -墣 -墨 -墩 -墫 -墬 -墮 -墱 -墳 -墺 -墼 -墾 -壁 -壄 -壆 -壇 -壋 -壌 -壎 -壐 -壑 -壓 -壔 -壕 -壘 -壙 -壞 -壟 -壠 -壢 -壤 -壩 -士 -壬 -壯 -壱 -壴 -壹 -壺 -壽 -夀 -夆 -変 -夊 -夋 -夌 -夏 -夔 -夕 -外 -夙 -多 -夜 -夠 -夢 -夤 -夥 -大 -天 -太 -夫 -夬 -夭 -央 -夯 -失 -夷 -夾 -奀 -奄 -奇 -奈 -奉 -奎 -奏 -奐 -契 -奓 -奔 -奕 -套 -奘 -奚 -奠 -奢 -奣 -奧 -奩 -奪 -奫 -奭 -奮 -女 -奴 -奶 -她 -好 -妀 -妁 -如 -妃 -妄 -妊 -妍 -妏 -妑 -妒 -妓 -妖 -妙 -妝 -妞 -妠 -妤 -妥 -妧 -妨 -妭 -妮 -妯 -妲 -妳 -妸 -妹 -妺 -妻 -妾 -姀 -姁 -姃 -姆 -姈 -姉 -姊 -始 -姌 -姍 -姐 -姑 -姒 -姓 -委 -姚 -姜 -姝 -姣 -姥 -姦 -姨 -姪 -姫 -姬 -姮 -姵 -姶 -姸 -姻 -姿 -威 -娃 -娉 -娋 -娌 -娍 -娎 -娑 -娖 -娘 -娛 -娜 -娟 -娠 -娣 -娥 -娩 -娫 -娳 -娶 -娸 -娼 -娽 -婀 -婁 -婆 -婉 -婊 -婑 -婕 -婚 -婢 -婦 -婧 -婪 -婭 -婯 -婷 -婺 -婻 -婼 -婿 -媃 -媄 -媊 -媐 -媒 -媓 -媖 -媗 -媚 -媛 -媜 -媞 -媧 -媭 -媯 -媲 -媳 -媺 -媼 -媽 -媾 -媿 -嫁 -嫂 -嫄 -嫈 -嫉 -嫌 -嫖 -嫘 -嫚 -嫡 -嫣 -嫦 -嫩 -嫪 -嫲 -嫳 -嫵 -嫺 -嫻 -嬅 -嬈 -嬉 -嬋 -嬌 -嬗 -嬛 -嬝 -嬡 -嬤 -嬨 -嬪 -嬬 -嬭 -嬰 -嬴 -嬸 -嬾 -嬿 -孀 -孃 -孆 -孋 -孌 -子 -孑 -孔 -孕 -孖 -字 -存 -孚 -孛 -孜 -孝 -孟 -孢 -季 -孤 -孩 -孫 -孬 -孮 -孰 -孳 -孵 -學 -孺 -孻 -孽 -孿 -宀 -它 -宅 -宇 -守 -安 -宋 -完 -宍 -宏 -宓 -宕 -宗 -官 -宙 -定 -宛 -宜 -実 -客 -宣 -室 -宥 -宦 -宧 -宮 -宰 -害 -宴 -宵 -家 -宸 -容 -宿 -寀 -寁 -寂 -寄 -寅 -密 -寇 -寈 -寊 -富 -寐 -寒 -寓 -寔 -寕 
-寖 -寗 -寘 -寛 -寜 -寞 -察 -寡 -寢 -寤 -寥 -實 -寧 -寨 -審 -寫 -寬 -寮 -寯 -寰 -寳 -寵 -寶 -寸 -寺 -対 -封 -専 -尃 -射 -將 -專 -尉 -尊 -尋 -對 -導 -小 -尐 -少 -尓 -尕 -尖 -尗 -尙 -尚 -尢 -尤 -尨 -尪 -尬 -就 -尷 -尹 -尺 -尻 -尼 -尾 -尿 -局 -屁 -屄 -居 -屆 -屇 -屈 -屋 -屌 -屍 -屎 -屏 -屐 -屑 -屓 -展 -屚 -屜 -屠 -屢 -層 -履 -屬 -屭 -屯 -山 -屹 -屺 -屻 -岀 -岈 -岌 -岐 -岑 -岔 -岡 -岢 -岣 -岧 -岩 -岪 -岫 -岬 -岰 -岱 -岳 -岵 -岷 -岸 -岻 -峁 -峅 -峇 -峋 -峍 -峒 -峘 -峙 -峚 -峠 -峨 -峩 -峪 -峭 -峯 -峰 -峴 -島 -峻 -峼 -峽 -崁 -崆 -崇 -崈 -崋 -崍 -崎 -崐 -崑 -崒 -崔 -崖 -崗 -崘 -崙 -崚 -崛 -崞 -崟 -崠 -崢 -崤 -崧 -崩 -崬 -崮 -崱 -崴 -崵 -崶 -崽 -嵇 -嵊 -嵋 -嵌 -嵎 -嵐 -嵒 -嵕 -嵖 -嵗 -嵙 -嵛 -嵜 -嵨 -嵩 -嵬 -嵮 -嵯 -嵰 -嵴 -嵻 -嵿 -嶁 -嶂 -嶃 -嶄 -嶇 -嶋 -嶌 -嶍 -嶒 -嶔 -嶗 -嶝 -嶠 -嶢 -嶦 -嶧 -嶪 -嶬 -嶰 -嶲 -嶴 -嶷 -嶸 -嶺 -嶼 -嶽 -巂 -巄 -巆 -巋 -巌 -巍 -巎 -巑 -巒 -巔 -巖 -巘 -巛 -川 -州 -巡 -巢 -工 -左 -巧 -巨 -巫 -差 -巰 -己 -已 -巳 -巴 -巶 -巷 -巻 -巽 -巾 -巿 -市 -布 -帆 -希 -帑 -帔 -帕 -帖 -帘 -帙 -帚 -帛 -帝 -帡 -帢 -帥 -師 -席 -帯 -帰 -帳 -帶 -帷 -常 -帽 -幀 -幃 -幄 -幅 -幌 -幔 -幕 -幗 -幚 -幛 -幟 -幡 -幢 -幣 -幪 -幫 -干 -平 -年 -幵 -幷 -幸 -幹 -幺 -幻 -幼 -幽 -幾 -庀 -庁 -広 -庇 -床 -序 -底 -庖 -店 -庚 -府 -庠 -庢 -庥 -度 -座 -庫 -庭 -庲 -庵 -庶 -康 -庸 -庹 -庼 -庾 -廁 -廂 -廄 -廆 -廈 -廉 -廊 -廋 -廌 -廍 -廑 -廓 -廔 -廕 -廖 -廙 -廚 -廝 -廞 -廟 -廠 -廡 -廢 -廣 -廧 -廨 -廩 -廬 -廰 -廱 -廳 -延 -廷 -廸 -建 -廻 -廼 -廿 -弁 -弄 -弅 -弇 -弈 -弉 -弊 -弋 -弍 -式 -弐 -弒 -弓 -弔 -引 -弖 -弗 -弘 -弛 -弟 -弢 -弦 -弧 -弨 -弩 -弭 -弱 -張 -強 -弸 -弼 -弾 -彀 -彄 -彅 -彆 -彈 -彊 -彌 -彎 -彐 -彔 -彖 -彗 -彘 -彙 -彜 -彞 -彠 -彡 -形 -彣 -彤 -彥 -彧 -彩 -彪 -彫 -彬 -彭 -彰 -影 -彳 -彷 -役 -彼 -彿 -往 -征 -徂 -待 -徇 -很 -徉 -徊 -律 -後 -徐 -徑 -徒 -得 -徘 -徙 -徜 -從 -徠 -御 -徧 -徨 -復 -循 -徫 -徬 -徭 -微 -徳 -徴 -徵 -德 -徸 -徹 -徽 -心 -忄 -必 -忉 -忌 -忍 -忐 -忑 -忒 -志 -忘 -忙 -応 -忝 -忞 -忠 -快 -忬 -忯 -忱 -忳 -念 -忻 -忽 -忿 -怍 -怎 -怒 -怕 -怖 -怙 -怛 -思 -怠 -怡 -急 -怦 -性 -怨 -怪 -怯 -怵 -恁 -恂 -恃 -恆 -恊 -恍 -恐 -恕 -恙 -恢 -恣 -恤 -恥 -恨 -恩 -恪 -恬 -恭 -息 -恰 -恵 -恿 -悄 -悅 -悆 -悉 -悌 -悍 -悔 -悖 -悚 -悛 -悝 -悞 -悟 -悠 -患 -悧 -您 -悪 -悰 -悲 -悳 -悵 -悶 -悸 -悼 -情 -惆 -惇 -惑 -惔 -惕 -惘 -惚 -惜 -惟 -惠 -惡 -惣 -惦 -惰 -惱 -惲 -想 -惶 -惹 -惺 -愁 -愃 -愆 -愈 -愉 -愍 -意 -愐 -愒 -愔 -愕 -愚 -愛 -愜 -感 -愣 -愧 -愨 -愫 -愭 -愴 -愷 -愼 -愾 -愿 -慄 -慈 -態 -慌 -慎 -慕 -慘 -慚 -慜 -慟 -慢 -慣 -慥 -慧 -慨 -慮 -慰 -慳 -慵 -慶 -慷 -慾 -憂 -憊 -憋 -憍 -憎 -憐 -憑 -憓 -憕 -憙 -憚 -憤 -憧 -憨 -憩 -憫 -憬 -憲 -憶 -憺 -憻 -憾 -懂 -懃 -懇 -懈 -應 -懋 -懌 -懍 -懐 -懣 -懦 -懮 -懲 -懵 -懶 -懷 -懸 -懺 -懼 -懽 -懾 -懿 -戀 -戇 -戈 -戊 -戌 -戍 -戎 -成 -我 -戒 -戔 -戕 -或 -戙 -戚 -戛 -戟 -戡 -戢 -戥 -戦 -戩 -截 -戮 -戰 -戱 -戲 -戳 -戴 -戶 -戸 -戻 -戽 -戾 -房 -所 -扁 -扆 -扇 -扈 -扉 -手 -扌 -才 -扎 -扒 -打 -扔 -托 -扙 -扛 -扞 -扣 -扥 -扦 -扭 -扮 -扯 -扳 -扶 -批 -扼 -找 -承 -技 -抃 -抄 -抇 -抉 -把 -抑 -抒 -抓 -投 -抖 -抗 -折 -抦 -披 -抬 -抱 -抵 -抹 -抻 -押 -抽 -抿 -拂 -拆 -拇 -拈 -拉 -拋 -拌 -拍 -拎 -拏 -拐 -拒 -拓 -拔 -拖 -拗 -拘 -拙 -拚 -招 -拜 -拝 -拡 -括 -拭 -拮 -拯 -拱 -拳 -拴 -拷 -拺 -拼 -拽 -拾 -拿 -持 -指 -按 -挎 -挑 -挖 -挙 -挨 -挪 -挫 -振 -挲 -挵 -挹 -挺 -挻 -挾 -捂 -捆 -捉 -捌 -捍 -捎 -捏 -捐 -捒 -捕 -捜 -捦 -捧 -捨 -捩 -捫 -捭 -捱 -捲 -捶 -捷 -捺 -捻 -掀 -掂 -掃 -掄 -掇 -授 -掉 -掌 -掏 -掐 -排 -掖 -掘 -掙 -掛 -掞 -掟 -掠 -採 -探 -掣 -接 -控 -推 -掩 -措 -掬 -掰 -掾 -揀 -揄 -揆 -揉 -揍 -描 -提 -插 -揔 -揖 -揚 -換 -握 -揪 -揭 -揮 -援 -揸 -揺 -損 -搏 -搐 -搓 -搔 -搖 -搗 -搜 -搞 -搠 -搢 -搪 -搬 -搭 -搳 -搴 -搵 -搶 -搽 -搾 -摂 -摒 -摔 -摘 -摜 -摞 -摟 -摠 -摧 -摩 -摭 -摯 -摳 -摴 -摵 -摶 -摸 -摹 -摺 -摻 -摽 -撃 -撇 -撈 -撐 -撒 -撓 -撕 -撖 -撙 -撚 -撞 -撣 -撤 -撥 -撩 -撫 -撬 -播 -撮 -撰 -撲 -撳 -撻 -撼 -撾 -撿 -擀 -擁 -擂 -擅 -擇 -擊 -擋 -操 -擎 -擒 -擔 -擘 -據 -擠 -擢 -擥 -擦 -擬 -擯 -擰 -擱 -擲 -擴 -擷 -擺 -擼 -擾 -攀 -攏 -攔 -攖 -攘 -攜 -攝 -攞 -攢 -攣 -攤 -攪 -攫 -攬 -支 -攴 -攵 -收 -攷 -攸 -改 -攻 -攽 -放 -政 -故 -效 -敍 -敎 -敏 -救 -敔 -敕 -敖 -敗 -敘 -教 -敝 -敞 -敟 -敢 -散 -敦 -敫 -敬 -敭 -敲 -整 -敵 -敷 -數 -敻 -敾 -斂 -斃 -文 -斌 -斎 -斐 -斑 -斕 -斖 -斗 -料 -斛 -斜 -斝 -斟 -斡 -斤 -斥 -斧 -斬 -斯 -新 -斷 -方 -於 -施 -斿 -旁 -旂 -旃 -旄 -旅 -旉 -旋 -旌 -旎 -族 -旖 -旗 -旙 -旛 -旡 -既 -日 -旦 -旨 -早 -旬 -旭 -旱 -旲 -旳 -旺 -旻 -旼 -旽 -旾 -旿 -昀 -昂 -昃 -昆 -昇 -昉 -昊 -昌 -昍 -明 -昏 -昐 -易 -昔 -昕 -昚 -昛 -昜 -昝 -昞 -星 -映 -昡 -昣 -昤 -春 -昧 -昨 -昪 -昫 -昭 -是 -昰 -昱 -昴 -昵 -昶 -昺 -晁 -時 -晃 -晈 -晉 -晊 -晏 -晗 -晙 -晚 -晛 -晝 -晞 -晟 -晤 -晦 -晧 -晨 -晩 -晪 -晫 -晭 -普 -景 -晰 -晳 -晴 -晶 -晷 -晸 -智 -晾 -暃 -暄 -暅 -暇 -暈 -暉 -暊 -暌 -暎 -暏 -暐 -暑 -暕 -暖 -暗 -暘 -暝 -暟 -暠 -暢 -暦 -暨 -暫 -暮 -暱 -暲 -暴 -暸 -暹 -暻 -暾 -曄 -曅 -曆 -曇 -曉 
-曌 -曔 -曖 -曙 -曜 -曝 -曠 -曦 -曧 -曨 -曩 -曬 -曮 -曰 -曲 -曳 -更 -曶 -曷 -書 -曹 -曺 -曼 -曽 -曾 -替 -最 -會 -月 -有 -朊 -朋 -服 -朏 -朐 -朓 -朔 -朕 -朖 -朗 -望 -朝 -期 -朦 -朧 -木 -未 -末 -本 -札 -朱 -朴 -朵 -朶 -朽 -朿 -杁 -杉 -杋 -杌 -李 -杏 -材 -村 -杓 -杖 -杙 -杜 -杞 -束 -杠 -杣 -杤 -杧 -杬 -杭 -杯 -東 -杲 -杳 -杴 -杵 -杷 -杻 -杼 -松 -板 -极 -枇 -枉 -枋 -枏 -析 -枕 -枖 -林 -枚 -枛 -果 -枝 -枠 -枡 -枯 -枰 -枱 -枲 -枳 -架 -枷 -枸 -枹 -枼 -柁 -柃 -柄 -柉 -柊 -柎 -柏 -某 -柑 -柒 -染 -柔 -柘 -柚 -柜 -柝 -柞 -柟 -查 -柩 -柬 -柯 -柰 -柱 -柳 -柴 -柵 -柶 -柷 -査 -柾 -柿 -栃 -栄 -栐 -栒 -栓 -栜 -栝 -栞 -校 -栢 -栨 -栩 -株 -栲 -栴 -核 -根 -栻 -格 -栽 -桀 -桁 -桂 -桃 -桄 -桅 -框 -案 -桉 -桌 -桎 -桐 -桑 -桓 -桔 -桕 -桖 -桙 -桜 -桝 -桫 -桱 -桲 -桴 -桶 -桷 -桼 -桿 -梀 -梁 -梂 -梃 -梅 -梆 -梉 -梏 -梓 -梔 -梗 -梘 -條 -梟 -梠 -梢 -梣 -梧 -梨 -梫 -梭 -梯 -械 -梱 -梳 -梵 -梶 -梽 -棄 -棆 -棉 -棋 -棍 -棐 -棒 -棓 -棕 -棖 -棗 -棘 -棚 -棛 -棟 -棠 -棡 -棣 -棧 -棨 -棩 -棪 -棫 -森 -棱 -棲 -棵 -棶 -棹 -棺 -棻 -棼 -棽 -椅 -椆 -椇 -椋 -植 -椎 -椏 -椒 -椙 -椥 -椪 -椰 -椲 -椴 -椵 -椹 -椽 -椿 -楂 -楊 -楓 -楔 -楗 -楙 -楚 -楝 -楞 -楠 -楡 -楢 -楣 -楤 -楦 -楧 -楨 -楫 -業 -楮 -楯 -楳 -極 -楷 -楸 -楹 -楽 -楿 -概 -榆 -榊 -榍 -榎 -榑 -榔 -榕 -榖 -榗 -榘 -榛 -榜 -榞 -榢 -榣 -榤 -榦 -榧 -榨 -榫 -榭 -榮 -榲 -榴 -榷 -榻 -榿 -槀 -槁 -槃 -槊 -構 -槌 -槍 -槎 -槐 -槓 -槔 -槗 -様 -槙 -槤 -槩 -槭 -槰 -槱 -槲 -槳 -槺 -槻 -槼 -槽 -槿 -樀 -樁 -樂 -樅 -樆 -樊 -樋 -樑 -樓 -樗 -樘 -標 -樞 -樟 -模 -樣 -樨 -権 -樫 -樵 -樸 -樹 -樺 -樻 -樽 -樾 -橄 -橇 -橈 -橋 -橐 -橒 -橓 -橘 -橙 -橚 -機 -橡 -橢 -橪 -橫 -橿 -檀 -檄 -檇 -檉 -檊 -檎 -檐 -檔 -檗 -檜 -檞 -檠 -檡 -檢 -檣 -檦 -檨 -檫 -檬 -檯 -檳 -檵 -檸 -檻 -檽 -櫂 -櫃 -櫆 -櫈 -櫓 -櫚 -櫛 -櫞 -櫟 -櫥 -櫨 -櫪 -櫱 -櫸 -櫻 -櫾 -櫿 -欄 -欉 -權 -欏 -欒 -欖 -欞 -欠 -次 -欣 -欥 -欲 -欸 -欹 -欺 -欽 -款 -歆 -歇 -歉 -歊 -歌 -歎 -歐 -歓 -歙 -歛 -歡 -止 -正 -此 -步 -武 -歧 -歩 -歪 -歲 -歳 -歴 -歷 -歸 -歹 -死 -歿 -殂 -殃 -殄 -殆 -殉 -殊 -殑 -殖 -殘 -殛 -殞 -殟 -殤 -殭 -殮 -殯 -殲 -殳 -段 -殷 -殺 -殻 -殼 -殿 -毀 -毅 -毆 -毉 -毋 -毌 -母 -毎 -每 -毐 -毒 -毓 -比 -毖 -毗 -毘 -毛 -毫 -毬 -毯 -毴 -毸 -毽 -毿 -氂 -氈 -氍 -氏 -氐 -民 -氓 -氖 -気 -氘 -氙 -氚 -氛 -氟 -氣 -氦 -氧 -氨 -氪 -氫 -氬 -氮 -氯 -氰 -水 -氵 -氷 -永 -氹 -氻 -氽 -氾 -汀 -汁 -求 -汊 -汎 -汐 -汕 -汗 -汛 -汜 -汝 -汞 -江 -池 -污 -汧 -汨 -汩 -汪 -汭 -汰 -汲 -汴 -汶 -決 -汽 -汾 -沁 -沂 -沃 -沄 -沅 -沆 -沇 -沈 -沉 -沌 -沍 -沏 -沐 -沒 -沓 -沔 -沖 -沘 -沙 -沚 -沛 -沜 -沢 -沨 -沫 -沭 -沮 -沯 -沱 -河 -沸 -油 -沺 -治 -沼 -沽 -沾 -沿 -況 -泂 -泄 -泆 -泇 -泉 -泊 -泌 -泐 -泓 -泔 -法 -泖 -泗 -泚 -泛 -泠 -泡 -波 -泣 -泥 -泩 -泫 -泮 -泯 -泰 -泱 -泳 -泵 -洄 -洋 -洌 -洎 -洗 -洙 -洛 -洞 -洢 -洣 -洤 -津 -洨 -洩 -洪 -洮 -洱 -洲 -洳 -洵 -洸 -洹 -洺 -活 -洽 -派 -流 -浄 -浙 -浚 -浛 -浜 -浞 -浟 -浠 -浡 -浣 -浤 -浥 -浦 -浩 -浪 -浮 -浯 -浴 -浵 -海 -浸 -浹 -涅 -涇 -消 -涉 -涌 -涎 -涑 -涓 -涔 -涕 -涙 -涪 -涫 -涮 -涯 -液 -涵 -涸 -涼 -涿 -淄 -淅 -淆 -淇 -淋 -淌 -淍 -淎 -淏 -淑 -淓 -淖 -淘 -淙 -淚 -淛 -淝 -淞 -淠 -淡 -淤 -淥 -淦 -淨 -淩 -淪 -淫 -淬 -淮 -淯 -淰 -深 -淳 -淵 -淶 -混 -淸 -淹 -淺 -添 -淼 -淽 -渃 -清 -済 -渉 -渋 -渕 -渙 -渚 -減 -渝 -渟 -渠 -渡 -渣 -渤 -渥 -渦 -渫 -測 -渭 -港 -渲 -渴 -游 -渺 -渼 -渽 -渾 -湃 -湄 -湉 -湊 -湍 -湓 -湔 -湖 -湘 -湛 -湜 -湞 -湟 -湣 -湥 -湧 -湫 -湮 -湯 -湳 -湴 -湼 -満 -溁 -溇 -溈 -溉 -溋 -溎 -溏 -源 -準 -溙 -溜 -溝 -溟 -溢 -溥 -溦 -溧 -溪 -溫 -溯 -溱 -溲 -溴 -溵 -溶 -溺 -溼 -滀 -滁 -滂 -滄 -滅 -滇 -滈 -滉 -滋 -滌 -滎 -滏 -滑 -滓 -滔 -滕 -滘 -滙 -滝 -滬 -滯 -滲 -滴 -滷 -滸 -滹 -滻 -滽 -滾 -滿 -漁 -漂 -漆 -漇 -漈 -漎 -漏 -漓 -演 -漕 -漚 -漠 -漢 -漣 -漩 -漪 -漫 -漬 -漯 -漱 -漲 -漳 -漴 -漵 -漷 -漸 -漼 -漾 -漿 -潁 -潑 -潔 -潘 -潛 -潞 -潟 -潢 -潤 -潭 -潮 -潯 -潰 -潲 -潺 -潼 -潽 -潾 -潿 -澀 -澁 -澂 -澄 -澆 -澇 -澈 -澉 -澋 -澌 -澍 -澎 -澔 -澗 -澠 -澡 -澣 -澤 -澥 -澧 -澪 -澮 -澯 -澱 -澳 -澶 -澹 -澻 -激 -濁 -濂 -濃 -濉 -濊 -濋 -濕 -濘 -濙 -濛 -濞 -濟 -濠 -濡 -濤 -濫 -濬 -濮 -濯 -濰 -濱 -濲 -濶 -濺 -濼 -濾 -瀁 -瀅 -瀆 -瀉 -瀍 -瀏 -瀑 -瀔 -瀕 -瀘 -瀚 -瀛 -瀝 -瀞 -瀟 -瀠 -瀣 -瀦 -瀧 -瀨 -瀬 -瀰 -瀲 -瀴 -瀶 -瀹 -瀾 -灃 -灊 -灌 -灑 -灘 -灝 -灞 -灡 -灣 -灤 -灧 -火 -灰 -灴 -灸 -灼 -災 -炁 -炅 -炆 -炊 -炎 -炒 -炔 -炕 -炘 -炙 -炟 -炣 -炤 -炫 -炬 -炭 -炮 -炯 -炱 -炲 -炳 -炷 -炸 -為 -炻 -烈 -烉 -烊 -烋 -烏 -烒 -烔 -烘 -烙 -烜 -烝 -烤 -烯 -烱 -烴 -烷 -烹 -烺 -烽 -焃 -焄 -焉 -焊 -焌 -焓 -焗 -焙 -焚 -焜 -焞 -無 -焦 -焯 -焰 -焱 -焴 -然 -焻 -焼 -焿 -煇 -煉 -煊 -煌 -煎 -煐 -煒 -煔 -煕 -煖 -煙 -煚 -煜 -煞 -煠 -煤 -煥 -煦 -照 -煨 -煩 -煬 -煮 -煲 -煳 -煵 -煶 -煸 -煽 -熄 -熅 -熇 -熈 -熊 -熏 -熒 -熔 -熖 -熗 -熘 -熙 -熜 -熟 -熠 -熤 -熥 -熨 -熬 -熯 -熱 -熲 -熳 -熵 -熹 -熺 -熼 -熾 -熿 -燁 -燃 -燄 -燈 -燉 -燊 -燎 -燏 -燐 -燒 -燔 -燕 -燘 -燙 -燚 -燜 -燝 -營 -燥 -燦 -燧 -燫 -燬 -燭 -燮 -燴 -燹 -燻 -燼 -燾 -燿 -爀 -爆 -爌 -爍 -爐 -爔 -爚 -爛 -爝 -爨 
-爪 -爬 -爭 -爯 -爰 -爲 -爵 -父 -爸 -爹 -爺 -爻 -爽 -爾 -爿 -牁 -牂 -牆 -片 -版 -牌 -牒 -牕 -牖 -牘 -牙 -牛 -牝 -牟 -牠 -牡 -牢 -牧 -物 -牯 -牲 -特 -牻 -牼 -牽 -犀 -犁 -犂 -犇 -犍 -犎 -犖 -犛 -犢 -犧 -犨 -犬 -犯 -犰 -犴 -犽 -狀 -狂 -狄 -狍 -狎 -狐 -狒 -狓 -狗 -狙 -狛 -狟 -狠 -狡 -狦 -狨 -狩 -狳 -狶 -狷 -狸 -狹 -狻 -狼 -猁 -猄 -猇 -猊 -猗 -猙 -猛 -猜 -猝 -猞 -猢 -猥 -猨 -猩 -猳 -猴 -猶 -猷 -猺 -猻 -猾 -猿 -獁 -獃 -獄 -獅 -獇 -獎 -獏 -獐 -獒 -獠 -獢 -獣 -獨 -獬 -獮 -獯 -獰 -獲 -獴 -獵 -獷 -獸 -獺 -獻 -獼 -獾 -玀 -玄 -玆 -率 -玉 -王 -玎 -玏 -玓 -玕 -玖 -玗 -玘 -玙 -玟 -玠 -玡 -玢 -玥 -玧 -玨 -玩 -玫 -玭 -玲 -玳 -玶 -玷 -玹 -玻 -玾 -珀 -珂 -珅 -珈 -珉 -珊 -珌 -珍 -珎 -珏 -珖 -珙 -珝 -珞 -珠 -珡 -珣 -珤 -珥 -珦 -珧 -珩 -珪 -班 -珮 -珵 -珹 -珺 -珽 -現 -琁 -球 -琄 -琅 -理 -琇 -琉 -琊 -琍 -琎 -琚 -琛 -琡 -琢 -琤 -琥 -琦 -琨 -琪 -琬 -琮 -琯 -琰 -琱 -琳 -琴 -琵 -琶 -琹 -琺 -琿 -瑀 -瑁 -瑂 -瑄 -瑅 -瑆 -瑈 -瑊 -瑋 -瑑 -瑒 -瑕 -瑗 -瑙 -瑚 -瑛 -瑜 -瑝 -瑞 -瑟 -瑠 -瑢 -瑣 -瑤 -瑥 -瑧 -瑨 -瑩 -瑪 -瑭 -瑯 -瑰 -瑱 -瑳 -瑴 -瑺 -瑾 -璀 -璁 -璃 -璄 -璆 -璇 -璈 -璉 -璋 -璌 -璐 -璕 -璘 -璙 -璚 -璜 -璞 -璟 -璠 -璡 -璣 -璥 -璦 -璧 -璨 -璩 -璪 -璫 -璬 -璮 -環 -璱 -璵 -璸 -璹 -璽 -璿 -瓈 -瓊 -瓌 -瓏 -瓑 -瓔 -瓖 -瓘 -瓚 -瓛 -瓜 -瓞 -瓠 -瓢 -瓣 -瓤 -瓦 -瓮 -瓴 -瓶 -瓷 -瓿 -甂 -甄 -甌 -甍 -甑 -甕 -甘 -甙 -甚 -甜 -生 -甡 -產 -産 -甥 -甦 -用 -甩 -甪 -甫 -甬 -甯 -田 -由 -甲 -申 -男 -甸 -甹 -町 -甾 -畀 -畇 -畈 -畊 -畋 -界 -畎 -畏 -畐 -畑 -畔 -留 -畜 -畝 -畠 -畢 -略 -畦 -畧 -番 -畫 -畬 -畯 -異 -畲 -畳 -畵 -當 -畷 -畸 -畹 -畿 -疃 -疆 -疇 -疊 -疋 -疌 -疍 -疏 -疑 -疒 -疕 -疙 -疚 -疝 -疣 -疤 -疥 -疫 -疲 -疳 -疵 -疸 -疹 -疼 -疽 -疾 -痂 -病 -症 -痊 -痍 -痔 -痕 -痘 -痙 -痛 -痞 -痟 -痠 -痢 -痣 -痤 -痧 -痩 -痰 -痱 -痲 -痴 -痹 -痺 -痿 -瘀 -瘁 -瘊 -瘋 -瘍 -瘓 -瘙 -瘜 -瘞 -瘟 -瘠 -瘡 -瘢 -瘤 -瘦 -瘧 -瘩 -瘰 -瘴 -瘺 -癀 -療 -癆 -癇 -癌 -癒 -癖 -癘 -癜 -癟 -癡 -癢 -癤 -癥 -癩 -癬 -癭 -癮 -癯 -癰 -癱 -癲 -癸 -発 -登 -發 -白 -百 -皂 -的 -皆 -皇 -皈 -皋 -皎 -皐 -皓 -皖 -皙 -皚 -皛 -皝 -皞 -皮 -皰 -皴 -皷 -皸 -皺 -皿 -盂 -盃 -盅 -盆 -盈 -益 -盋 -盌 -盎 -盒 -盔 -盛 -盜 -盞 -盟 -盡 -監 -盤 -盥 -盦 -盧 -盨 -盩 -盪 -盫 -目 -盯 -盱 -盲 -直 -盷 -相 -盹 -盺 -盼 -盾 -眀 -省 -眉 -看 -県 -眙 -眛 -眜 -眞 -真 -眠 -眥 -眨 -眩 -眭 -眯 -眵 -眶 -眷 -眸 -眺 -眼 -眾 -着 -睇 -睛 -睜 -睞 -睡 -睢 -督 -睥 -睦 -睨 -睪 -睫 -睭 -睹 -睺 -睽 -睾 -睿 -瞄 -瞅 -瞋 -瞌 -瞎 -瞑 -瞓 -瞞 -瞢 -瞥 -瞧 -瞪 -瞫 -瞬 -瞭 -瞰 -瞳 -瞻 -瞼 -瞽 -瞿 -矇 -矍 -矗 -矚 -矛 -矜 -矞 -矢 -矣 -知 -矧 -矩 -短 -矮 -矯 -石 -矸 -矽 -砂 -砋 -砌 -砍 -砒 -研 -砝 -砢 -砥 -砦 -砧 -砩 -砫 -砭 -砮 -砯 -砰 -砲 -砳 -破 -砵 -砷 -砸 -砼 -硂 -硃 -硅 -硇 -硏 -硐 -硒 -硓 -硚 -硜 -硝 -硤 -硨 -硫 -硬 -硭 -硯 -硼 -碁 -碇 -碉 -碌 -碎 -碑 -碓 -碕 -碗 -碘 -碚 -碟 -碡 -碣 -碧 -碩 -碪 -碭 -碰 -碲 -碳 -碴 -碶 -碸 -確 -碻 -碼 -碽 -碾 -磁 -磅 -磊 -磋 -磐 -磔 -磕 -磘 -磙 -磚 -磜 -磡 -磨 -磪 -磬 -磯 -磱 -磲 -磵 -磷 -磺 -磻 -磾 -礁 -礄 -礎 -礐 -礑 -礒 -礙 -礠 -礦 -礪 -礫 -礬 -礮 -礱 -礴 -示 -礻 -礽 -社 -祀 -祁 -祂 -祆 -祇 -祈 -祉 -祋 -祏 -祐 -祓 -祕 -祖 -祗 -祙 -祚 -祛 -祜 -祝 -神 -祟 -祠 -祥 -祧 -票 -祭 -祹 -祺 -祼 -祿 -禁 -禃 -禇 -禍 -禎 -福 -禑 -禓 -禔 -禕 -禘 -禛 -禟 -禠 -禤 -禦 -禧 -禨 -禩 -禪 -禮 -禰 -禱 -禵 -禹 -禺 -禼 -禽 -禾 -禿 -秀 -私 -秈 -秉 -秋 -科 -秒 -秕 -秘 -租 -秠 -秣 -秤 -秦 -秧 -秩 -秭 -秳 -秸 -移 -稀 -稅 -稈 -稉 -程 -稍 -稑 -稔 -稗 -稘 -稙 -稚 -稜 -稞 -稟 -稠 -種 -稱 -稲 -稷 -稹 -稺 -稻 -稼 -稽 -稾 -稿 -穀 -穂 -穆 -穈 -穉 -穌 -積 -穎 -穗 -穟 -穠 -穡 -穢 -穣 -穩 -穫 -穰 -穴 -穵 -究 -穹 -空 -穿 -突 -窄 -窅 -窈 -窋 -窒 -窕 -窖 -窗 -窘 -窟 -窠 -窣 -窨 -窩 -窪 -窮 -窯 -窰 -窶 -窺 -窿 -竄 -竅 -竇 -竈 -竊 -立 -竑 -站 -竜 -竟 -章 -竣 -童 -竦 -竩 -竭 -端 -競 -竹 -竺 -竻 -竿 -笄 -笆 -笈 -笏 -笑 -笘 -笙 -笛 -笞 -笠 -笥 -符 -笨 -笩 -笪 -第 -笭 -笮 -笯 -笱 -笳 -笹 -筅 -筆 -等 -筊 -筋 -筌 -筍 -筏 -筐 -筒 -答 -策 -筘 -筠 -筥 -筦 -筧 -筬 -筭 -筱 -筲 -筳 -筵 -筶 -筷 -筻 -箆 -箇 -箋 -箍 -箏 -箐 -箑 -箒 -箔 -箕 -算 -箜 -管 -箬 -箭 -箱 -箴 -箸 -節 -篁 -範 -篆 -篇 -築 -篊 -篋 -篌 -篔 -篙 -篝 -篠 -篡 -篤 -篥 -篦 -篩 -篪 -篭 -篯 -篳 -篷 -簀 -簃 -簇 -簉 -簋 -簍 -簑 -簕 -簗 -簞 -簠 -簡 -簧 -簪 -簫 -簷 -簸 -簹 -簺 -簽 -簾 -簿 -籀 -籃 -籌 -籍 -籐 -籙 -籛 -籜 -籝 -籟 -籠 -籣 -籤 -籥 -籪 -籬 -籮 -籲 -米 -籽 -籾 -粄 -粉 -粍 -粑 -粒 -粕 -粗 -粘 -粟 -粢 -粥 -粦 -粧 -粩 -粱 -粲 -粳 -粵 -粹 -粼 -粽 -精 -粿 -糀 -糅 -糊 -糌 -糍 -糎 -糕 -糖 -糙 -糜 -糝 -糞 -糟 -糠 -糢 -糧 -糬 -糯 -糰 -糴 -糶 -糸 -糹 -糺 -系 -糾 -紀 -紂 -約 -紅 -紆 -紇 -紈 -紉 -紊 -紋 -納 -紐 -紑 -紓 -純 -紕 -紗 -紘 -紙 -級 -紛 -紜 -紝 -紞 -素 -紡 -索 -紫 -紮 -累 -細 -紱 -紲 -紳 -紵 -紹 -紺 -紿 -終 -絃 -組 -絆 -経 -絎 -結 -絕 -絛 -絜 -絞 -絡 -絢 -給 -絨 -絪 -絮 -統 -絲 -絳 -絵 -絶 -絹 -絺 -綁 -綃 -綈 -綉 -綎 -綏 -經 -綖 -継 -続 -綜 -綝 -綞 -綠 -綢 -綣 -綦 -綧 -綫 -綬 -維 -綮 -綰 -綱 -網 -綳 -綴 -綸 -綺 -綻 -綽 -綾 -綿 -緁 -緃 -緄 -緈 -緊 
-緋 -総 -緑 -緒 -緖 -緘 -線 -緜 -緝 -緞 -締 -緡 -緣 -緤 -編 -緩 -緬 -緯 -緱 -緲 -練 -緹 -緻 -縂 -縄 -縈 -縉 -縊 -縕 -縛 -縝 -縞 -縠 -縡 -縣 -縤 -縫 -縮 -縯 -縱 -縴 -縵 -縷 -縹 -縻 -總 -績 -繁 -繃 -繆 -繇 -繒 -織 -繕 -繖 -繙 -繚 -繞 -繡 -繩 -繪 -繫 -繭 -繰 -繳 -繹 -繻 -繼 -繽 -繾 -纁 -纂 -纈 -續 -纍 -纏 -纓 -纔 -纕 -纖 -纘 -纛 -纜 -缐 -缶 -缸 -缺 -缽 -罃 -罄 -罅 -罈 -罉 -罌 -罍 -罐 -罔 -罕 -罘 -罟 -罡 -罨 -罩 -罪 -置 -罰 -罱 -署 -罳 -罵 -罶 -罷 -罹 -罽 -羂 -羅 -羆 -羈 -羊 -羋 -羌 -美 -羔 -羕 -羗 -羙 -羚 -羞 -羡 -羣 -群 -羥 -羧 -羨 -義 -羯 -羰 -羱 -羲 -羸 -羹 -羽 -羿 -翀 -翁 -翂 -翃 -翅 -翊 -翌 -翎 -翏 -習 -翔 -翕 -翙 -翜 -翟 -翠 -翡 -翥 -翦 -翩 -翬 -翮 -翰 -翱 -翳 -翹 -翻 -翼 -耀 -老 -考 -耄 -者 -耆 -而 -耍 -耎 -耐 -耑 -耒 -耔 -耕 -耗 -耘 -耙 -耜 -耦 -耨 -耬 -耳 -耵 -耶 -耷 -耽 -耿 -聃 -聆 -聊 -聒 -聖 -聘 -聚 -聞 -聟 -聨 -聯 -聰 -聱 -聲 -聳 -聴 -聶 -職 -聽 -聾 -聿 -肄 -肅 -肆 -肇 -肉 -肋 -肌 -肏 -肖 -肘 -肚 -肛 -肜 -肝 -肟 -股 -肢 -肥 -肩 -肪 -肫 -肯 -肱 -育 -肸 -肹 -肺 -肼 -肽 -胂 -胃 -胄 -胅 -胇 -胊 -背 -胍 -胎 -胖 -胗 -胙 -胚 -胛 -胝 -胞 -胡 -胤 -胥 -胬 -胭 -胰 -胱 -胳 -胴 -胸 -胺 -胼 -能 -脂 -脅 -脆 -脇 -脈 -脊 -脒 -脖 -脘 -脛 -脣 -脩 -脫 -脬 -脭 -脯 -脲 -脳 -脷 -脹 -脾 -腆 -腈 -腊 -腋 -腌 -腎 -腐 -腑 -腓 -腔 -腕 -腥 -腦 -腧 -腩 -腫 -腮 -腰 -腱 -腳 -腴 -腸 -腹 -腺 -腿 -膀 -膂 -膈 -膊 -膏 -膚 -膛 -膜 -膝 -膠 -膣 -膥 -膦 -膨 -膩 -膮 -膳 -膺 -膽 -膾 -膿 -臀 -臂 -臃 -臆 -臉 -臊 -臍 -臏 -臘 -臚 -臞 -臟 -臠 -臣 -臧 -臨 -自 -臭 -臯 -至 -致 -臺 -臻 -臼 -臾 -舂 -舅 -與 -興 -舉 -舊 -舌 -舍 -舎 -舒 -舔 -舖 -舘 -舛 -舜 -舞 -舟 -舢 -舥 -舨 -舩 -航 -舫 -般 -舲 -舵 -舶 -舷 -舸 -船 -舺 -艅 -艇 -艉 -艋 -艎 -艏 -艔 -艘 -艙 -艚 -艦 -艮 -良 -艱 -色 -艶 -艷 -艸 -艽 -艾 -艿 -芃 -芊 -芋 -芍 -芎 -芑 -芒 -芘 -芙 -芛 -芝 -芡 -芥 -芨 -芩 -芪 -芫 -芬 -芭 -芮 -芯 -花 -芳 -芴 -芷 -芸 -芹 -芻 -芽 -芾 -苄 -苅 -苑 -苒 -苓 -苔 -苕 -苗 -苛 -苜 -苝 -苞 -苟 -苡 -苣 -苤 -若 -苦 -苧 -苪 -苫 -苯 -英 -苳 -苴 -苷 -苺 -苻 -苼 -苾 -茀 -茁 -茂 -范 -茄 -茅 -茆 -茇 -茈 -茉 -茌 -茗 -茘 -茚 -茛 -茜 -茝 -茨 -茫 -茬 -茭 -茮 -茯 -茱 -茲 -茴 -茵 -茶 -茷 -茸 -茹 -茺 -茼 -荀 -荃 -荅 -荇 -草 -荊 -荎 -荏 -荒 -荔 -荖 -荘 -荳 -荷 -荸 -荻 -荼 -荽 -莆 -莉 -莊 -莎 -莒 -莓 -莕 -莖 -莘 -莙 -莛 -莜 -莞 -莠 -莢 -莧 -莨 -莩 -莪 -莫 -莽 -莿 -菀 -菁 -菅 -菇 -菈 -菉 -菊 -菌 -菍 -菏 -菑 -菓 -菔 -菖 -菘 -菜 -菝 -菟 -菠 -菡 -菥 -菩 -菪 -菫 -華 -菰 -菱 -菲 -菴 -菶 -菸 -菹 -菺 -菼 -菽 -菾 -萁 -萃 -萄 -萇 -萊 -萌 -萍 -萎 -萐 -萘 -萜 -萠 -萡 -萣 -萩 -萬 -萭 -萱 -萵 -萸 -萹 -萼 -落 -葃 -葆 -葉 -葊 -葎 -葑 -葒 -著 -葙 -葚 -葛 -葜 -葝 -葡 -董 -葦 -葩 -葫 -葬 -葭 -葯 -葰 -葳 -葵 -葶 -葷 -葺 -蒂 -蒄 -蒍 -蒎 -蒐 -蒓 -蒔 -蒗 -蒙 -蒜 -蒞 -蒟 -蒡 -蒢 -蒤 -蒧 -蒨 -蒭 -蒯 -蒲 -蒴 -蒸 -蒹 -蒺 -蒻 -蒼 -蒽 -蒾 -蒿 -蓀 -蓁 -蓂 -蓄 -蓆 -蓉 -蓋 -蓍 -蓑 -蓓 -蓖 -蓘 -蓚 -蓧 -蓨 -蓪 -蓬 -蓭 -蓮 -蓯 -蓳 -蓼 -蓽 -蓿 -蔆 -蔎 -蔑 -蔓 -蔔 -蔕 -蔗 -蔘 -蔚 -蔝 -蔞 -蔡 -蔣 -蔥 -蔦 -蔬 -蔭 -蔴 -蔵 -蔻 -蔽 -蕁 -蕃 -蕅 -蕈 -蕉 -蕊 -蕎 -蕑 -蕒 -蕖 -蕘 -蕙 -蕚 -蕟 -蕡 -蕢 -蕤 -蕨 -蕩 -蕪 -蕭 -蕷 -蕹 -蕺 -蕻 -蕾 -薀 -薄 -薆 -薇 -薈 -薊 -薌 -薏 -薐 -薑 -薔 -薗 -薘 -薙 -薛 -薜 -薞 -薟 -薡 -薦 -薨 -薩 -薪 -薫 -薬 -薯 -薰 -薲 -薷 -薸 -薹 -薺 -薾 -薿 -藁 -藉 -藍 -藎 -藏 -藐 -藔 -藕 -藜 -藝 -藟 -藤 -藥 -藦 -藨 -藩 -藪 -藶 -藸 -藹 -藺 -藻 -藿 -蘂 -蘄 -蘅 -蘆 -蘇 -蘊 -蘋 -蘐 -蘑 -蘓 -蘗 -蘘 -蘚 -蘞 -蘢 -蘧 -蘩 -蘭 -蘵 -蘶 -蘸 -蘼 -蘿 -虉 -虎 -虐 -虓 -虔 -處 -虖 -虛 -虜 -虞 -號 -虢 -虧 -虨 -虯 -虱 -虵 -虹 -虺 -虻 -蚆 -蚊 -蚋 -蚌 -蚍 -蚓 -蚖 -蚜 -蚝 -蚡 -蚢 -蚣 -蚤 -蚧 -蚨 -蚩 -蚪 -蚯 -蚱 -蚴 -蚵 -蚶 -蚺 -蚼 -蛀 -蛄 -蛇 -蛉 -蛋 -蛍 -蛐 -蛑 -蛔 -蛙 -蛛 -蛞 -蛟 -蛤 -蛭 -蛯 -蛸 -蛹 -蛺 -蛻 -蛾 -蜀 -蜂 -蜃 -蜆 -蜇 -蜈 -蜉 -蜊 -蜍 -蜑 -蜒 -蜓 -蜘 -蜚 -蜛 -蜜 -蜞 -蜢 -蜣 -蜥 -蜨 -蜮 -蜯 -蜱 -蜴 -蜷 -蜻 -蜾 -蜿 -蝀 -蝌 -蝍 -蝎 -蝓 -蝕 -蝗 -蝘 -蝙 -蝚 -蝟 -蝠 -蝣 -蝤 -蝦 -蝨 -蝮 -蝯 -蝰 -蝲 -蝴 -蝶 -蝸 -蝽 -螂 -螃 -螄 -螅 -螈 -螋 -融 -螐 -螔 -螞 -螟 -螠 -螢 -螣 -螥 -螫 -螭 -螯 -螳 -螶 -螺 -螻 -螽 -螾 -蟀 -蟄 -蟅 -蟆 -蟊 -蟋 -蟌 -蟎 -蟑 -蟒 -蟜 -蟠 -蟥 -蟪 -蟫 -蟬 -蟯 -蟲 -蟳 -蟴 -蟶 -蟹 -蟻 -蟾 -蠂 -蠃 -蠄 -蠅 -蠆 -蠊 -蠋 -蠍 -蠐 -蠑 -蠓 -蠔 -蠕 -蠖 -蠘 -蠙 -蠟 -蠡 -蠢 -蠣 -蠱 -蠲 -蠵 -蠶 -蠷 -蠹 -蠻 -血 -衂 -衆 -行 -衍 -衎 -術 -衕 -衖 -街 -衙 -衚 -衛 -衜 -衝 -衞 -衡 -衢 -衣 -表 -衩 -衫 -衰 -衲 -衷 -衽 -衾 -衿 -袁 -袂 -袈 -袋 -袍 -袓 -袖 -袛 -袞 -袤 -袪 -被 -袱 -袴 -袾 -裁 -裂 -裊 -裎 -裒 -裔 -裕 -裖 -裘 -裙 -補 -裝 -裟 -裡 -裨 -裬 -裱 -裳 -裴 -裵 -裸 -裹 -製 -裾 -裿 -褀 -褂 -複 -褌 -褍 -褎 -褐 -褒 -褓 -褔 -褘 -褙 -褚 -褞 -褥 -褧 -褪 -褫 -褭 -褲 -褶 -褸 -褻 -襄 -襌 -襖 -襞 -襟 -襠 -襤 -襦 -襪 -襯 -襲 -襴 -襶 -襻 -襾 -西 -要 -覃 -覆 -覇 -覈 -見 -覌 -規 -覓 -視 -覚 -覡 -覦 -覧 -親 -覬 -覲 -観 -覺 -覽 -覿 -觀 -角 -觔 -觙 -觚 -觜 -解 -觭 -觱 -觴 -觶 -觸 -觿 -言 -訁 -訂 -訃 -訇 -計 -訊 -訌 -討 -訏 -訐 -訒 -訓 -訔 -訕 -訖 -託 -記 -訛 -訝 -訟 -訣 -訥 -訪 -設 -許 -訴 -訶 -診 -註 -証 
-訾 -詁 -詆 -詈 -詐 -詒 -詔 -評 -詛 -詞 -詠 -詡 -詢 -詣 -詥 -試 -詧 -詩 -詫 -詭 -詮 -詰 -話 -該 -詳 -詵 -詹 -詼 -誄 -誅 -誇 -誌 -認 -誒 -誓 -誕 -誘 -語 -誠 -誡 -誣 -誤 -誥 -誦 -誨 -說 -説 -読 -誰 -課 -誴 -誹 -誼 -誾 -調 -談 -請 -諍 -諏 -諒 -論 -諗 -諜 -諟 -諠 -諡 -諤 -諦 -諧 -諪 -諫 -諭 -諮 -諱 -諲 -諳 -諴 -諶 -諷 -諸 -諺 -諼 -諾 -謀 -謁 -謂 -謄 -謇 -謊 -謌 -謎 -謏 -謐 -謔 -謖 -謗 -謙 -謚 -講 -謜 -謝 -謠 -謢 -謤 -謨 -謩 -謫 -謬 -謳 -謹 -謾 -證 -譏 -譓 -譔 -識 -譙 -譚 -譜 -譞 -警 -譫 -譬 -譭 -譯 -議 -譲 -譳 -譴 -護 -譽 -譿 -讀 -讃 -變 -讌 -讎 -讓 -讖 -讙 -讚 -讜 -讞 -谷 -谿 -豁 -豆 -豇 -豈 -豉 -豊 -豌 -豎 -豐 -豔 -豕 -豚 -象 -豢 -豨 -豪 -豫 -豬 -豳 -豸 -豹 -豺 -豿 -貂 -貅 -貉 -貊 -貌 -貐 -貒 -貓 -貔 -貘 -貝 -貞 -負 -財 -貢 -貤 -貧 -貨 -販 -貪 -貫 -責 -貭 -貮 -貯 -貲 -貳 -貴 -貶 -買 -貸 -貺 -費 -貼 -貽 -貿 -賀 -賁 -賂 -賃 -賄 -資 -賈 -賊 -賑 -賒 -賓 -賔 -賕 -賚 -賜 -賞 -賠 -賡 -賢 -賣 -賤 -賦 -賨 -質 -賬 -賭 -賴 -賹 -賺 -賻 -購 -賽 -賾 -贄 -贅 -贇 -贈 -贊 -贌 -贍 -贏 -贓 -贔 -贖 -贛 -赤 -赦 -赧 -赫 -赬 -赭 -走 -赳 -赴 -起 -趁 -超 -越 -趐 -趕 -趖 -趙 -趟 -趣 -趨 -足 -趴 -趵 -趺 -趼 -趾 -跅 -跆 -跋 -跌 -跏 -跑 -跖 -跗 -跛 -距 -跟 -跡 -跣 -跤 -跨 -跩 -跪 -路 -跳 -踎 -踏 -踐 -踝 -踞 -踢 -踩 -踰 -踴 -踹 -踺 -蹂 -蹄 -蹇 -蹈 -蹉 -蹊 -蹋 -蹕 -蹙 -蹟 -蹠 -蹤 -蹦 -蹬 -蹭 -蹯 -蹲 -蹴 -蹶 -蹺 -蹻 -蹼 -躁 -躂 -躄 -躉 -躋 -躍 -躑 -躒 -躔 -躝 -躪 -身 -躬 -躰 -躲 -躺 -軀 -車 -軋 -軌 -軍 -軎 -軒 -軔 -軛 -軟 -転 -軫 -軲 -軸 -軹 -軺 -軻 -軼 -軽 -軾 -較 -輄 -輅 -載 -輋 -輒 -輓 -輔 -輕 -輛 -輝 -輞 -輟 -輥 -輦 -輩 -輪 -輬 -輭 -輯 -輶 -輸 -輻 -輾 -輿 -轀 -轂 -轄 -轅 -轆 -轉 -轍 -轎 -轘 -轝 -轟 -轤 -辛 -辜 -辟 -辣 -辦 -辧 -辨 -辭 -辮 -辯 -辰 -辱 -農 -辵 -辺 -辻 -込 -迂 -迄 -迅 -迎 -近 -返 -迢 -迤 -迥 -迦 -迪 -迫 -迭 -迮 -述 -迴 -迵 -迷 -迸 -迺 -追 -退 -送 -逃 -逄 -逅 -逆 -逈 -逋 -逌 -逍 -逎 -透 -逐 -逑 -途 -逕 -逖 -逗 -這 -通 -逛 -逝 -逞 -速 -造 -逢 -連 -逤 -逨 -逮 -逯 -進 -逴 -逵 -逸 -逹 -逺 -逼 -逾 -遁 -遂 -遄 -遇 -遊 -運 -遍 -過 -遏 -遐 -遒 -道 -達 -違 -遘 -遙 -遛 -遜 -遞 -遠 -遢 -遣 -遨 -適 -遭 -遮 -遯 -遲 -遴 -遵 -遶 -遷 -選 -遹 -遺 -遼 -避 -邀 -邁 -邂 -邃 -還 -邇 -邈 -邉 -邊 -邋 -邏 -邑 -邕 -邗 -邙 -邛 -邠 -邡 -邢 -那 -邦 -邨 -邪 -邯 -邰 -邱 -邲 -邳 -邴 -邵 -邸 -邽 -邾 -郁 -郃 -郄 -郅 -郇 -郊 -郋 -郎 -郗 -郛 -郜 -郝 -郞 -郟 -郡 -郢 -郤 -部 -郪 -郫 -郭 -郯 -郳 -郴 -郵 -郷 -都 -郾 -郿 -鄂 -鄃 -鄄 -鄆 -鄉 -鄋 -鄑 -鄒 -鄔 -鄖 -鄗 -鄘 -鄙 -鄚 -鄜 -鄞 -鄠 -鄢 -鄣 -鄤 -鄧 -鄩 -鄫 -鄭 -鄯 -鄰 -鄱 -鄲 -鄳 -鄴 -鄺 -酃 -酆 -酈 -酉 -酊 -酋 -酌 -配 -酎 -酏 -酐 -酒 -酔 -酗 -酚 -酞 -酡 -酢 -酣 -酥 -酩 -酪 -酬 -酮 -酯 -酰 -酴 -酵 -酶 -酷 -酸 -酺 -酼 -醁 -醂 -醃 -醅 -醇 -醉 -醋 -醌 -醍 -醐 -醒 -醚 -醛 -醜 -醞 -醢 -醣 -醪 -醫 -醬 -醮 -醯 -醴 -醺 -醾 -醿 -釀 -釁 -釆 -采 -釉 -釋 -里 -重 -野 -量 -釐 -金 -釒 -釓 -釔 -釕 -釗 -釘 -釙 -釚 -釜 -針 -釣 -釤 -釦 -釧 -釩 -釪 -釭 -釴 -釵 -釷 -釹 -釺 -鈀 -鈁 -鈄 -鈇 -鈈 -鈉 -鈊 -鈍 -鈏 -鈐 -鈑 -鈔 -鈕 -鈖 -鈞 -鈢 -鈣 -鈥 -鈦 -鈫 -鈮 -鈰 -鈳 -鈴 -鈷 -鈸 -鈹 -鈺 -鈾 -鈿 -鉀 -鉄 -鉅 -鉆 -鉈 -鉉 -鉋 -鉌 -鉍 -鉏 -鉑 -鉓 -鉗 -鉚 -鉛 -鉞 -鉟 -鉤 -鉦 -鉬 -鉭 -鉲 -鉶 -鉷 -鉸 -鉻 -鉾 -鉿 -銀 -銂 -銃 -銅 -銋 -銍 -銑 -銓 -銕 -銖 -銘 -銚 -銜 -銠 -銣 -銥 -銦 -銨 -銩 -銪 -銫 -銬 -銭 -銱 -銲 -銳 -銶 -銷 -銹 -銻 -銼 -銾 -鋁 -鋅 -鋆 -鋇 -鋌 -鋏 -鋐 -鋒 -鋕 -鋗 -鋙 -鋡 -鋤 -鋥 -鋦 -鋨 -鋪 -鋮 -鋯 -鋰 -鋱 -鋳 -鋶 -鋸 -鋹 -鋼 -錀 -錄 -錏 -錐 -錒 -錕 -錘 -錚 -錞 -錟 -錠 -錡 -錢 -錦 -錨 -錫 -錬 -錮 -錯 -錳 -錶 -錸 -錻 -鍀 -鍇 -鍈 -鍉 -鍊 -鍋 -鍍 -鍏 -鍔 -鍘 -鍛 -鍝 -鍟 -鍠 -鍥 -鍩 -鍬 -鍱 -鍳 -鍵 -鍶 -鍷 -鍺 -鍼 -鍾 -鎂 -鎅 -鎊 -鎌 -鎏 -鎓 -鎔 -鎖 -鎗 -鎘 -鎚 -鎛 -鎢 -鎣 -鎦 -鎧 -鎪 -鎬 -鎭 -鎮 -鎰 -鎳 -鎵 -鎻 -鏃 -鏇 -鏈 -鏊 -鏌 -鏐 -鏑 -鏓 -鏖 -鏗 -鏘 -鏜 -鏝 -鏞 -鏟 -鏡 -鏢 -鏤 -鏦 -鏳 -鏴 -鏵 -鏷 -鏻 -鏽 -鐃 -鐇 -鐈 -鐓 -鐔 -鐘 -鐙 -鐠 -鐡 -鐤 -鐦 -鐧 -鐫 -鐬 -鐭 -鐮 -鐲 -鐳 -鐵 -鐸 -鐺 -鐽 -鐿 -鑀 -鑁 -鑂 -鑄 -鑅 -鑊 -鑌 -鑑 -鑒 -鑛 -鑠 -鑣 -鑨 -鑪 -鑫 -鑭 -鑰 -鑲 -鑴 -鑷 -鑼 -鑽 -鑾 -鑿 -長 -門 -閂 -閃 -閆 -閉 -開 -閎 -閏 -閑 -閒 -間 -閔 -閘 -閜 -閞 -閟 -関 -閣 -閥 -閦 -閨 -閩 -閬 -閭 -閰 -閱 -閶 -閹 -閻 -閼 -閾 -閿 -闆 -闇 -闈 -闊 -闋 -闌 -闍 -闐 -闓 -闔 -闕 -闖 -闘 -關 -闞 -闡 -闢 -闥 -阜 -阝 -阡 -阪 -阭 -阮 -阯 -阱 -防 -阻 -阿 -陀 -陁 -陂 -附 -陋 -陌 -降 -限 -陔 -陘 -陛 -陜 -陝 -陞 -陟 -陡 -院 -陣 -除 -陪 -陬 -陰 -陲 -陳 -陵 -陶 -陷 -陸 -険 -陽 -隄 -隅 -隆 -隈 -隊 -隋 -隍 -階 -隔 -隕 -隗 -隘 -隙 -際 -障 -隣 -隧 -隨 -險 -隰 -隱 -隲 -隳 -隴 -隷 -隸 -隹 -隻 -隼 -雀 -雁 -雄 -雅 -集 -雇 -雉 -雋 -雌 -雍 -雎 -雑 -雒 -雕 -雖 -雙 -雛 -雜 -雝 -雞 -離 -難 -雨 -雩 -雪 -雫 -雯 -雱 -雲 -零 -雷 -雹 -電 -需 -霄 -霅 -霆 -震 -霈 -霉 -霊 -霍 -霎 -霏 -霑 -霓 -霖 -霙 -霜 -霞 -霤 -霧 -霨 -霰 -露 -霶 -霸 -霹 -霽 -霾 -靁 -靂 -靄 -靈 -靉 -靑 -青 -靖 -靚 -靛 -靜 -非 -靠 -靡 -面 -革 -靫 -靬 -靭 -靳 -靴 -靶 -靺 -靼 -鞅 -鞆 -鞋 -鞍 -鞏 -鞘 -鞞 -鞠 -鞣 -鞥 -鞦 -鞨 -鞭 -鞮 -鞴 -韁 -韃 -韆 -韋 -韌 
-韑 -韓 -韙 -韜 -韞 -韠 -韡 -韭 -韮 -音 -韶 -韺 -韻 -韾 -響 -頁 -頂 -頃 -項 -順 -須 -頊 -頌 -頍 -頎 -頏 -預 -頑 -頒 -頓 -頔 -頗 -領 -頜 -頠 -頡 -頤 -頦 -頫 -頭 -頰 -頴 -頵 -頷 -頸 -頹 -頻 -頼 -顆 -題 -額 -顎 -顏 -顒 -顓 -顔 -顕 -顗 -願 -顙 -顛 -類 -顥 -顧 -顫 -顯 -顰 -顱 -顳 -顴 -風 -颮 -颯 -颱 -颶 -颺 -颼 -飄 -飆 -飈 -飛 -食 -飠 -飡 -飢 -飥 -飩 -飪 -飫 -飬 -飭 -飮 -飯 -飲 -飴 -飼 -飽 -飾 -餃 -餄 -餅 -餉 -養 -餌 -餎 -餐 -餒 -餓 -餗 -餘 -餚 -餛 -餞 -餠 -餡 -館 -餮 -餵 -餺 -餾 -餿 -饃 -饅 -饋 -饌 -饑 -饒 -饕 -饗 -饞 -饟 -饢 -首 -馗 -馘 -香 -馛 -馥 -馦 -馨 -馬 -馭 -馮 -馯 -馱 -馳 -馴 -馼 -駁 -駄 -駅 -駆 -駐 -駑 -駒 -駔 -駕 -駘 -駙 -駛 -駝 -駟 -駢 -駭 -駰 -駱 -駿 -騁 -騂 -騄 -騅 -騋 -騎 -騏 -験 -騖 -騙 -騤 -騨 -騫 -騭 -騮 -騰 -騶 -騷 -騾 -驁 -驃 -驄 -驅 -驊 -驌 -驍 -驎 -驒 -驕 -驗 -驚 -驛 -驟 -驢 -驤 -驥 -驩 -驪 -骨 -骯 -骰 -骶 -骷 -骸 -骼 -髀 -髂 -髎 -髏 -髑 -髒 -髓 -體 -高 -髙 -髡 -髦 -髪 -髭 -髮 -髯 -髲 -髷 -髹 -髻 -鬃 -鬄 -鬅 -鬆 -鬍 -鬚 -鬟 -鬢 -鬣 -鬥 -鬧 -鬨 -鬩 -鬪 -鬬 -鬮 -鬯 -鬱 -鬲 -鬹 -鬻 -鬼 -魁 -魂 -魃 -魄 -魅 -魈 -魋 -魍 -魎 -魏 -魔 -魕 -魘 -魚 -魛 -魞 -魟 -魣 -魨 -魩 -魮 -魯 -魴 -魷 -鮀 -鮁 -鮃 -鮄 -鮊 -鮋 -鮍 -鮐 -鮑 -鮒 -鮓 -鮗 -鮜 -鮟 -鮠 -鮡 -鮣 -鮨 -鮪 -鮫 -鮭 -鮮 -鮰 -鮸 -鮹 -鮻 -鯀 -鯁 -鯃 -鯇 -鯉 -鯊 -鯏 -鯒 -鯓 -鯔 -鯕 -鯖 -鯗 -鯙 -鯛 -鯡 -鯢 -鯤 -鯧 -鯨 -鯪 -鯭 -鯮 -鯰 -鯶 -鯷 -鯻 -鯽 -鯿 -鰂 -鰃 -鰆 -鰈 -鰉 -鰍 -鰏 -鰒 -鰓 -鰕 -鰗 -鰛 -鰜 -鰟 -鰣 -鰤 -鰧 -鰨 -鰩 -鰭 -鰮 -鰱 -鰲 -鰳 -鰶 -鰷 -鰹 -鰺 -鰻 -鰼 -鰾 -鱀 -鱂 -鱅 -鱇 -鱈 -鱉 -鱊 -鱒 -鱓 -鱔 -鱖 -鱗 -鱘 -鱚 -鱝 -鱟 -鱠 -鱣 -鱥 -鱧 -鱨 -鱬 -鱮 -鱰 -鱲 -鱵 -鱷 -鱸 -鱺 -鱻 -鳥 -鳧 -鳩 -鳯 -鳰 -鳳 -鳴 -鳶 -鳽 -鴆 -鴇 -鴉 -鴒 -鴓 -鴕 -鴗 -鴛 -鴝 -鴞 -鴟 -鴡 -鴣 -鴦 -鴨 -鴫 -鴯 -鴰 -鴴 -鴻 -鴿 -鵂 -鵄 -鵎 -鵐 -鵑 -鵒 -鵓 -鵙 -鵜 -鵝 -鵞 -鵟 -鵠 -鵡 -鵪 -鵬 -鵯 -鵰 -鵲 -鵵 -鵼 -鵾 -鶆 -鶇 -鶉 -鶏 -鶒 -鶓 -鶘 -鶚 -鶡 -鶥 -鶩 -鶬 -鶯 -鶲 -鶴 -鶹 -鶺 -鶻 -鶼 -鶿 -鷂 -鷄 -鷉 -鷎 -鷓 -鷗 -鷙 -鷚 -鷟 -鷥 -鷦 -鷫 -鷯 -鷲 -鷳 -鷸 -鷹 -鷺 -鸊 -鸌 -鸐 -鸑 -鸕 -鸘 -鸚 -鸛 -鸜 -鸝 -鸞 -鹮 -鹵 -鹹 -鹼 -鹽 -鹿 -麂 -麅 -麇 -麈 -麊 -麋 -麐 -麒 -麓 -麗 -麝 -麞 -麟 -麥 -麩 -麪 -麯 -麴 -麵 -麹 -麺 -麻 -麼 -麽 -麾 -麿 -黁 -黃 -黇 -黌 -黍 -黎 -黏 -黐 -黑 -黒 -黔 -默 -黙 -黛 -黜 -黝 -點 -黟 -黥 -黧 -黨 -黯 -黴 -黶 -黻 -黼 -黽 -黿 -鼂 -鼇 -鼈 -鼉 -鼎 -鼐 -鼒 -鼓 -鼕 -鼙 -鼠 -鼢 -鼩 -鼬 -鼯 -鼱 -鼴 -鼷 -鼻 -鼽 -鼾 -齊 -齋 -齒 -齕 -齡 -齣 -齦 -齧 -齲 -齶 -龍 -龎 -龐 -龑 -龔 -龕 -龜 -龝 -龠 -龢 -郎 -凉 -﹑ -﹗ -﹝ -﹞ -﹢ -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -A -B -C -D -E -F -G -H -I -K -L -M -N -O -P -R -S -T -U -V -W -Y -Z -[ -] -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -r -s -t -u -z -{ -| -} -~ -¥ -𣇉 - diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt deleted file mode 100644 index 2b6f66494d5417e18bbd225719aa72690e09e126..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/cyrillic_dict.txt +++ /dev/null @@ -1,163 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -Ё -Є -І -Ј -Љ -Ў -А -Б -В -Г -Д -Е -Ж -З -И -Й -К -Л -М -Н -О -П -Р -С -Т -У -Ф -Х -Ц -Ч -Ш -Щ -Ъ -Ы -Ь -Э -Ю -Я -а -б -в -г -д -е -ж -з -и -й -к -л -м -н -о -п -р -с -т -у -ф -х -ц -ч -ш -щ -ъ -ы -ь -э -ю -я -ё -ђ -є -і -ј -љ -њ -ћ -ў -џ -Ґ -ґ diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt deleted file mode 100644 index f55923061bfd480b875bb3679d7a75a9157387a9..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/devanagari_dict.txt +++ /dev/null @@ -1,167 +0,0 @@ - -! -# -$ -% -& -' -( -+ -, -- -. 
-/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -_ -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -É -é -ँ -ं -ः -अ -आ -इ -ई -उ -ऊ -ऋ -ए -ऐ -ऑ -ओ -औ -क -ख -ग -घ -ङ -च -छ -ज -झ -ञ -ट -ठ -ड -ढ -ण -त -थ -द -ध -न -ऩ -प -फ -ब -भ -म -य -र -ऱ -ल -ळ -व -श -ष -स -ह -़ -ा -ि -ी -ु -ू -ृ -ॅ -े -ै -ॉ -ो -ौ -् -॒ -क़ -ख़ -ग़ -ज़ -ड़ -ढ़ -फ़ -ॠ -। -० -१ -२ -३ -४ -५ -६ -७ -८ -९ -॰ diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt deleted file mode 100644 index 7677d31b9d3f08eef2823c2cf051beeab1f0470b..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/en_dict.txt +++ /dev/null @@ -1,95 +0,0 @@ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ - diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt deleted file mode 100644 index 339d4b89e5159a346636641a0814874faa59754a..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/japan_dict.txt +++ /dev/null @@ -1,4399 +0,0 @@ -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -] -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -© -° -² -´ -½ -Á -Ä -Å -Ç -È -É -Í -Ó -Ö -× -Ü -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -í -ð -ñ -ò -ó -ô -õ -ö -ø -ú -û -ü -ý -ā -ă -ą -ć -Č -č -đ -ē -ė -ę -ğ -ī -ı -Ł -ł -ń -ň -ō -ř -Ş -ş -Š -š -ţ -ū -ż -Ž -ž -Ș -ș -ț -Δ -α -λ -μ -φ -Г -О -а -в -л -о -р -с -т -я -ồ -​ -— -― -’ -“ -” -… -℃ -→ -∇ -− -■ -☆ -  -、 -。 -々 -〆 -〈 -〉 -「 -」 -『 -』 -〔 -〕 -〜 -ぁ -あ -ぃ -い -う -ぇ -え -ぉ -お -か -が -き -ぎ -く -ぐ -け -げ -こ -ご -さ -ざ -し -じ -す -ず -せ -ぜ -そ -ぞ -た -だ -ち -ぢ -っ -つ -づ -て -で -と -ど -な -に -ぬ -ね -の -は -ば -ぱ -ひ -び -ぴ -ふ -ぶ -ぷ -へ -べ -ぺ -ほ -ぼ -ぽ -ま -み -む -め -も -ゃ -や -ゅ -ゆ -ょ -よ -ら -り -る -れ -ろ -わ -ゑ -を -ん -ゝ -ゞ -ァ -ア -ィ -イ -ゥ -ウ -ェ -エ -ォ -オ -カ -ガ -キ -ギ -ク -グ -ケ -ゲ -コ -ゴ -サ -ザ -シ -ジ -ス -ズ -セ -ゼ -ソ -ゾ -タ -ダ -チ -ヂ -ッ -ツ -ヅ -テ -デ -ト -ド -ナ -ニ -ヌ -ネ -ノ -ハ -バ -パ -ヒ -ビ -ピ -フ -ブ -プ -ヘ -ベ -ペ -ホ -ボ -ポ -マ -ミ -ム -メ -モ -ャ -ヤ -ュ -ユ -ョ -ヨ -ラ -リ -ル -レ -ロ -ワ -ヰ -ン -ヴ -ヵ -ヶ -・ -ー -㈱ -一 -丁 -七 -万 -丈 -三 -上 -下 -不 -与 -丑 -且 -世 -丘 -丙 -丞 -両 -並 -中 -串 -丸 -丹 -主 -丼 -丿 -乃 -久 -之 -乎 -乏 -乗 -乘 -乙 -九 -乞 -也 -乱 -乳 -乾 -亀 -了 -予 -争 -事 -二 -于 -互 -五 -井 -亘 -亙 -些 -亜 -亟 -亡 -交 -亥 -亦 -亨 -享 -京 -亭 -亮 -人 -什 -仁 -仇 -今 -介 -仍 -仏 -仔 -仕 -他 -仗 -付 -仙 -代 -令 -以 -仮 -仰 -仲 -件 -任 -企 -伊 -伍 -伎 -伏 -伐 -休 -会 -伝 -伯 -估 -伴 -伶 -伸 -伺 -似 -伽 -佃 -但 -位 -低 -住 -佐 -佑 -体 -何 -余 -佚 -佛 -作 -佩 -佳 -併 -佶 -使 -侈 -例 -侍 -侏 -侑 -侘 -供 -依 -侠 -価 -侮 -侯 -侵 -侶 -便 -係 -促 -俄 -俊 -俔 -俗 -俘 -保 -信 -俣 -俤 -修 -俯 -俳 -俵 -俸 -俺 -倉 -個 -倍 -倒 -候 -借 -倣 -値 -倫 -倭 -倶 -倹 -偃 -假 -偈 -偉 -偏 -偐 -偕 -停 -健 -側 -偵 -偶 -偽 -傀 -傅 -傍 -傑 -傘 -備 -催 -傭 -傲 -傳 -債 -傷 -傾 -僊 -働 -像 -僑 -僕 -僚 -僧 -僭 -僮 -儀 -億 -儇 -儒 -儛 -償 -儡 -優 -儲 -儺 -儼 -兀 -允 -元 -兄 -充 -兆 -先 -光 -克 -兌 -免 -兎 -児 -党 -兜 -入 -全 -八 -公 -六 -共 -兵 -其 -具 -典 -兼 -内 -円 -冊 -再 -冑 -冒 -冗 -写 -冠 -冤 -冥 -冨 -冬 -冲 -决 -冶 -冷 -准 -凉 -凋 -凌 -凍 -凛 
-凝 -凞 -几 -凡 -処 -凪 -凰 -凱 -凶 -凸 -凹 -出 -函 -刀 -刃 -分 -切 -刈 -刊 -刎 -刑 -列 -初 -判 -別 -利 -刪 -到 -制 -刷 -券 -刹 -刺 -刻 -剃 -則 -削 -剋 -前 -剖 -剛 -剣 -剤 -剥 -剪 -副 -剰 -割 -創 -剽 -劇 -劉 -劔 -力 -功 -加 -劣 -助 -努 -劫 -劭 -励 -労 -効 -劾 -勃 -勅 -勇 -勉 -勒 -動 -勘 -務 -勝 -募 -勢 -勤 -勧 -勲 -勺 -勾 -勿 -匁 -匂 -包 -匏 -化 -北 -匙 -匝 -匠 -匡 -匣 -匯 -匲 -匹 -区 -医 -匿 -十 -千 -升 -午 -卉 -半 -卍 -卑 -卒 -卓 -協 -南 -単 -博 -卜 -占 -卦 -卯 -印 -危 -即 -却 -卵 -卸 -卿 -厄 -厚 -原 -厠 -厨 -厩 -厭 -厳 -去 -参 -又 -叉 -及 -友 -双 -反 -収 -叔 -取 -受 -叙 -叛 -叟 -叡 -叢 -口 -古 -句 -叩 -只 -叫 -召 -可 -台 -叱 -史 -右 -叶 -号 -司 -吃 -各 -合 -吉 -吊 -同 -名 -后 -吏 -吐 -向 -君 -吝 -吟 -吠 -否 -含 -吸 -吹 -吻 -吽 -吾 -呂 -呆 -呈 -呉 -告 -呑 -周 -呪 -呰 -味 -呼 -命 -咀 -咄 -咋 -和 -咒 -咫 -咲 -咳 -咸 -哀 -品 -哇 -哉 -員 -哨 -哩 -哭 -哲 -哺 -唄 -唆 -唇 -唐 -唖 -唯 -唱 -唳 -唸 -唾 -啄 -商 -問 -啓 -啼 -善 -喋 -喚 -喜 -喝 -喧 -喩 -喪 -喫 -喬 -單 -喰 -営 -嗅 -嗇 -嗔 -嗚 -嗜 -嗣 -嘆 -嘉 -嘗 -嘘 -嘩 -嘯 -嘱 -嘲 -嘴 -噂 -噌 -噛 -器 -噴 -噺 -嚆 -嚢 -囀 -囃 -囉 -囚 -四 -回 -因 -団 -困 -囲 -図 -固 -国 -圀 -圃 -國 -圏 -園 -圓 -團 -圜 -土 -圧 -在 -圭 -地 -址 -坂 -均 -坊 -坐 -坑 -坡 -坤 -坦 -坪 -垂 -型 -垢 -垣 -埃 -埋 -城 -埒 -埔 -域 -埠 -埴 -埵 -執 -培 -基 -埼 -堀 -堂 -堅 -堆 -堕 -堤 -堪 -堯 -堰 -報 -場 -堵 -堺 -塀 -塁 -塊 -塑 -塔 -塗 -塘 -塙 -塚 -塞 -塩 -填 -塵 -塾 -境 -墉 -墓 -増 -墜 -墟 -墨 -墳 -墺 -墻 -墾 -壁 -壇 -壊 -壌 -壕 -士 -壬 -壮 -声 -壱 -売 -壷 -壹 -壺 -壽 -変 -夏 -夕 -外 -夙 -多 -夜 -夢 -夥 -大 -天 -太 -夫 -夬 -夭 -央 -失 -夷 -夾 -奄 -奇 -奈 -奉 -奎 -奏 -契 -奔 -奕 -套 -奘 -奠 -奢 -奥 -奨 -奪 -奮 -女 -奴 -奸 -好 -如 -妃 -妄 -妊 -妍 -妓 -妖 -妙 -妥 -妨 -妬 -妲 -妹 -妻 -妾 -姉 -始 -姐 -姓 -委 -姚 -姜 -姞 -姥 -姦 -姨 -姪 -姫 -姶 -姻 -姿 -威 -娑 -娘 -娟 -娠 -娩 -娯 -娼 -婆 -婉 -婚 -婢 -婦 -婬 -婿 -媄 -媒 -媓 -媚 -媛 -媞 -媽 -嫁 -嫄 -嫉 -嫌 -嫐 -嫗 -嫡 -嬉 -嬌 -嬢 -嬪 -嬬 -嬾 -孁 -子 -孔 -字 -存 -孚 -孝 -孟 -季 -孤 -学 -孫 -孵 -學 -宅 -宇 -守 -安 -宋 -完 -宍 -宏 -宕 -宗 -官 -宙 -定 -宛 -宜 -宝 -実 -客 -宣 -室 -宥 -宮 -宰 -害 -宴 -宵 -家 -宸 -容 -宿 -寂 -寄 -寅 -密 -寇 -富 -寒 -寓 -寔 -寛 -寝 -察 -寡 -實 -寧 -審 -寮 -寵 -寶 -寸 -寺 -対 -寿 -封 -専 -射 -将 -尉 -尊 -尋 -對 -導 -小 -少 -尖 -尚 -尤 -尪 -尭 -就 -尹 -尺 -尻 -尼 -尽 -尾 -尿 -局 -居 -屈 -届 -屋 -屍 -屎 -屏 -屑 -屓 -展 -属 -屠 -層 -履 -屯 -山 -岐 -岑 -岡 -岩 -岫 -岬 -岳 -岷 -岸 -峠 -峡 -峨 -峯 -峰 -島 -峻 -崇 -崋 -崎 -崑 -崖 -崗 -崛 -崩 -嵌 -嵐 -嵩 -嵯 -嶂 -嶋 -嶠 -嶺 -嶼 -嶽 -巀 -巌 -巒 -巖 -川 -州 -巡 -巣 -工 -左 -巧 -巨 -巫 -差 -己 -巳 -巴 -巷 -巻 -巽 -巾 -市 -布 -帆 -希 -帖 -帚 -帛 -帝 -帥 -師 -席 -帯 -帰 -帳 -帷 -常 -帽 -幄 -幅 -幇 -幌 -幔 -幕 -幟 -幡 -幢 -幣 -干 -平 -年 -并 -幸 -幹 -幻 -幼 -幽 -幾 -庁 -広 -庄 -庇 -床 -序 -底 -庖 -店 -庚 -府 -度 -座 -庫 -庭 -庵 -庶 -康 -庸 -廂 -廃 -廉 -廊 -廓 -廟 -廠 -廣 -廬 -延 -廷 -建 -廻 -廼 -廿 -弁 -弄 -弉 -弊 -弌 -式 -弐 -弓 -弔 -引 -弖 -弗 -弘 -弛 -弟 -弥 -弦 -弧 -弱 -張 -強 -弼 -弾 -彈 -彊 -彌 -彎 -当 -彗 -彙 -彝 -形 -彦 -彩 -彫 -彬 -彭 -彰 -影 -彷 -役 -彼 -往 -征 -徂 -径 -待 -律 -後 -徐 -徑 -徒 -従 -得 -徠 -御 -徧 -徨 -復 -循 -徭 -微 -徳 -徴 -德 -徹 -徽 -心 -必 -忉 -忌 -忍 -志 -忘 -忙 -応 -忠 -快 -忯 -念 -忻 -忽 -忿 -怒 -怖 -思 -怠 -怡 -急 -性 -怨 -怪 -怯 -恂 -恋 -恐 -恒 -恕 -恣 -恤 -恥 -恨 -恩 -恬 -恭 -息 -恵 -悉 -悌 -悍 -悔 -悟 -悠 -患 -悦 -悩 -悪 -悲 -悼 -情 -惇 -惑 -惚 -惜 -惟 -惠 -惣 -惧 -惨 -惰 -想 -惹 -惺 -愈 -愉 -愍 -意 -愔 -愚 -愛 -感 -愷 -愿 -慈 -態 -慌 -慎 -慕 -慢 -慣 -慧 -慨 -慮 -慰 -慶 -憂 -憎 -憐 -憑 -憙 -憤 -憧 -憩 -憬 -憲 -憶 -憾 -懇 -應 -懌 -懐 -懲 -懸 -懺 -懽 -懿 -戈 -戊 -戌 -戎 -成 -我 -戒 -戔 -或 -戚 -戟 -戦 -截 -戮 -戯 -戴 -戸 -戻 -房 -所 -扁 -扇 -扈 -扉 -手 -才 -打 -払 -托 -扮 -扱 -扶 -批 -承 -技 -抄 -把 -抑 -抓 -投 -抗 -折 -抜 -択 -披 -抱 -抵 -抹 -押 -抽 -担 -拇 -拈 -拉 -拍 -拏 -拐 -拒 -拓 -拘 -拙 -招 -拝 -拠 -拡 -括 -拭 -拳 -拵 -拶 -拾 -拿 -持 -挂 -指 -按 -挑 -挙 -挟 -挨 -振 -挺 -挽 -挿 -捉 -捕 -捗 -捜 -捧 -捨 -据 -捺 -捻 -掃 -掄 -授 -掌 -排 -掖 -掘 -掛 -掟 -採 -探 -掣 -接 -控 -推 -掩 -措 -掬 -掲 -掴 -掻 -掾 -揃 -揄 -揆 -揉 -描 -提 -揖 -揚 -換 -握 -揮 -援 -揶 -揺 -損 -搦 -搬 -搭 -携 -搾 -摂 -摘 -摩 -摸 -摺 -撃 -撒 -撞 -撤 -撥 -撫 -播 -撮 -撰 -撲 -撹 -擁 -操 -擔 -擦 -擬 -擾 -攘 -攝 -攣 -支 -收 -改 -攻 -放 -政 -故 -敏 -救 -敗 -教 -敢 -散 -敦 -敬 -数 -整 -敵 -敷 -斂 -文 -斉 -斎 -斐 -斑 -斗 -料 -斜 -斟 -斤 -斥 -斧 -斬 -断 -斯 -新 -方 -於 -施 -旁 -旅 -旋 -旌 -族 -旗 -旛 -无 -旡 -既 -日 -旦 -旧 -旨 -早 -旬 -旭 -旺 -旻 -昂 -昆 -昇 -昉 -昌 -明 -昏 -易 -昔 -星 -映 -春 -昧 -昨 -昪 -昭 -是 -昵 -昼 -晁 -時 -晃 -晋 -晏 -晒 -晟 -晦 -晧 -晩 -普 -景 -晴 -晶 -智 -暁 -暇 -暈 -暉 -暑 -暖 -暗 -暘 -暢 -暦 -暫 -暮 -暲 -暴 -暹 -暾 -曄 -曇 -曉 -曖 -曙 -曜 -曝 -曠 -曰 -曲 -曳 -更 -書 -曹 -曼 -曽 -曾 -替 -最 -會 -月 -有 -朋 -服 -朏 -朔 -朕 
-朗 -望 -朝 -期 -朧 -木 -未 -末 -本 -札 -朱 -朴 -机 -朽 -杁 -杉 -李 -杏 -材 -村 -杓 -杖 -杜 -杞 -束 -条 -杢 -杣 -来 -杭 -杮 -杯 -東 -杲 -杵 -杷 -杼 -松 -板 -枅 -枇 -析 -枓 -枕 -林 -枚 -果 -枝 -枠 -枡 -枢 -枯 -枳 -架 -柄 -柊 -柏 -某 -柑 -染 -柔 -柘 -柚 -柯 -柱 -柳 -柴 -柵 -査 -柾 -柿 -栂 -栃 -栄 -栖 -栗 -校 -株 -栲 -栴 -核 -根 -栻 -格 -栽 -桁 -桂 -桃 -框 -案 -桐 -桑 -桓 -桔 -桜 -桝 -桟 -桧 -桴 -桶 -桾 -梁 -梅 -梆 -梓 -梔 -梗 -梛 -條 -梟 -梢 -梧 -梨 -械 -梱 -梲 -梵 -梶 -棄 -棋 -棒 -棗 -棘 -棚 -棟 -棠 -森 -棲 -棹 -棺 -椀 -椅 -椋 -植 -椎 -椏 -椒 -椙 -検 -椥 -椹 -椿 -楊 -楓 -楕 -楚 -楞 -楠 -楡 -楢 -楨 -楪 -楫 -業 -楮 -楯 -楳 -極 -楷 -楼 -楽 -概 -榊 -榎 -榕 -榛 -榜 -榮 -榱 -榴 -槃 -槇 -槊 -構 -槌 -槍 -槐 -様 -槙 -槻 -槽 -槿 -樂 -樋 -樓 -樗 -標 -樟 -模 -権 -横 -樫 -樵 -樹 -樺 -樽 -橇 -橋 -橘 -機 -橿 -檀 -檄 -檎 -檐 -檗 -檜 -檣 -檥 -檬 -檮 -檸 -檻 -櫃 -櫓 -櫛 -櫟 -櫨 -櫻 -欄 -欅 -欠 -次 -欣 -欧 -欲 -欺 -欽 -款 -歌 -歎 -歓 -止 -正 -此 -武 -歩 -歪 -歯 -歳 -歴 -死 -殆 -殉 -殊 -残 -殖 -殯 -殴 -段 -殷 -殺 -殻 -殿 -毀 -毅 -母 -毎 -毒 -比 -毘 -毛 -毫 -毬 -氈 -氏 -民 -気 -水 -氷 -永 -氾 -汀 -汁 -求 -汎 -汐 -汗 -汚 -汝 -江 -池 -汪 -汰 -汲 -決 -汽 -沂 -沃 -沅 -沆 -沈 -沌 -沐 -沓 -沖 -沙 -没 -沢 -沱 -河 -沸 -油 -治 -沼 -沽 -沿 -況 -泉 -泊 -泌 -法 -泗 -泡 -波 -泣 -泥 -注 -泯 -泰 -泳 -洋 -洒 -洗 -洛 -洞 -津 -洩 -洪 -洲 -洸 -洹 -活 -洽 -派 -流 -浄 -浅 -浙 -浚 -浜 -浣 -浦 -浩 -浪 -浮 -浴 -海 -浸 -涅 -消 -涌 -涙 -涛 -涯 -液 -涵 -涼 -淀 -淄 -淆 -淇 -淋 -淑 -淘 -淡 -淤 -淨 -淫 -深 -淳 -淵 -混 -淹 -添 -清 -済 -渉 -渋 -渓 -渕 -渚 -減 -渟 -渠 -渡 -渤 -渥 -渦 -温 -渫 -測 -港 -游 -渾 -湊 -湖 -湘 -湛 -湧 -湫 -湯 -湾 -湿 -満 -源 -準 -溜 -溝 -溢 -溥 -溪 -溶 -溺 -滄 -滅 -滋 -滌 -滑 -滕 -滝 -滞 -滴 -滸 -滹 -滿 -漁 -漂 -漆 -漉 -漏 -漑 -演 -漕 -漠 -漢 -漣 -漫 -漬 -漱 -漸 -漿 -潅 -潔 -潙 -潜 -潟 -潤 -潭 -潮 -潰 -潴 -澁 -澂 -澄 -澎 -澗 -澤 -澪 -澱 -澳 -激 -濁 -濃 -濟 -濠 -濡 -濤 -濫 -濯 -濱 -濾 -瀉 -瀋 -瀑 -瀕 -瀞 -瀟 -瀧 -瀬 -瀾 -灌 -灑 -灘 -火 -灯 -灰 -灸 -災 -炉 -炊 -炎 -炒 -炭 -炮 -炷 -点 -為 -烈 -烏 -烙 -烝 -烹 -焔 -焙 -焚 -無 -焦 -然 -焼 -煇 -煉 -煌 -煎 -煕 -煙 -煤 -煥 -照 -煩 -煬 -煮 -煽 -熈 -熊 -熙 -熟 -熨 -熱 -熹 -熾 -燃 -燈 -燎 -燔 -燕 -燗 -燥 -燭 -燻 -爆 -爐 -爪 -爬 -爲 -爵 -父 -爺 -爼 -爽 -爾 -片 -版 -牌 -牒 -牘 -牙 -牛 -牝 -牟 -牡 -牢 -牧 -物 -牲 -特 -牽 -犂 -犠 -犬 -犯 -状 -狂 -狄 -狐 -狗 -狙 -狛 -狡 -狩 -独 -狭 -狷 -狸 -狼 -猊 -猛 -猟 -猥 -猨 -猩 -猪 -猫 -献 -猴 -猶 -猷 -猾 -猿 -獄 -獅 -獏 -獣 -獲 -玄 -玅 -率 -玉 -王 -玖 -玩 -玲 -珀 -珂 -珈 -珉 -珊 -珍 -珎 -珞 -珠 -珣 -珥 -珪 -班 -現 -球 -理 -琉 -琢 -琥 -琦 -琮 -琲 -琳 -琴 -琵 -琶 -瑁 -瑋 -瑙 -瑚 -瑛 -瑜 -瑞 -瑠 -瑤 -瑩 -瑪 -瑳 -瑾 -璃 -璋 -璜 -璞 -璧 -璨 -環 -璵 -璽 -璿 -瓊 -瓔 -瓜 -瓢 -瓦 -瓶 -甍 -甑 -甕 -甘 -甚 -甞 -生 -産 -甥 -用 -甫 -田 -由 -甲 -申 -男 -町 -画 -界 -畏 -畑 -畔 -留 -畜 -畝 -畠 -畢 -略 -番 -異 -畳 -當 -畷 -畸 -畺 -畿 -疆 -疇 -疋 -疎 -疏 -疑 -疫 -疱 -疲 -疹 -疼 -疾 -病 -症 -痒 -痔 -痕 -痘 -痙 -痛 -痢 -痩 -痴 -痺 -瘍 -瘡 -瘧 -療 -癇 -癌 -癒 -癖 -癡 -癪 -発 -登 -白 -百 -的 -皆 -皇 -皋 -皐 -皓 -皮 -皺 -皿 -盂 -盃 -盆 -盈 -益 -盒 -盗 -盛 -盞 -盟 -盡 -監 -盤 -盥 -盧 -目 -盲 -直 -相 -盾 -省 -眉 -看 -県 -眞 -真 -眠 -眷 -眺 -眼 -着 -睡 -督 -睦 -睨 -睿 -瞋 -瞑 -瞞 -瞬 -瞭 -瞰 -瞳 -瞻 -瞼 -瞿 -矍 -矛 -矜 -矢 -知 -矧 -矩 -短 -矮 -矯 -石 -砂 -砌 -研 -砕 -砥 -砦 -砧 -砲 -破 -砺 -硝 -硫 -硬 -硯 -碁 -碇 -碌 -碑 -碓 -碕 -碗 -碣 -碧 -碩 -確 -碾 -磁 -磐 -磔 -磧 -磨 -磬 -磯 -礁 -礎 -礒 -礙 -礫 -礬 -示 -礼 -社 -祀 -祁 -祇 -祈 -祉 -祐 -祓 -祕 -祖 -祗 -祚 -祝 -神 -祟 -祠 -祢 -祥 -票 -祭 -祷 -祺 -禁 -禄 -禅 -禊 -禍 -禎 -福 -禔 -禖 -禛 -禦 -禧 -禮 -禰 -禹 -禽 -禿 -秀 -私 -秋 -科 -秒 -秘 -租 -秤 -秦 -秩 -称 -移 -稀 -程 -税 -稔 -稗 -稙 -稚 -稜 -稠 -種 -稱 -稲 -稷 -稻 -稼 -稽 -稿 -穀 -穂 -穆 -積 -穎 -穏 -穗 -穜 -穢 -穣 -穫 -穴 -究 -空 -突 -窃 -窄 -窒 -窓 -窟 -窠 -窩 -窪 -窮 -窯 -竃 -竄 -竈 -立 -站 -竜 -竝 -竟 -章 -童 -竪 -竭 -端 -竴 -競 -竹 -竺 -竽 -竿 -笄 -笈 -笏 -笑 -笙 -笛 -笞 -笠 -笥 -符 -第 -笹 -筅 -筆 -筇 -筈 -等 -筋 -筌 -筍 -筏 -筐 -筑 -筒 -答 -策 -筝 -筥 -筧 -筬 -筮 -筯 -筰 -筵 -箆 -箇 -箋 -箏 -箒 -箔 -箕 -算 -箙 -箜 -管 -箪 -箭 -箱 -箸 -節 -篁 -範 -篆 -篇 -築 -篋 -篌 -篝 -篠 -篤 -篥 -篦 -篩 -篭 -篳 -篷 -簀 -簒 -簡 -簧 -簪 -簫 -簺 -簾 -簿 -籀 -籃 -籌 -籍 -籐 -籟 -籠 -籤 -籬 -米 -籾 -粂 -粉 -粋 -粒 -粕 -粗 -粘 -粛 -粟 -粥 -粧 -粮 -粳 -精 -糊 -糖 -糜 -糞 -糟 -糠 -糧 -糯 -糸 -糺 -系 -糾 -紀 -約 -紅 -紋 -納 -紐 -純 -紗 -紘 -紙 -級 -紛 -素 -紡 -索 -紫 -紬 -累 -細 -紳 -紵 -紹 -紺 -絁 -終 -絃 -組 -絅 -経 -結 -絖 -絞 -絡 -絣 -給 -統 -絲 -絵 -絶 -絹 -絽 -綏 -經 -継 -続 -綜 -綟 -綬 -維 -綱 -網 -綴 -綸 -綺 -綽 -綾 -綿 -緊 -緋 -総 -緑 -緒 -線 -締 -緥 -編 -緩 -緬 -緯 -練 -緻 -縁 -縄 -縅 -縒 -縛 -縞 -縢 -縣 -縦 -縫 -縮 -縹 -總 -績 -繁 -繊 -繋 -繍 -織 -繕 -繝 -繦 -繧 -繰 -繹 -繼 -纂 -纈 -纏 -纐 -纒 -纛 -缶 -罔 -罠 -罧 -罪 -置 -罰 -署 -罵 -罷 
-罹 -羂 -羅 -羆 -羇 -羈 -羊 -羌 -美 -群 -羨 -義 -羯 -羲 -羹 -羽 -翁 -翅 -翌 -習 -翔 -翛 -翠 -翡 -翫 -翰 -翺 -翻 -翼 -耀 -老 -考 -者 -耆 -而 -耐 -耕 -耗 -耨 -耳 -耶 -耽 -聊 -聖 -聘 -聚 -聞 -聟 -聡 -聨 -聯 -聰 -聲 -聴 -職 -聾 -肄 -肆 -肇 -肉 -肋 -肌 -肖 -肘 -肛 -肝 -股 -肢 -肥 -肩 -肪 -肯 -肱 -育 -肴 -肺 -胃 -胆 -背 -胎 -胖 -胚 -胝 -胞 -胡 -胤 -胱 -胴 -胸 -能 -脂 -脅 -脆 -脇 -脈 -脊 -脚 -脛 -脩 -脱 -脳 -腋 -腎 -腐 -腑 -腔 -腕 -腫 -腰 -腱 -腸 -腹 -腺 -腿 -膀 -膏 -膚 -膜 -膝 -膠 -膣 -膨 -膩 -膳 -膵 -膾 -膿 -臂 -臆 -臈 -臍 -臓 -臘 -臚 -臣 -臥 -臨 -自 -臭 -至 -致 -臺 -臼 -舂 -舅 -與 -興 -舌 -舍 -舎 -舒 -舖 -舗 -舘 -舜 -舞 -舟 -舩 -航 -般 -舳 -舶 -船 -艇 -艘 -艦 -艮 -良 -色 -艶 -芋 -芒 -芙 -芝 -芥 -芦 -芬 -芭 -芯 -花 -芳 -芸 -芹 -芻 -芽 -芿 -苅 -苑 -苔 -苗 -苛 -苞 -苡 -若 -苦 -苧 -苫 -英 -苴 -苻 -茂 -范 -茄 -茅 -茎 -茗 -茘 -茜 -茨 -茲 -茵 -茶 -茸 -茹 -草 -荊 -荏 -荒 -荘 -荷 -荻 -荼 -莞 -莪 -莫 -莬 -莱 -莵 -莽 -菅 -菊 -菌 -菓 -菖 -菘 -菜 -菟 -菩 -菫 -華 -菱 -菴 -萄 -萊 -萌 -萍 -萎 -萠 -萩 -萬 -萱 -落 -葉 -著 -葛 -葡 -董 -葦 -葩 -葬 -葭 -葱 -葵 -葺 -蒋 -蒐 -蒔 -蒙 -蒟 -蒡 -蒲 -蒸 -蒻 -蒼 -蒿 -蓄 -蓆 -蓉 -蓋 -蓑 -蓬 -蓮 -蓼 -蔀 -蔑 -蔓 -蔚 -蔡 -蔦 -蔬 -蔭 -蔵 -蔽 -蕃 -蕉 -蕊 -蕎 -蕨 -蕩 -蕪 -蕭 -蕾 -薄 -薇 -薊 -薔 -薗 -薙 -薛 -薦 -薨 -薩 -薪 -薫 -薬 -薭 -薮 -藁 -藉 -藍 -藏 -藐 -藝 -藤 -藩 -藪 -藷 -藹 -藺 -藻 -蘂 -蘆 -蘇 -蘊 -蘭 -虎 -虐 -虔 -虚 -虜 -虞 -號 -虫 -虹 -虻 -蚊 -蚕 -蛇 -蛉 -蛍 -蛎 -蛙 -蛛 -蛟 -蛤 -蛭 -蛮 -蛸 -蛹 -蛾 -蜀 -蜂 -蜃 -蜆 -蜊 -蜘 -蜜 -蜷 -蜻 -蝉 -蝋 -蝕 -蝙 -蝠 -蝦 -蝶 -蝿 -螂 -融 -螣 -螺 -蟄 -蟇 -蟠 -蟷 -蟹 -蟻 -蠢 -蠣 -血 -衆 -行 -衍 -衒 -術 -街 -衙 -衛 -衝 -衞 -衡 -衢 -衣 -表 -衫 -衰 -衵 -衷 -衽 -衾 -衿 -袁 -袈 -袋 -袍 -袒 -袖 -袙 -袞 -袢 -被 -袰 -袱 -袴 -袷 -袿 -裁 -裂 -裃 -装 -裏 -裔 -裕 -裘 -裙 -補 -裟 -裡 -裲 -裳 -裴 -裸 -裹 -製 -裾 -褂 -褄 -複 -褌 -褐 -褒 -褥 -褪 -褶 -褻 -襄 -襖 -襞 -襟 -襠 -襦 -襪 -襲 -襴 -襷 -西 -要 -覆 -覇 -覈 -見 -規 -視 -覗 -覚 -覧 -親 -覲 -観 -覺 -觀 -角 -解 -触 -言 -訂 -計 -討 -訓 -託 -記 -訛 -訟 -訢 -訥 -訪 -設 -許 -訳 -訴 -訶 -診 -註 -証 -詐 -詔 -評 -詛 -詞 -詠 -詢 -詣 -試 -詩 -詫 -詮 -詰 -話 -該 -詳 -誄 -誅 -誇 -誉 -誌 -認 -誓 -誕 -誘 -語 -誠 -誡 -誣 -誤 -誥 -誦 -説 -読 -誰 -課 -誼 -誾 -調 -談 -請 -諌 -諍 -諏 -諒 -論 -諚 -諜 -諟 -諡 -諦 -諧 -諫 -諭 -諮 -諱 -諶 -諷 -諸 -諺 -諾 -謀 -謄 -謌 -謎 -謗 -謙 -謚 -講 -謝 -謡 -謫 -謬 -謹 -證 -識 -譚 -譛 -譜 -警 -譬 -譯 -議 -譲 -譴 -護 -讀 -讃 -讐 -讒 -谷 -谿 -豅 -豆 -豊 -豎 -豐 -豚 -象 -豪 -豫 -豹 -貌 -貝 -貞 -負 -財 -貢 -貧 -貨 -販 -貪 -貫 -責 -貯 -貰 -貴 -買 -貸 -費 -貼 -貿 -賀 -賁 -賂 -賃 -賄 -資 -賈 -賊 -賎 -賑 -賓 -賛 -賜 -賞 -賠 -賢 -賣 -賤 -賦 -質 -賭 -購 -賽 -贄 -贅 -贈 -贋 -贔 -贖 -赤 -赦 -走 -赴 -起 -超 -越 -趙 -趣 -足 -趺 -趾 -跋 -跏 -距 -跡 -跨 -跪 -路 -跳 -践 -踊 -踏 -踐 -踞 -踪 -踵 -蹄 -蹉 -蹊 -蹟 -蹲 -蹴 -躅 -躇 -躊 -躍 -躑 -躙 -躪 -身 -躬 -躯 -躰 -車 -軋 -軌 -軍 -軒 -軟 -転 -軸 -軻 -軽 -軾 -較 -載 -輌 -輔 -輜 -輝 -輦 -輩 -輪 -輯 -輸 -輿 -轄 -轍 -轟 -轢 -辛 -辞 -辟 -辥 -辦 -辨 -辰 -辱 -農 -辺 -辻 -込 -迂 -迅 -迎 -近 -返 -迢 -迦 -迪 -迫 -迭 -述 -迷 -迹 -追 -退 -送 -逃 -逅 -逆 -逍 -透 -逐 -逓 -途 -逕 -逗 -這 -通 -逝 -逞 -速 -造 -逢 -連 -逮 -週 -進 -逸 -逼 -遁 -遂 -遅 -遇 -遊 -運 -遍 -過 -遐 -道 -達 -違 -遙 -遜 -遠 -遡 -遣 -遥 -適 -遭 -遮 -遯 -遵 -遷 -選 -遺 -遼 -避 -邀 -邁 -邂 -邃 -還 -邇 -邉 -邊 -邑 -那 -邦 -邨 -邪 -邯 -邵 -邸 -郁 -郊 -郎 -郡 -郢 -部 -郭 -郴 -郵 -郷 -都 -鄂 -鄙 -鄭 -鄰 -鄲 -酉 -酋 -酌 -配 -酎 -酒 -酔 -酢 -酥 -酪 -酬 -酵 -酷 -酸 -醍 -醐 -醒 -醗 -醜 -醤 -醪 -醵 -醸 -采 -釈 -釉 -釋 -里 -重 -野 -量 -釐 -金 -釘 -釜 -針 -釣 -釧 -釿 -鈍 -鈎 -鈐 -鈔 -鈞 -鈦 -鈴 -鈷 -鈸 -鈿 -鉄 -鉇 -鉉 -鉋 -鉛 -鉢 -鉤 -鉦 -鉱 -鉾 -銀 -銃 -銅 -銈 -銑 -銕 -銘 -銚 -銜 -銭 -鋏 -鋒 -鋤 -鋭 -鋲 -鋳 -鋸 -鋺 -鋼 -錆 -錍 -錐 -錘 -錠 -錣 -錦 -錫 -錬 -錯 -録 -錵 -鍋 -鍍 -鍑 -鍔 -鍛 -鍬 -鍮 -鍵 -鍼 -鍾 -鎌 -鎖 -鎗 -鎚 -鎧 -鎬 -鎮 -鎰 -鎹 -鏃 -鏑 -鏡 -鐃 -鐇 -鐐 -鐔 -鐘 -鐙 -鐚 -鐡 -鐵 -鐸 -鑁 -鑊 -鑑 -鑒 -鑚 -鑠 -鑢 -鑰 -鑵 -鑷 -鑼 -鑽 -鑿 -長 -門 -閃 -閇 -閉 -開 -閏 -閑 -間 -閔 -閘 -関 -閣 -閤 -閥 -閦 -閨 -閬 -閲 -閻 -閼 -閾 -闇 -闍 -闔 -闕 -闘 -關 -闡 -闢 -闥 -阜 -阪 -阮 -阯 -防 -阻 -阿 -陀 -陂 -附 -陌 -降 -限 -陛 -陞 -院 -陣 -除 -陥 -陪 -陬 -陰 -陳 -陵 -陶 -陸 -険 -陽 -隅 -隆 -隈 -隊 -隋 -階 -随 -隔 -際 -障 -隠 -隣 -隧 -隷 -隻 -隼 -雀 -雁 -雄 -雅 -集 -雇 -雉 -雊 -雋 -雌 -雍 -雑 -雖 -雙 -雛 -離 -難 -雨 -雪 -雫 -雰 -雲 -零 -雷 -雹 -電 -需 -震 -霊 -霍 -霖 -霜 -霞 -霧 -霰 -露 -靈 -青 -靖 -静 -靜 -非 -面 -革 -靫 -靭 -靱 -靴 -靺 -鞁 -鞄 -鞆 -鞋 -鞍 -鞏 -鞘 -鞠 -鞨 -鞭 -韋 -韓 -韜 -韮 -音 -韶 -韻 -響 -頁 -頂 -頃 -項 -順 -須 -頌 -預 -頑 -頒 -頓 -領 -頚 -頬 -頭 -頴 -頸 -頻 -頼 -顆 -題 -額 -顎 -顔 -顕 -顗 -願 -顛 -類 -顧 -顯 -風 -飛 -食 -飢 -飩 -飫 -飯 -飲 -飴 -飼 -飽 -飾 -餃 -餅 -餉 -養 -餌 -餐 -餓 -餘 -餝 -餡 -館 -饂 -饅 -饉 -饋 -饌 -饒 -饗 -首 -馗 -香 -馨 -馬 -馳 -馴 -駄 -駅 -駆 
-駈 -駐 -駒 -駕 -駝 -駿 -騁 -騎 -騏 -騒 -験 -騙 -騨 -騰 -驕 -驚 -驛 -驢 -骨 -骸 -髄 -體 -高 -髙 -髢 -髪 -髭 -髮 -髷 -髻 -鬘 -鬚 -鬢 -鬨 -鬯 -鬱 -鬼 -魁 -魂 -魄 -魅 -魏 -魔 -魚 -魯 -鮎 -鮑 -鮒 -鮪 -鮫 -鮭 -鮮 -鯉 -鯔 -鯖 -鯛 -鯨 -鯰 -鯱 -鰐 -鰒 -鰭 -鰯 -鰰 -鰹 -鰻 -鱈 -鱒 -鱗 -鱧 -鳥 -鳩 -鳰 -鳳 -鳴 -鳶 -鴈 -鴉 -鴎 -鴛 -鴟 -鴦 -鴨 -鴫 -鴻 -鵄 -鵜 -鵞 -鵡 -鵬 -鵲 -鵺 -鶉 -鶏 -鶯 -鶴 -鷄 -鷙 -鷲 -鷹 -鷺 -鸚 -鸞 -鹸 -鹽 -鹿 -麁 -麒 -麓 -麗 -麝 -麞 -麟 -麦 -麩 -麹 -麺 -麻 -麾 -麿 -黄 -黌 -黍 -黒 -黙 -黛 -黠 -鼈 -鼉 -鼎 -鼓 -鼠 -鼻 -齊 -齋 -齟 -齢 -齬 -龍 -龕 -龗 -! -# -% -& -( -) -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -= -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -R -S -T -U -V -W -X -Z -a -c -d -e -f -h -i -j -k -l -m -n -o -p -r -s -t -u -y -z -~ -・ - diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt deleted file mode 100644 index d506b691bd1a6c55299ad89a72cf3a69a2c879a9..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ka_dict.txt +++ /dev/null @@ -1,153 +0,0 @@ -k -a -_ -i -m -g -/ -1 -2 -I -L -S -V -R -C -0 -v -l -6 -4 -8 -. -j -p -ಗ -ು -ಣ -ಪ -ಡ -ಿ -ಸ -ಲ -ಾ -ದ -್ -7 -5 -3 -ವ -ಷ -ಬ -ಹ -ೆ -9 -ಅ -ಳ -ನ -ರ -ಉ -ಕ -ಎ -ೇ -ಂ -ೈ -ೊ -ೀ -ಯ -ೋ -ತ -ಶ -ಭ -ಧ -ಚ -ಜ -ೂ -ಮ -ಒ -ೃ -ಥ -ಇ -ಟ -ಖ -ಆ -ಞ -ಫ -- -ಢ -ಊ -ಓ -ಐ -ಃ -ಘ -ಝ -ೌ -ಠ -ಛ -ಔ -ಏ -ಈ -ಋ -೨ -೦ -೧ -೮ -೯ -೪ -, -೫ -೭ -೩ -೬ -ಙ -s -c -e -n -w -o -u -t -d -E -A -T -B -Z -N -G -O -q -z -r -x -P -K -M -J -U -D -f -F -h -b -W -Y -y -H -X -Q -' -# -& -! -@ -$ -: -% -é -É -( -? -+ - diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt deleted file mode 100644 index a13899f14dfe3bfc25b34904390c7b1e4ed8674b..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/korean_dict.txt +++ /dev/null @@ -1,3688 +0,0 @@ -! -" -# -$ -% -& -' -* -+ -- -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? 
-A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -\ -] -^ -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -| -} -~ -© -° -² -½ -Á -Ä -Å -Ç -É -Í -Î -Ó -Ö -× -Ü -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -ì -í -î -ï -ð -ñ -ò -ó -ô -õ -ö -ø -ú -û -ü -ý -ā -ă -ą -ć -Č -č -đ -ē -ė -ę -ě -ğ -ī -İ -ı -Ł -ł -ń -ň -ō -ř -Ş -ş -Š -š -ţ -ū -ź -ż -Ž -ž -Ș -ș -Α -Δ -α -λ -φ -Г -О -а -в -л -о -р -с -т -я -​ -’ -“ -” -→ -∇ -∼ -「 -」 -ア -カ -グ -ニ -ラ -ン -ㄱ -ㄴ -ㄷ -ㄸ -ㄹ -ㅂ -ㅅ -ㅆ -ㅇ -ㅈ -ㅊ -ㅋ -ㅌ -ㅎ -ㅓ -ㅜ -ㅣ -一 -丁 -七 -三 -上 -下 -不 -丑 -世 -丘 -丞 -中 -丸 -丹 -主 -乃 -久 -之 -乎 -乘 -九 -也 -乳 -乾 -事 -二 -云 -互 -五 -井 -亞 -亡 -交 -亥 -亨 -享 -京 -亭 -人 -仁 -今 -他 -仙 -代 -令 -以 -仰 -仲 -件 -任 -企 -伊 -伍 -伎 -伏 -伐 -休 -伯 -伴 -伸 -佃 -佈 -位 -低 -住 -佐 -何 -佛 -作 -使 -來 -供 -依 -侯 -侵 -侶 -便 -俗 -保 -俠 -信 -修 -俱 -俳 -倉 -個 -倍 -倒 -候 -借 -値 -倫 -倭 -假 -偈 -偉 -偏 -停 -偶 -傅 -傑 -傳 -傷 -傾 -像 -僞 -僥 -僧 -價 -儀 -儉 -儒 -優 -儼 -兀 -允 -元 -兆 -先 -光 -克 -兒 -入 -內 -全 -八 -公 -六 -共 -兵 -其 -具 -典 -兼 -再 -冠 -冥 -冶 -准 -凞 -凡 -凱 -出 -函 -刀 -分 -刊 -刑 -列 -初 -判 -別 -利 -到 -制 -券 -刺 -刻 -則 -前 -剛 -副 -創 -劃 -劑 -力 -功 -加 -劣 -助 -劫 -勇 -動 -務 -勝 -勢 -勳 -勸 -匈 -化 -北 -匠 -區 -十 -千 -午 -半 -卍 -卑 -卒 -卓 -南 -博 -卜 -占 -卦 -印 -危 -卵 -卷 -卽 -卿 -厄 -原 -厦 -去 -參 -又 -叉 -友 -反 -叔 -受 -口 -古 -句 -可 -台 -史 -右 -司 -各 -合 -吉 -同 -名 -后 -吏 -吐 -君 -吠 -吳 -呂 -告 -周 -味 -呵 -命 -和 -咳 -咸 -咽 -哀 -品 -哨 -哮 -哲 -唐 -唯 -唱 -商 -問 -啼 -善 -喆 -喉 -喜 -喩 -喪 -嘗 -器 -嚴 -囊 -四 -回 -因 -困 -固 -圈 -國 -圍 -園 -圓 -圖 -團 -土 -在 -地 -均 -坊 -坐 -坑 -坵 -型 -垢 -城 -域 -埴 -執 -培 -基 -堂 -堅 -堆 -堤 -堯 -報 -場 -塔 -塚 -塞 -塵 -境 -墜 -墟 -墨 -墳 -墾 -壁 -壇 -壓 -壤 -士 -壬 -壯 -壺 -壽 -夏 -夕 -外 -多 -夜 -夢 -大 -天 -太 -夫 -央 -失 -夷 -奄 -奇 -奉 -奎 -奏 -契 -奔 -奮 -女 -奴 -好 -如 -妄 -妊 -妖 -妙 -始 -姑 -姓 -姚 -姜 -威 -婆 -婚 -婦 -媒 -媚 -子 -孔 -字 -存 -孝 -孟 -季 -孤 -孫 -學 -孺 -宇 -守 -安 -宋 -宗 -官 -宙 -定 -客 -宣 -室 -宮 -害 -家 -容 -寂 -寃 -寄 -寅 -密 -寇 -富 -寒 -寓 -實 -審 -寫 -寬 -寶 -寸 -寺 -封 -將 -專 -尊 -對 -小 -少 -尙 -尹 -尼 -尿 -局 -居 -屈 -屋 -屍 -屎 -屛 -層 -屬 -山 -岐 -岡 -岩 -岳 -岸 -峙 -峰 -島 -峻 -峽 -崇 -崔 -崖 -崩 -嶋 -巖 -川 -州 -巢 -工 -左 -巧 -巨 -巫 -差 -己 -巷 -市 -布 -帝 -師 -帶 -常 -帽 -幕 -干 -平 -年 -幹 -幻 -幼 -幽 -庇 -序 -店 -府 -度 -座 -庫 -庭 -康 -廟 -廣 -廳 -延 -廷 -建 -廻 -弁 -式 -弑 -弓 -引 -弘 -弟 -弱 -張 -强 -弼 -彌 -彛 -形 -彬 -影 -役 -彼 -彿 -往 -征 -待 -律 -後 -徐 -徑 -得 -從 -循 -微 -德 -徹 -心 -必 -忌 -忍 -志 -忠 -思 -怡 -急 -性 -恐 -恒 -恨 -恩 -悅 -悖 -患 -悲 -情 -惑 -惟 -惠 -惡 -想 -惺 -愁 -意 -愚 -愛 -感 -愼 -慈 -態 -慕 -慣 -慧 -慾 -憂 -憤 -憺 -應 -懸 -戎 -成 -我 -戟 -戮 -戰 -戴 -戶 -房 -所 -手 -才 -打 -批 -承 -技 -抄 -把 -抗 -抱 -抽 -拇 -拓 -拘 -拙 -拜 -拾 -持 -指 -捌 -捨 -捿 -授 -掌 -排 -接 -推 -提 -揚 -揭 -援 -損 -搗 -摩 -播 -操 -擒 -擔 -擘 -據 -擧 -攘 -攝 -攬 -支 -改 -攻 -放 -政 -故 -敍 -敎 -救 -敗 -散 -敬 -整 -數 -文 -斗 -料 -斛 -斜 -斧 -斯 -新 -斷 -方 -於 -施 -旋 -族 -旗 -日 -旨 -早 -旱 -昌 -明 -易 -昔 -星 -春 -昧 -昭 -是 -時 -晉 -晋 -晩 -普 -景 -晴 -晶 -智 -暈 -暑 -暗 -暘 -曉 -曜 -曠 -曦 -曰 -曲 -書 -曹 -曼 -曾 -最 -會 -月 -有 -朋 -服 -望 -朝 -期 -木 -未 -末 -本 -朱 -朴 -李 -材 -村 -杖 -杜 -杞 -杭 -杯 -東 -松 -板 -林 -果 -枝 -枯 -枰 -枾 -柏 -柑 -柱 -栗 -校 -栢 -核 -根 -格 -桀 -桂 -案 -桎 -桑 -桓 -桔 -梁 -梏 -梓 -梗 -條 -梨 -梵 -棗 -棟 -森 -植 -椒 -楊 -楓 -楚 -業 -楮 -極 -榮 -槃 -槍 -樂 -樓 -樗 -樣 -樸 -樹 -樺 -樽 -橄 -橋 -橘 -機 -橡 -檀 -檎 -權 -欌 -欖 -次 -欲 -歌 -歐 -止 -正 -此 -步 -武 -歲 -歸 -死 -殖 -段 -殷 -殺 -殿 -毅 -母 -毒 -比 -毛 -氏 -民 -氣 -水 -永 -求 -汎 -汗 -江 -池 -沅 -沒 -沖 -沙 -沛 -河 -油 -治 -沼 -沿 -泉 -泊 -法 -泗 -泡 -波 -注 -泰 -洋 -洙 -洛 -洞 -津 -洲 -活 -派 -流 -浅 -浦 -浮 -浴 -海 -涅 -涇 -消 -涌 -液 -淑 -淡 -淨 -淫 -深 -淳 -淵 -淸 -渠 -渡 -游 -渾 -湖 -湯 -源 -溪 -溫 -溶 -滄 -滅 -滋 -滯 -滿 -漁 -漆 -漢 -漫 -漸 -潑 -潤 -潭 -澄 -澎 -澤 -澳 -澹 -濁 -濕 -濟 -濤 -濯 -瀋 -瀝 -灣 -火 -灰 -灸 -災 -炎 -炭 -点 -烈 -烏 -烙 -焚 -無 -焦 -然 -煌 -煎 -照 -煬 -煮 -熟 -熱 -燁 -燈 -燔 -燕 -燥 -燧 -燮 -爲 -爵 -父 -片 -版 -牌 -牛 -牝 -牟 -牡 -物 -特 -犧 -犬 -狀 -狗 -猥 -猩 -猪 -獨 -獵 -獸 -獻 -玄 -玉 -王 -玲 -珍 -珠 -珪 -班 -現 -球 -理 -琴 -瑞 -瑟 -瑪 -璃 -璋 -璽 -瓜 -瓦 -甑 -甘 -生 -産 -用 -甫 -田 -由 -甲 -申 -男 -界 -畏 -留 -畜 -畢 -略 -番 -異 -畵 -當 -畸 -疏 -疑 -疫 -疹 -疼 -病 -症 -痔 -痛 -痺 -瘀 -瘍 -瘡 -療 -癌 -癖 -登 -發 -白 -百 -的 -皆 -皇 -皮 -盂 -盆 -益 -盛 -盜 -盟 -盡 -盤 -盧 -目 -直 
-相 -省 -看 -眞 -眼 -睡 -督 -瞋 -矢 -矣 -知 -短 -石 -破 -碍 -碑 -磁 -磨 -磬 -示 -社 -祇 -祖 -祝 -神 -祥 -祭 -祺 -禁 -禅 -禍 -福 -禦 -禪 -禮 -禹 -禽 -禾 -秀 -私 -秉 -秋 -科 -秘 -秤 -秦 -秩 -移 -稀 -稗 -種 -稱 -稷 -稼 -稽 -穀 -穆 -積 -空 -窮 -竅 -立 -章 -童 -竭 -端 -竹 -笑 -符 -第 -筆 -等 -筍 -答 -策 -箋 -箕 -管 -箱 -節 -篇 -簡 -米 -粉 -粘 -粥 -精 -糖 -糞 -系 -紀 -紂 -約 -紅 -紋 -純 -紙 -級 -素 -索 -紫 -紬 -累 -細 -紳 -終 -組 -結 -絡 -統 -絲 -絶 -絹 -經 -綠 -維 -綱 -網 -綸 -綽 -緖 -線 -緣 -緯 -縣 -縱 -總 -織 -繡 -繩 -繪 -繭 -纂 -續 -罕 -置 -罰 -羅 -羊 -美 -群 -義 -羽 -翁 -習 -翟 -老 -考 -者 -而 -耐 -耕 -耳 -聃 -聖 -聞 -聰 -聲 -職 -肇 -肉 -肖 -肝 -股 -肥 -育 -肺 -胃 -胎 -胚 -胞 -胡 -胥 -能 -脂 -脈 -脚 -脛 -脣 -脩 -脫 -脯 -脾 -腋 -腎 -腫 -腸 -腹 -膜 -膠 -膨 -膽 -臆 -臟 -臣 -臥 -臨 -自 -至 -致 -臺 -臼 -臾 -與 -興 -舊 -舌 -舍 -舒 -舜 -舟 -般 -船 -艦 -良 -色 -芋 -花 -芳 -芽 -苑 -苔 -苕 -苛 -苞 -若 -苦 -英 -茂 -茵 -茶 -茹 -荀 -荇 -草 -荒 -荷 -莊 -莫 -菊 -菌 -菜 -菩 -菫 -華 -菴 -菽 -萊 -萍 -萬 -落 -葉 -著 -葛 -董 -葬 -蒙 -蒜 -蒲 -蒸 -蒿 -蓮 -蔓 -蔘 -蔡 -蔬 -蕃 -蕉 -蕓 -薄 -薑 -薛 -薩 -薪 -薺 -藏 -藝 -藤 -藥 -藩 -藻 -蘆 -蘇 -蘊 -蘚 -蘭 -虎 -處 -虛 -虞 -虹 -蜀 -蜂 -蜜 -蝕 -蝶 -融 -蟬 -蟲 -蠶 -蠻 -血 -衆 -行 -術 -衛 -衡 -衣 -表 -袁 -裔 -裕 -裙 -補 -製 -複 -襄 -西 -要 -見 -視 -親 -覺 -觀 -角 -解 -言 -訂 -訊 -訓 -託 -記 -訣 -設 -診 -註 -評 -詩 -話 -詵 -誅 -誌 -認 -誕 -語 -誠 -誤 -誥 -誦 -說 -調 -談 -諍 -論 -諡 -諫 -諭 -諸 -謙 -講 -謝 -謠 -證 -識 -譚 -譜 -譯 -議 -護 -讀 -變 -谷 -豆 -豊 -豚 -象 -豪 -豫 -貝 -貞 -財 -貧 -貨 -貪 -貫 -貴 -貸 -費 -資 -賊 -賓 -賞 -賢 -賣 -賦 -質 -贍 -赤 -赫 -走 -起 -超 -越 -趙 -趣 -趨 -足 -趾 -跋 -跡 -路 -踏 -蹟 -身 -躬 -車 -軍 -軒 -軟 -載 -輓 -輕 -輪 -輯 -輸 -輻 -輿 -轅 -轉 -辨 -辭 -辯 -辰 -農 -近 -迦 -述 -追 -逆 -透 -逐 -通 -逝 -造 -逢 -連 -進 -逵 -遂 -遊 -運 -遍 -過 -道 -達 -遠 -遡 -適 -遷 -選 -遺 -遽 -還 -邊 -邑 -那 -邪 -郞 -郡 -部 -都 -鄒 -鄕 -鄭 -鄲 -配 -酒 -酸 -醉 -醫 -醯 -釋 -里 -重 -野 -量 -釐 -金 -針 -鈍 -鈴 -鉞 -銀 -銅 -銘 -鋼 -錄 -錢 -錦 -鎭 -鏡 -鐘 -鐵 -鑑 -鑛 -長 -門 -閃 -開 -間 -閔 -閣 -閥 -閭 -閻 -闕 -關 -阪 -防 -阿 -陀 -降 -限 -陝 -院 -陰 -陳 -陵 -陶 -陸 -陽 -隆 -隊 -隋 -階 -際 -障 -隣 -隨 -隱 -隷 -雀 -雄 -雅 -集 -雇 -雌 -雖 -雙 -雜 -離 -難 -雨 -雪 -雲 -電 -霜 -露 -靈 -靑 -靖 -靜 -非 -面 -革 -靴 -鞏 -韓 -音 -韶 -韻 -順 -須 -頊 -頌 -領 -頭 -顔 -願 -顚 -類 -顯 -風 -飛 -食 -飢 -飮 -飯 -飾 -養 -餓 -餘 -首 -香 -馨 -馬 -駒 -騫 -騷 -驕 -骨 -骸 -髓 -體 -高 -髥 -髮 -鬪 -鬱 -鬼 -魏 -魔 -魚 -魯 -鮮 -鰍 -鰐 -鳥 -鳧 -鳳 -鴨 -鵲 -鶴 -鷄 -鷹 -鹽 -鹿 -麗 -麥 -麻 -黃 -黑 -默 -點 -黨 -鼎 -齊 -齋 -齒 -龍 -龜 -가 -각 -간 -갇 -갈 -갉 -감 -갑 -값 -갓 -갔 -강 -갖 -갗 -같 -갚 -갛 -개 -객 -갠 -갤 -갬 -갭 -갯 -갰 -갱 -갸 -걀 -걔 -걘 -거 -걱 -건 -걷 -걸 -검 -겁 -것 -겄 -겅 -겆 -겉 -겊 -겋 -게 -겐 -겔 -겟 -겠 -겡 -겨 -격 -겪 -견 -결 -겸 -겹 -겻 -겼 -경 -곁 -계 -곕 -곗 -고 -곡 -곤 -곧 -골 -곪 -곬 -곯 -곰 -곱 -곳 -공 -곶 -과 -곽 -관 -괄 -괌 -광 -괘 -괜 -괭 -괴 -괸 -굉 -교 -구 -국 -군 -굳 -굴 -굵 -굶 -굼 -굽 -굿 -궁 -궂 -궈 -권 -궐 -궜 -궝 -궤 -귀 -귄 -귈 -귓 -규 -균 -귤 -그 -극 -근 -글 -긁 -금 -급 -긋 -긍 -기 -긴 -길 -김 -깁 -깃 -깅 -깊 -까 -깍 -깎 -깐 -깔 -깜 -깝 -깟 -깡 -깥 -깨 -깬 -깰 -깻 -깼 -깽 -꺄 -꺼 -꺽 -꺾 -껀 -껄 -껌 -껍 -껏 -껐 -껑 -께 -껴 -꼈 -꼍 -꼐 -꼬 -꼭 -꼴 -꼼 -꼽 -꼿 -꽁 -꽂 -꽃 -꽉 -꽝 -꽤 -꽥 -꾀 -꾜 -꾸 -꾹 -꾼 -꿀 -꿇 -꿈 -꿉 -꿋 -꿍 -꿎 -꿔 -꿨 -꿩 -꿰 -꿴 -뀄 -뀌 -뀐 -뀔 -뀜 -뀝 -끄 -끈 -끊 -끌 -끓 -끔 -끕 -끗 -끙 -끝 -끼 -끽 -낀 -낄 -낌 -낍 -낏 -낑 -나 -낙 -낚 -난 -낟 -날 -낡 -남 -납 -낫 -났 -낭 -낮 -낯 -낱 -낳 -내 -낵 -낸 -낼 -냄 -냅 -냇 -냈 -냉 -냐 -냔 -냘 -냥 -너 -넉 -넋 -넌 -널 -넓 -넘 -넙 -넛 -넜 -넝 -넣 -네 -넥 -넨 -넬 -넴 -넵 -넷 -넸 -넹 -녀 -녁 -년 -념 -녔 -녕 -녘 -녜 -노 -녹 -논 -놀 -놈 -놋 -농 -높 -놓 -놔 -놨 -뇌 -뇨 -뇩 -뇽 -누 -눅 -눈 -눌 -눔 -눕 -눗 -눠 -눴 -뉘 -뉜 -뉩 -뉴 -늄 -늅 -늉 -느 -늑 -는 -늘 -늙 -늠 -늡 -능 -늦 -늪 -늬 -니 -닉 -닌 -닐 -님 -닙 -닛 -닝 -닢 -다 -닥 -닦 -단 -닫 -달 -닭 -닮 -닯 -닳 -담 -답 -닷 -당 -닻 -닿 -대 -댁 -댄 -댈 -댐 -댑 -댓 -댔 -댕 -댜 -더 -덕 -덖 -던 -덜 -덟 -덤 -덥 -덧 -덩 -덫 -덮 -데 -덱 -덴 -델 -뎀 -뎃 -뎅 -뎌 -뎠 -뎨 -도 -독 -돈 -돋 -돌 -돔 -돕 -돗 -동 -돛 -돝 -돼 -됐 -되 -된 -될 -됨 -됩 -됴 -두 -둑 -둔 -둘 -둠 -둡 -둣 -둥 -둬 -뒀 -뒤 -뒬 -뒷 -뒹 -듀 -듈 -듐 -드 -득 -든 -듣 -들 -듦 -듬 -듭 -듯 -등 -듸 -디 -딕 -딘 -딛 -딜 -딤 -딥 -딧 -딨 -딩 -딪 -따 -딱 -딴 -딸 -땀 -땄 -땅 -때 -땐 -땔 -땜 -땝 -땠 -땡 -떠 -떡 -떤 -떨 -떫 -떰 -떱 -떳 -떴 -떵 -떻 -떼 -떽 -뗀 -뗄 -뗍 -뗏 -뗐 -뗑 -또 -똑 -똘 -똥 -뙤 -뚜 -뚝 -뚤 -뚫 -뚱 -뛰 -뛴 -뛸 -뜀 -뜁 -뜨 -뜩 -뜬 -뜯 -뜰 -뜸 -뜻 -띄 -띈 -띌 -띔 -띕 -띠 -띤 -띨 -띱 -띵 -라 -락 -란 -랄 -람 -랍 -랏 -랐 -랑 -랒 -랗 -래 -랙 -랜 -랠 -램 -랩 -랫 -랬 -랭 -랴 -략 -량 -러 -럭 -런 -럴 -럼 -럽 -럿 -렀 -렁 -렇 -레 -렉 -렌 -렐 -렘 -렙 -렛 -렝 -려 -력 -련 -렬 -렴 
-렵 -렷 -렸 -령 -례 -로 -록 -론 -롤 -롬 -롭 -롯 -롱 -롸 -롹 -뢰 -뢴 -뢸 -룃 -료 -룐 -룡 -루 -룩 -룬 -룰 -룸 -룹 -룻 -룽 -뤄 -뤘 -뤼 -류 -륙 -륜 -률 -륨 -륭 -르 -륵 -른 -를 -름 -릅 -릇 -릉 -릎 -리 -릭 -린 -릴 -림 -립 -릿 -링 -마 -막 -만 -많 -맏 -말 -맑 -맘 -맙 -맛 -망 -맞 -맡 -맣 -매 -맥 -맨 -맬 -맴 -맵 -맷 -맸 -맹 -맺 -먀 -먁 -머 -먹 -먼 -멀 -멈 -멋 -멍 -멎 -메 -멕 -멘 -멜 -멤 -멥 -멧 -멩 -며 -멱 -면 -멸 -몄 -명 -몇 -모 -목 -몫 -몬 -몰 -몸 -몹 -못 -몽 -뫼 -묘 -무 -묵 -묶 -문 -묻 -물 -묽 -뭄 -뭅 -뭇 -뭉 -뭍 -뭏 -뭐 -뭔 -뭘 -뭡 -뭣 -뮈 -뮌 -뮐 -뮤 -뮬 -므 -믈 -믐 -미 -믹 -민 -믿 -밀 -밈 -밉 -밋 -밌 -밍 -및 -밑 -바 -박 -밖 -반 -받 -발 -밝 -밟 -밤 -밥 -밧 -방 -밭 -배 -백 -밴 -밸 -뱀 -뱁 -뱃 -뱄 -뱅 -뱉 -뱍 -뱐 -버 -벅 -번 -벌 -범 -법 -벗 -벙 -벚 -베 -벡 -벤 -벨 -벰 -벱 -벳 -벵 -벼 -벽 -변 -별 -볍 -볏 -볐 -병 -볕 -보 -복 -볶 -본 -볼 -봄 -봅 -봇 -봉 -봐 -봤 -뵈 -뵐 -뵙 -부 -북 -분 -붇 -불 -붉 -붐 -붓 -붕 -붙 -뷔 -뷰 -뷴 -뷸 -브 -븐 -블 -비 -빅 -빈 -빌 -빔 -빕 -빗 -빙 -빚 -빛 -빠 -빡 -빤 -빨 -빳 -빴 -빵 -빻 -빼 -빽 -뺀 -뺄 -뺌 -뺏 -뺐 -뺑 -뺨 -뻐 -뻑 -뻔 -뻗 -뻘 -뻣 -뻤 -뻥 -뻬 -뼈 -뼉 -뼘 -뽀 -뽈 -뽐 -뽑 -뽕 -뾰 -뿌 -뿍 -뿐 -뿔 -뿜 -쁘 -쁜 -쁠 -쁨 -삐 -삔 -삘 -사 -삭 -삯 -산 -살 -삵 -삶 -삼 -삽 -삿 -샀 -상 -샅 -새 -색 -샌 -샐 -샘 -샙 -샛 -샜 -생 -샤 -샨 -샬 -샴 -샵 -샷 -샹 -서 -석 -섞 -선 -섣 -설 -섬 -섭 -섯 -섰 -성 -섶 -세 -섹 -센 -셀 -셈 -셉 -셋 -셌 -셍 -셔 -션 -셜 -셨 -셰 -셴 -셸 -소 -속 -손 -솔 -솜 -솝 -솟 -송 -솥 -쇄 -쇠 -쇤 -쇳 -쇼 -숀 -숄 -숍 -수 -숙 -순 -숟 -술 -숨 -숩 -숫 -숭 -숯 -숱 -숲 -숴 -쉐 -쉘 -쉬 -쉭 -쉰 -쉴 -쉼 -쉽 -슈 -슐 -슘 -슛 -슝 -스 -슥 -슨 -슬 -슭 -슴 -습 -슷 -승 -시 -식 -신 -싣 -실 -싫 -심 -십 -싯 -싱 -싶 -싸 -싹 -싼 -쌀 -쌈 -쌉 -쌌 -쌍 -쌓 -쌔 -쌘 -쌩 -써 -썩 -썬 -썰 -썸 -썹 -썼 -썽 -쎄 -쎈 -쏘 -쏙 -쏜 -쏟 -쏠 -쏭 -쏴 -쐈 -쐐 -쐬 -쑤 -쑥 -쑨 -쒀 -쒔 -쓰 -쓱 -쓴 -쓸 -씀 -씁 -씌 -씨 -씩 -씬 -씰 -씸 -씹 -씻 -씽 -아 -악 -안 -앉 -않 -알 -앎 -앓 -암 -압 -앗 -았 -앙 -앞 -애 -액 -앤 -앨 -앰 -앱 -앳 -앴 -앵 -야 -약 -얀 -얄 -얇 -얌 -얍 -얏 -양 -얕 -얗 -얘 -얜 -어 -억 -언 -얹 -얻 -얼 -얽 -엄 -업 -없 -엇 -었 -엉 -엊 -엌 -엎 -에 -엑 -엔 -엘 -엠 -엡 -엣 -엥 -여 -역 -엮 -연 -열 -엷 -염 -엽 -엾 -엿 -였 -영 -옅 -옆 -옇 -예 -옌 -옐 -옙 -옛 -오 -옥 -온 -올 -옭 -옮 -옳 -옴 -옵 -옷 -옹 -옻 -와 -왁 -완 -왈 -왑 -왓 -왔 -왕 -왜 -왠 -왱 -외 -왼 -요 -욕 -욘 -욜 -욤 -용 -우 -욱 -운 -울 -움 -웁 -웃 -웅 -워 -웍 -원 -월 -웜 -웠 -웡 -웨 -웬 -웰 -웸 -웹 -위 -윅 -윈 -윌 -윔 -윗 -윙 -유 -육 -윤 -율 -윱 -윳 -융 -으 -윽 -은 -을 -읊 -음 -읍 -응 -의 -읜 -읠 -이 -익 -인 -일 -읽 -잃 -임 -입 -잇 -있 -잉 -잊 -잎 -자 -작 -잔 -잖 -잘 -잠 -잡 -잣 -잤 -장 -잦 -재 -잭 -잰 -잴 -잽 -잿 -쟀 -쟁 -쟈 -쟉 -쟤 -저 -적 -전 -절 -젊 -점 -접 -젓 -정 -젖 -제 -젝 -젠 -젤 -젬 -젭 -젯 -져 -젼 -졀 -졌 -졍 -조 -족 -존 -졸 -좀 -좁 -종 -좇 -좋 -좌 -좍 -좽 -죄 -죠 -죤 -주 -죽 -준 -줄 -줌 -줍 -줏 -중 -줘 -줬 -쥐 -쥔 -쥘 -쥬 -쥴 -즈 -즉 -즌 -즐 -즘 -즙 -증 -지 -직 -진 -짇 -질 -짊 -짐 -집 -짓 -징 -짖 -짙 -짚 -짜 -짝 -짠 -짢 -짤 -짧 -짬 -짭 -짰 -짱 -째 -짹 -짼 -쨀 -쨉 -쨋 -쨌 -쨍 -쩄 -쩌 -쩍 -쩐 -쩔 -쩜 -쩝 -쩡 -쩨 -쪄 -쪘 -쪼 -쪽 -쪾 -쫀 -쫄 -쫑 -쫓 -쫙 -쬐 -쭈 -쭉 -쭐 -쭙 -쯔 -쯤 -쯧 -찌 -찍 -찐 -찔 -찜 -찝 -찡 -찢 -찧 -차 -착 -찬 -찮 -찰 -참 -찹 -찻 -찼 -창 -찾 -채 -책 -챈 -챌 -챔 -챕 -챗 -챘 -챙 -챠 -챤 -처 -척 -천 -철 -첨 -첩 -첫 -청 -체 -첵 -첸 -첼 -쳄 -쳇 -쳉 -쳐 -쳔 -쳤 -초 -촉 -촌 -촘 -촛 -총 -촨 -촬 -최 -쵸 -추 -축 -춘 -출 -춤 -춥 -춧 -충 -춰 -췄 -췌 -취 -췬 -츄 -츠 -측 -츨 -츰 -층 -치 -칙 -친 -칠 -칡 -침 -칩 -칫 -칭 -카 -칵 -칸 -칼 -캄 -캅 -캇 -캉 -캐 -캔 -캘 -캠 -캡 -캣 -캤 -캥 -캬 -커 -컥 -컨 -컫 -컬 -컴 -컵 -컷 -컸 -컹 -케 -켄 -켈 -켐 -켓 -켕 -켜 -켠 -켤 -켭 -켯 -켰 -코 -콕 -콘 -콜 -콤 -콥 -콧 -콩 -콰 -콱 -콴 -콸 -쾅 -쾌 -쾡 -쾨 -쾰 -쿄 -쿠 -쿡 -쿤 -쿨 -쿰 -쿵 -쿼 -퀀 -퀄 -퀘 -퀭 -퀴 -퀵 -퀸 -퀼 -큐 -큘 -크 -큰 -클 -큼 -큽 -키 -킥 -킨 -킬 -킴 -킵 -킷 -킹 -타 -탁 -탄 -탈 -탉 -탐 -탑 -탓 -탔 -탕 -태 -택 -탠 -탤 -탬 -탭 -탯 -탰 -탱 -터 -턱 -턴 -털 -텀 -텁 -텃 -텄 -텅 -테 -텍 -텐 -텔 -템 -텝 -텡 -텨 -톈 -토 -톡 -톤 -톨 -톰 -톱 -톳 -통 -퇴 -툇 -투 -툭 -툰 -툴 -툼 -퉁 -퉈 -퉜 -튀 -튄 -튈 -튕 -튜 -튠 -튤 -튬 -트 -특 -튼 -튿 -틀 -틈 -틉 -틋 -틔 -티 -틱 -틴 -틸 -팀 -팁 -팅 -파 -팍 -팎 -판 -팔 -팜 -팝 -팟 -팠 -팡 -팥 -패 -팩 -팬 -팰 -팸 -팻 -팼 -팽 -퍼 -퍽 -펀 -펄 -펌 -펍 -펐 -펑 -페 -펙 -펜 -펠 -펨 -펩 -펫 -펭 -펴 -편 -펼 -폄 -폈 -평 -폐 -포 -폭 -폰 -폴 -폼 -폿 -퐁 -표 -푭 -푸 -푹 -푼 -풀 -품 -풋 -풍 -퓨 -퓬 -퓰 -퓸 -프 -픈 -플 -픔 -픕 -피 -픽 -핀 -필 -핌 -핍 -핏 -핑 -하 -학 -한 -할 -핥 -함 -합 -핫 -항 -해 -핵 -핸 -핼 -햄 -햅 -햇 -했 -행 -햐 -향 -헀 -허 -헉 -헌 -헐 -험 -헙 -헛 -헝 -헤 -헥 -헨 -헬 -헴 -헵 -헷 -헹 -혀 -혁 -현 -혈 -혐 -협 -혓 -혔 -형 -혜 -호 -혹 -혼 -홀 -홈 -홉 -홋 -홍 -홑 -화 -확 -환 -활 -홧 -황 -홰 -홱 -횃 -회 -획 -횝 -횟 -횡 -효 -후 -훅 -훈 -훌 -훑 -훔 -훗 -훤 -훨 -훼 -휄 -휑 -휘 
-휙 -휜 -휠 -휩 -휭 -휴 -휼 -흄 -흉 -흐 -흑 -흔 -흘 -흙 -흠 -흡 -흣 -흥 -흩 -희 -흰 -흽 -히 -힉 -힌 -힐 -힘 -힙 -힝 -車 -滑 -金 -奈 -羅 -洛 -卵 -欄 -蘭 -郎 -來 -盧 -老 -魯 -綠 -鹿 -論 -雷 -樓 -縷 -凌 -樂 -不 -參 -葉 -沈 -若 -兩 -凉 -梁 -呂 -女 -廬 -麗 -黎 -曆 -歷 -戀 -蓮 -連 -列 -烈 -裂 -念 -獵 -靈 -領 -例 -禮 -醴 -惡 -尿 -料 -遼 -龍 -暈 -柳 -流 -類 -六 -陸 -倫 -律 -栗 -利 -李 -梨 -理 -離 -燐 -林 -臨 -立 -茶 -切 -宅 - diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt deleted file mode 100644 index e166bf33ecfbdc90ddb3d9743fded23306acabd5..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/latin_dict.txt +++ /dev/null @@ -1,185 +0,0 @@ - -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -] -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -{ -} -¡ -£ -§ -ª -« -­ -° -² -³ -´ -µ -· -º -» -¿ -À -Á - -Ä -Å -Ç -È -É -Ê -Ë -Ì -Í -Î -Ï -Ò -Ó -Ô -Õ -Ö -Ú -Ü -Ý -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -ì -í -î -ï -ñ -ò -ó -ô -õ -ö -ø -ù -ú -û -ü -ý -ą -Ć -ć -Č -č -Đ -đ -ę -ı -Ł -ł -ō -Œ -œ -Š -š -Ÿ -Ž -ž -ʒ -β -δ -ε -з -Ṡ -‘ -€ -™ diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt deleted file mode 100644 index 84b885d8352226e49b1d5d791b8f43a663e246aa..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocr_keys_v1.txt +++ /dev/null @@ -1,6623 +0,0 @@ -' -疗 -绚 -诚 -娇 -溜 -题 -贿 -者 -廖 -更 -纳 -加 -奉 -公 -一 -就 -汴 -计 -与 -路 -房 -原 -妇 -2 -0 -8 -- -7 -其 -> -: -] -, -, -骑 -刈 -全 -消 -昏 -傈 -安 -久 -钟 -嗅 -不 -影 -处 -驽 -蜿 -资 -关 -椤 -地 -瘸 -专 -问 -忖 -票 -嫉 -炎 -韵 -要 -月 -田 -节 -陂 -鄙 -捌 -备 -拳 -伺 -眼 -网 -盎 -大 -傍 -心 -东 -愉 -汇 -蹿 -科 -每 -业 -里 -航 -晏 -字 -平 -录 -先 -1 -3 -彤 -鲶 -产 -稍 -督 -腴 -有 -象 -岳 -注 -绍 -在 -泺 -文 -定 -核 -名 -水 -过 -理 -让 -偷 -率 -等 -这 -发 -” -为 -含 -肥 -酉 -相 -鄱 -七 -编 -猥 -锛 -日 -镀 -蒂 -掰 -倒 -辆 -栾 -栗 -综 -涩 -州 -雌 -滑 -馀 -了 -机 -块 -司 -宰 -甙 -兴 -矽 -抚 -保 -用 -沧 -秩 -如 -收 -息 -滥 -页 -疑 -埠 -! -! -姥 -异 -橹 -钇 -向 -下 -跄 -的 -椴 -沫 -国 -绥 -獠 -报 -开 -民 -蜇 -何 -分 -凇 -长 -讥 -藏 -掏 -施 -羽 -中 -讲 -派 -嘟 -人 -提 -浼 -间 -世 -而 -古 -多 -倪 -唇 -饯 -控 -庚 -首 -赛 -蜓 -味 -断 -制 -觉 -技 -替 -艰 -溢 -潮 -夕 -钺 -外 -摘 -枋 -动 -双 -单 -啮 -户 -枇 -确 -锦 -曜 -杜 -或 -能 -效 -霜 -盒 -然 -侗 -电 -晁 -放 -步 -鹃 -新 -杖 -蜂 -吒 -濂 -瞬 -评 -总 -隍 -对 -独 -合 -也 -是 -府 -青 -天 -诲 -墙 -组 -滴 -级 -邀 -帘 -示 -已 -时 -骸 -仄 -泅 -和 -遨 -店 -雇 -疫 -持 -巍 -踮 -境 -只 -亨 -目 -鉴 -崤 -闲 -体 -泄 -杂 -作 -般 -轰 -化 -解 -迂 -诿 -蛭 -璀 -腾 -告 -版 -服 -省 -师 -小 -规 -程 -线 -海 -办 -引 -二 -桧 -牌 -砺 -洄 -裴 -修 -图 -痫 -胡 -许 -犊 -事 -郛 -基 -柴 -呼 -食 -研 -奶 -律 -蛋 -因 -葆 -察 -戏 -褒 -戒 -再 -李 -骁 -工 -貂 -油 -鹅 -章 -啄 -休 -场 -给 -睡 -纷 -豆 -器 -捎 -说 -敏 -学 -会 -浒 -设 -诊 -格 -廓 -查 -来 -霓 -室 -溆 -¢ -诡 -寥 -焕 -舜 -柒 -狐 -回 -戟 -砾 -厄 -实 -翩 -尿 -五 -入 -径 -惭 -喹 -股 -宇 -篝 -| -; -美 -期 -云 -九 -祺 -扮 -靠 -锝 -槌 -系 -企 -酰 -阊 -暂 -蚕 -忻 -豁 -本 -羹 -执 -条 -钦 -H -獒 -限 -进 -季 -楦 -于 -芘 -玖 -铋 -茯 -未 -答 -粘 -括 -样 -精 -欠 -矢 -甥 -帷 -嵩 -扣 -令 -仔 -风 -皈 -行 -支 -部 -蓉 -刮 -站 -蜡 -救 -钊 -汗 -松 -嫌 -成 -可 -. 
-鹤 -院 -从 -交 -政 -怕 -活 -调 -球 -局 -验 -髌 -第 -韫 -谗 -串 -到 -圆 -年 -米 -/ -* -友 -忿 -检 -区 -看 -自 -敢 -刃 -个 -兹 -弄 -流 -留 -同 -没 -齿 -星 -聆 -轼 -湖 -什 -三 -建 -蛔 -儿 -椋 -汕 -震 -颧 -鲤 -跟 -力 -情 -璺 -铨 -陪 -务 -指 -族 -训 -滦 -鄣 -濮 -扒 -商 -箱 -十 -召 -慷 -辗 -所 -莞 -管 -护 -臭 -横 -硒 -嗓 -接 -侦 -六 -露 -党 -馋 -驾 -剖 -高 -侬 -妪 -幂 -猗 -绺 -骐 -央 -酐 -孝 -筝 -课 -徇 -缰 -门 -男 -西 -项 -句 -谙 -瞒 -秃 -篇 -教 -碲 -罚 -声 -呐 -景 -前 -富 -嘴 -鳌 -稀 -免 -朋 -啬 -睐 -去 -赈 -鱼 -住 -肩 -愕 -速 -旁 -波 -厅 -健 -茼 -厥 -鲟 -谅 -投 -攸 -炔 -数 -方 -击 -呋 -谈 -绩 -别 -愫 -僚 -躬 -鹧 -胪 -炳 -招 -喇 -膨 -泵 -蹦 -毛 -结 -5 -4 -谱 -识 -陕 -粽 -婚 -拟 -构 -且 -搜 -任 -潘 -比 -郢 -妨 -醪 -陀 -桔 -碘 -扎 -选 -哈 -骷 -楷 -亿 -明 -缆 -脯 -监 -睫 -逻 -婵 -共 -赴 -淝 -凡 -惦 -及 -达 -揖 -谩 -澹 -减 -焰 -蛹 -番 -祁 -柏 -员 -禄 -怡 -峤 -龙 -白 -叽 -生 -闯 -起 -细 -装 -谕 -竟 -聚 -钙 -上 -导 -渊 -按 -艾 -辘 -挡 -耒 -盹 -饪 -臀 -记 -邮 -蕙 -受 -各 -医 -搂 -普 -滇 -朗 -茸 -带 -翻 -酚 -( -光 -堤 -墟 -蔷 -万 -幻 -〓 -瑙 -辈 -昧 -盏 -亘 -蛀 -吉 -铰 -请 -子 -假 -闻 -税 -井 -诩 -哨 -嫂 -好 -面 -琐 -校 -馊 -鬣 -缂 -营 -访 -炖 -占 -农 -缀 -否 -经 -钚 -棵 -趟 -张 -亟 -吏 -茶 -谨 -捻 -论 -迸 -堂 -玉 -信 -吧 -瞠 -乡 -姬 -寺 -咬 -溏 -苄 -皿 -意 -赉 -宝 -尔 -钰 -艺 -特 -唳 -踉 -都 -荣 -倚 -登 -荐 -丧 -奇 -涵 -批 -炭 -近 -符 -傩 -感 -道 -着 -菊 -虹 -仲 -众 -懈 -濯 -颞 -眺 -南 -释 -北 -缝 -标 -既 -茗 -整 -撼 -迤 -贲 -挎 -耱 -拒 -某 -妍 -卫 -哇 -英 -矶 -藩 -治 -他 -元 -领 -膜 -遮 -穗 -蛾 -飞 -荒 -棺 -劫 -么 -市 -火 -温 -拈 -棚 -洼 -转 -果 -奕 -卸 -迪 -伸 -泳 -斗 -邡 -侄 -涨 -屯 -萋 -胭 -氡 -崮 -枞 -惧 -冒 -彩 -斜 -手 -豚 -随 -旭 -淑 -妞 -形 -菌 -吲 -沱 -争 -驯 -歹 -挟 -兆 -柱 -传 -至 -包 -内 -响 -临 -红 -功 -弩 -衡 -寂 -禁 -老 -棍 -耆 -渍 -织 -害 -氵 -渑 -布 -载 -靥 -嗬 -虽 -苹 -咨 -娄 -库 -雉 -榜 -帜 -嘲 -套 -瑚 -亲 -簸 -欧 -边 -6 -腿 -旮 -抛 -吹 -瞳 -得 -镓 -梗 -厨 -继 -漾 -愣 -憨 -士 -策 -窑 -抑 -躯 -襟 -脏 -参 -贸 -言 -干 -绸 -鳄 -穷 -藜 -音 -折 -详 -) -举 -悍 -甸 -癌 -黎 -谴 -死 -罩 -迁 -寒 -驷 -袖 -媒 -蒋 -掘 -模 -纠 -恣 -观 -祖 -蛆 -碍 -位 -稿 -主 -澧 -跌 -筏 -京 -锏 -帝 -贴 -证 -糠 -才 -黄 -鲸 -略 -炯 -饱 -四 -出 -园 -犀 -牧 -容 -汉 -杆 -浈 -汰 -瑷 -造 -虫 -瘩 -怪 -驴 -济 -应 -花 -沣 -谔 -夙 -旅 -价 -矿 -以 -考 -s -u -呦 -晒 -巡 -茅 -准 -肟 -瓴 -詹 -仟 -褂 -译 -桌 -混 -宁 -怦 -郑 -抿 -些 -余 -鄂 -饴 -攒 -珑 -群 -阖 -岔 -琨 -藓 -预 -环 -洮 -岌 -宀 -杲 -瀵 -最 -常 -囡 -周 -踊 -女 -鼓 -袭 -喉 -简 -范 -薯 -遐 -疏 -粱 -黜 -禧 -法 -箔 -斤 -遥 -汝 -奥 -直 -贞 -撑 -置 -绱 -集 -她 -馅 -逗 -钧 -橱 -魉 -[ -恙 -躁 -唤 -9 -旺 -膘 -待 -脾 -惫 -购 -吗 -依 -盲 -度 -瘿 -蠖 -俾 -之 -镗 -拇 -鲵 -厝 -簧 -续 -款 -展 -啃 -表 -剔 -品 -钻 -腭 -损 -清 -锶 -统 -涌 -寸 -滨 -贪 -链 -吠 -冈 -伎 -迥 -咏 -吁 -览 -防 -迅 -失 -汾 -阔 -逵 -绀 -蔑 -列 -川 -凭 -努 -熨 -揪 -利 -俱 -绉 -抢 -鸨 -我 -即 -责 -膦 -易 -毓 -鹊 -刹 -玷 -岿 -空 -嘞 -绊 -排 -术 -估 -锷 -违 -们 -苟 -铜 -播 -肘 -件 -烫 -审 -鲂 -广 -像 -铌 -惰 -铟 -巳 -胍 -鲍 -康 -憧 -色 -恢 -想 -拷 -尤 -疳 -知 -S -Y -F -D -A -峄 -裕 -帮 -握 -搔 -氐 -氘 -难 -墒 -沮 -雨 -叁 -缥 -悴 -藐 -湫 -娟 -苑 -稠 -颛 -簇 -后 -阕 -闭 -蕤 -缚 -怎 -佞 -码 -嘤 -蔡 -痊 -舱 -螯 -帕 -赫 -昵 -升 -烬 -岫 -、 -疵 -蜻 -髁 -蕨 -隶 -烛 -械 -丑 -盂 -梁 -强 -鲛 -由 -拘 -揉 -劭 -龟 -撤 -钩 -呕 -孛 -费 -妻 -漂 -求 -阑 -崖 -秤 -甘 -通 -深 -补 -赃 -坎 -床 -啪 -承 -吼 -量 -暇 -钼 -烨 -阂 -擎 -脱 -逮 -称 -P -神 -属 -矗 -华 -届 -狍 -葑 -汹 -育 -患 -窒 -蛰 -佼 -静 -槎 -运 -鳗 -庆 -逝 -曼 -疱 -克 -代 -官 -此 -麸 -耧 -蚌 -晟 -例 -础 -榛 -副 -测 -唰 -缢 -迹 -灬 -霁 -身 -岁 -赭 -扛 -又 -菡 -乜 -雾 -板 -读 -陷 -徉 -贯 -郁 -虑 -变 -钓 -菜 -圾 -现 -琢 -式 -乐 -维 -渔 -浜 -左 -吾 -脑 -钡 -警 -T -啵 -拴 -偌 -漱 -湿 -硕 -止 -骼 -魄 -积 -燥 -联 -踢 -玛 -则 -窿 -见 -振 -畿 -送 -班 -钽 -您 -赵 -刨 -印 -讨 -踝 -籍 -谡 -舌 -崧 -汽 -蔽 -沪 -酥 -绒 -怖 -财 -帖 -肱 -私 -莎 -勋 -羔 -霸 -励 -哼 -帐 -将 -帅 -渠 -纪 -婴 -娩 -岭 -厘 -滕 -吻 -伤 -坝 -冠 -戊 -隆 -瘁 -介 -涧 -物 -黍 -并 -姗 -奢 -蹑 -掣 -垸 -锴 -命 -箍 -捉 -病 -辖 -琰 -眭 -迩 -艘 -绌 -繁 -寅 -若 -毋 -思 -诉 -类 -诈 -燮 -轲 -酮 -狂 -重 -反 -职 -筱 -县 -委 -磕 -绣 -奖 -晋 -濉 -志 -徽 -肠 -呈 -獐 -坻 -口 -片 -碰 -几 -村 -柿 -劳 -料 -获 -亩 -惕 -晕 -厌 -号 -罢 -池 -正 -鏖 -煨 -家 -棕 -复 -尝 -懋 -蜥 -锅 -岛 -扰 -队 -坠 -瘾 -钬 -@ -卧 -疣 -镇 -譬 -冰 -彷 -频 -黯 -据 -垄 -采 -八 -缪 -瘫 -型 -熹 -砰 -楠 -襁 -箐 -但 -嘶 -绳 -啤 -拍 -盥 -穆 -傲 -洗 -盯 -塘 -怔 -筛 -丿 -台 -恒 -喂 -葛 -永 -¥ -烟 -酒 -桦 -书 -砂 -蚝 -缉 -态 -瀚 -袄 -圳 -轻 -蛛 -超 -榧 -遛 -姒 -奘 -铮 -右 -荽 -望 -偻 -卡 -丶 -氰 -附 -做 -革 -索 -戚 -坨 -桷 -唁 -垅 -榻 -岐 -偎 -坛 -莨 -山 -殊 -微 -骇 -陈 -爨 -推 -嗝 -驹 -澡 -藁 -呤 -卤 -嘻 -糅 -逛 -侵 -郓 -酌 -德 -摇 -※ -鬃 -被 -慨 -殡 -羸 -昌 -泡 -戛 -鞋 -河 -宪 -沿 -玲 -鲨 -翅 -哽 -源 -铅 -语 -照 
-邯 -址 -荃 -佬 -顺 -鸳 -町 -霭 -睾 -瓢 -夸 -椁 -晓 -酿 -痈 -咔 -侏 -券 -噎 -湍 -签 -嚷 -离 -午 -尚 -社 -锤 -背 -孟 -使 -浪 -缦 -潍 -鞅 -军 -姹 -驶 -笑 -鳟 -鲁 -》 -孽 -钜 -绿 -洱 -礴 -焯 -椰 -颖 -囔 -乌 -孔 -巴 -互 -性 -椽 -哞 -聘 -昨 -早 -暮 -胶 -炀 -隧 -低 -彗 -昝 -铁 -呓 -氽 -藉 -喔 -癖 -瑗 -姨 -权 -胱 -韦 -堑 -蜜 -酋 -楝 -砝 -毁 -靓 -歙 -锲 -究 -屋 -喳 -骨 -辨 -碑 -武 -鸠 -宫 -辜 -烊 -适 -坡 -殃 -培 -佩 -供 -走 -蜈 -迟 -翼 -况 -姣 -凛 -浔 -吃 -飘 -债 -犟 -金 -促 -苛 -崇 -坂 -莳 -畔 -绂 -兵 -蠕 -斋 -根 -砍 -亢 -欢 -恬 -崔 -剁 -餐 -榫 -快 -扶 -‖ -濒 -缠 -鳜 -当 -彭 -驭 -浦 -篮 -昀 -锆 -秸 -钳 -弋 -娣 -瞑 -夷 -龛 -苫 -拱 -致 -% -嵊 -障 -隐 -弑 -初 -娓 -抉 -汩 -累 -蓖 -" -唬 -助 -苓 -昙 -押 -毙 -破 -城 -郧 -逢 -嚏 -獭 -瞻 -溱 -婿 -赊 -跨 -恼 -璧 -萃 -姻 -貉 -灵 -炉 -密 -氛 -陶 -砸 -谬 -衔 -点 -琛 -沛 -枳 -层 -岱 -诺 -脍 -榈 -埂 -征 -冷 -裁 -打 -蹴 -素 -瘘 -逞 -蛐 -聊 -激 -腱 -萘 -踵 -飒 -蓟 -吆 -取 -咙 -簋 -涓 -矩 -曝 -挺 -揣 -座 -你 -史 -舵 -焱 -尘 -苏 -笈 -脚 -溉 -榨 -诵 -樊 -邓 -焊 -义 -庶 -儋 -蟋 -蒲 -赦 -呷 -杞 -诠 -豪 -还 -试 -颓 -茉 -太 -除 -紫 -逃 -痴 -草 -充 -鳕 -珉 -祗 -墨 -渭 -烩 -蘸 -慕 -璇 -镶 -穴 -嵘 -恶 -骂 -险 -绋 -幕 -碉 -肺 -戳 -刘 -潞 -秣 -纾 -潜 -銮 -洛 -须 -罘 -销 -瘪 -汞 -兮 -屉 -r -林 -厕 -质 -探 -划 -狸 -殚 -善 -煊 -烹 -〒 -锈 -逯 -宸 -辍 -泱 -柚 -袍 -远 -蹋 -嶙 -绝 -峥 -娥 -缍 -雀 -徵 -认 -镱 -谷 -= -贩 -勉 -撩 -鄯 -斐 -洋 -非 -祚 -泾 -诒 -饿 -撬 -威 -晷 -搭 -芍 -锥 -笺 -蓦 -候 -琊 -档 -礁 -沼 -卵 -荠 -忑 -朝 -凹 -瑞 -头 -仪 -弧 -孵 -畏 -铆 -突 -衲 -车 -浩 -气 -茂 -悖 -厢 -枕 -酝 -戴 -湾 -邹 -飚 -攘 -锂 -写 -宵 -翁 -岷 -无 -喜 -丈 -挑 -嗟 -绛 -殉 -议 -槽 -具 -醇 -淞 -笃 -郴 -阅 -饼 -底 -壕 -砚 -弈 -询 -缕 -庹 -翟 -零 -筷 -暨 -舟 -闺 -甯 -撞 -麂 -茌 -蔼 -很 -珲 -捕 -棠 -角 -阉 -媛 -娲 -诽 -剿 -尉 -爵 -睬 -韩 -诰 -匣 -危 -糍 -镯 -立 -浏 -阳 -少 -盆 -舔 -擘 -匪 -申 -尬 -铣 -旯 -抖 -赘 -瓯 -居 -ˇ -哮 -游 -锭 -茏 -歌 -坏 -甚 -秒 -舞 -沙 -仗 -劲 -潺 -阿 -燧 -郭 -嗖 -霏 -忠 -材 -奂 -耐 -跺 -砀 -输 -岖 -媳 -氟 -极 -摆 -灿 -今 -扔 -腻 -枝 -奎 -药 -熄 -吨 -话 -q -额 -慑 -嘌 -协 -喀 -壳 -埭 -视 -著 -於 -愧 -陲 -翌 -峁 -颅 -佛 -腹 -聋 -侯 -咎 -叟 -秀 -颇 -存 -较 -罪 -哄 -岗 -扫 -栏 -钾 -羌 -己 -璨 -枭 -霉 -煌 -涸 -衿 -键 -镝 -益 -岢 -奏 -连 -夯 -睿 -冥 -均 -糖 -狞 -蹊 -稻 -爸 -刿 -胥 -煜 -丽 -肿 -璃 -掸 -跚 -灾 -垂 -樾 -濑 -乎 -莲 -窄 -犹 -撮 -战 -馄 -软 -络 -显 -鸢 -胸 -宾 -妲 -恕 -埔 -蝌 -份 -遇 -巧 -瞟 -粒 -恰 -剥 -桡 -博 -讯 -凯 -堇 -阶 -滤 -卖 -斌 -骚 -彬 -兑 -磺 -樱 -舷 -两 -娱 -福 -仃 -差 -找 -桁 -÷ -净 -把 -阴 -污 -戬 -雷 -碓 -蕲 -楚 -罡 -焖 -抽 -妫 -咒 -仑 -闱 -尽 -邑 -菁 -爱 -贷 -沥 -鞑 -牡 -嗉 -崴 -骤 -塌 -嗦 -订 -拮 -滓 -捡 -锻 -次 -坪 -杩 -臃 -箬 -融 -珂 -鹗 -宗 -枚 -降 -鸬 -妯 -阄 -堰 -盐 -毅 -必 -杨 -崃 -俺 -甬 -状 -莘 -货 -耸 -菱 -腼 -铸 -唏 -痤 -孚 -澳 -懒 -溅 -翘 -疙 -杷 -淼 -缙 -骰 -喊 -悉 -砻 -坷 -艇 -赁 -界 -谤 -纣 -宴 -晃 -茹 -归 -饭 -梢 -铡 -街 -抄 -肼 -鬟 -苯 -颂 -撷 -戈 -炒 -咆 -茭 -瘙 -负 -仰 -客 -琉 -铢 -封 -卑 -珥 -椿 -镧 -窨 -鬲 -寿 -御 -袤 -铃 -萎 -砖 -餮 -脒 -裳 -肪 -孕 -嫣 -馗 -嵇 -恳 -氯 -江 -石 -褶 -冢 -祸 -阻 -狈 -羞 -银 -靳 -透 -咳 -叼 -敷 -芷 -啥 -它 -瓤 -兰 -痘 -懊 -逑 -肌 -往 -捺 -坊 -甩 -呻 -〃 -沦 -忘 -膻 -祟 -菅 -剧 -崆 -智 -坯 -臧 -霍 -墅 -攻 -眯 -倘 -拢 -骠 -铐 -庭 -岙 -瓠 -′ -缺 -泥 -迢 -捶 -? -? 
-郏 -喙 -掷 -沌 -纯 -秘 -种 -听 -绘 -固 -螨 -团 -香 -盗 -妒 -埚 -蓝 -拖 -旱 -荞 -铀 -血 -遏 -汲 -辰 -叩 -拽 -幅 -硬 -惶 -桀 -漠 -措 -泼 -唑 -齐 -肾 -念 -酱 -虚 -屁 -耶 -旗 -砦 -闵 -婉 -馆 -拭 -绅 -韧 -忏 -窝 -醋 -葺 -顾 -辞 -倜 -堆 -辋 -逆 -玟 -贱 -疾 -董 -惘 -倌 -锕 -淘 -嘀 -莽 -俭 -笏 -绑 -鲷 -杈 -择 -蟀 -粥 -嗯 -驰 -逾 -案 -谪 -褓 -胫 -哩 -昕 -颚 -鲢 -绠 -躺 -鹄 -崂 -儒 -俨 -丝 -尕 -泌 -啊 -萸 -彰 -幺 -吟 -骄 -苣 -弦 -脊 -瑰 -〈 -诛 -镁 -析 -闪 -剪 -侧 -哟 -框 -螃 -守 -嬗 -燕 -狭 -铈 -缮 -概 -迳 -痧 -鲲 -俯 -售 -笼 -痣 -扉 -挖 -满 -咋 -援 -邱 -扇 -歪 -便 -玑 -绦 -峡 -蛇 -叨 -〖 -泽 -胃 -斓 -喋 -怂 -坟 -猪 -该 -蚬 -炕 -弥 -赞 -棣 -晔 -娠 -挲 -狡 -创 -疖 -铕 -镭 -稷 -挫 -弭 -啾 -翔 -粉 -履 -苘 -哦 -楼 -秕 -铂 -土 -锣 -瘟 -挣 -栉 -习 -享 -桢 -袅 -磨 -桂 -谦 -延 -坚 -蔚 -噗 -署 -谟 -猬 -钎 -恐 -嬉 -雒 -倦 -衅 -亏 -璩 -睹 -刻 -殿 -王 -算 -雕 -麻 -丘 -柯 -骆 -丸 -塍 -谚 -添 -鲈 -垓 -桎 -蚯 -芥 -予 -飕 -镦 -谌 -窗 -醚 -菀 -亮 -搪 -莺 -蒿 -羁 -足 -J -真 -轶 -悬 -衷 -靛 -翊 -掩 -哒 -炅 -掐 -冼 -妮 -l -谐 -稚 -荆 -擒 -犯 -陵 -虏 -浓 -崽 -刍 -陌 -傻 -孜 -千 -靖 -演 -矜 -钕 -煽 -杰 -酗 -渗 -伞 -栋 -俗 -泫 -戍 -罕 -沾 -疽 -灏 -煦 -芬 -磴 -叱 -阱 -榉 -湃 -蜀 -叉 -醒 -彪 -租 -郡 -篷 -屎 -良 -垢 -隗 -弱 -陨 -峪 -砷 -掴 -颁 -胎 -雯 -绵 -贬 -沐 -撵 -隘 -篙 -暖 -曹 -陡 -栓 -填 -臼 -彦 -瓶 -琪 -潼 -哪 -鸡 -摩 -啦 -俟 -锋 -域 -耻 -蔫 -疯 -纹 -撇 -毒 -绶 -痛 -酯 -忍 -爪 -赳 -歆 -嘹 -辕 -烈 -册 -朴 -钱 -吮 -毯 -癜 -娃 -谀 -邵 -厮 -炽 -璞 -邃 -丐 -追 -词 -瓒 -忆 -轧 -芫 -谯 -喷 -弟 -半 -冕 -裙 -掖 -墉 -绮 -寝 -苔 -势 -顷 -褥 -切 -衮 -君 -佳 -嫒 -蚩 -霞 -佚 -洙 -逊 -镖 -暹 -唛 -& -殒 -顶 -碗 -獗 -轭 -铺 -蛊 -废 -恹 -汨 -崩 -珍 -那 -杵 -曲 -纺 -夏 -薰 -傀 -闳 -淬 -姘 -舀 -拧 -卷 -楂 -恍 -讪 -厩 -寮 -篪 -赓 -乘 -灭 -盅 -鞣 -沟 -慎 -挂 -饺 -鼾 -杳 -树 -缨 -丛 -絮 -娌 -臻 -嗳 -篡 -侩 -述 -衰 -矛 -圈 -蚜 -匕 -筹 -匿 -濞 -晨 -叶 -骋 -郝 -挚 -蚴 -滞 -增 -侍 -描 -瓣 -吖 -嫦 -蟒 -匾 -圣 -赌 -毡 -癞 -恺 -百 -曳 -需 -篓 -肮 -庖 -帏 -卿 -驿 -遗 -蹬 -鬓 -骡 -歉 -芎 -胳 -屐 -禽 -烦 -晌 -寄 -媾 -狄 -翡 -苒 -船 -廉 -终 -痞 -殇 -々 -畦 -饶 -改 -拆 -悻 -萄 -£ -瓿 -乃 -訾 -桅 -匮 -溧 -拥 -纱 -铍 -骗 -蕃 -龋 -缬 -父 -佐 -疚 -栎 -醍 -掳 -蓄 -x -惆 -颜 -鲆 -榆 -〔 -猎 -敌 -暴 -谥 -鲫 -贾 -罗 -玻 -缄 -扦 -芪 -癣 -落 -徒 -臾 -恿 -猩 -托 -邴 -肄 -牵 -春 -陛 -耀 -刊 -拓 -蓓 -邳 -堕 -寇 -枉 -淌 -啡 -湄 -兽 -酷 -萼 -碚 -濠 -萤 -夹 -旬 -戮 -梭 -琥 -椭 -昔 -勺 -蜊 -绐 -晚 -孺 -僵 -宣 -摄 -冽 -旨 -萌 -忙 -蚤 -眉 -噼 -蟑 -付 -契 -瓜 -悼 -颡 -壁 -曾 -窕 -颢 -澎 -仿 -俑 -浑 -嵌 -浣 -乍 -碌 -褪 -乱 -蔟 -隙 -玩 -剐 -葫 -箫 -纲 -围 -伐 -决 -伙 -漩 -瑟 -刑 -肓 -镳 -缓 -蹭 -氨 -皓 -典 -畲 -坍 -铑 -檐 -塑 -洞 -倬 -储 -胴 -淳 -戾 -吐 -灼 -惺 -妙 -毕 -珐 -缈 -虱 -盖 -羰 -鸿 -磅 -谓 -髅 -娴 -苴 -唷 -蚣 -霹 -抨 -贤 -唠 -犬 -誓 -逍 -庠 -逼 -麓 -籼 -釉 -呜 -碧 -秧 -氩 -摔 -霄 -穸 -纨 -辟 -妈 -映 -完 -牛 -缴 -嗷 -炊 -恩 -荔 -茆 -掉 -紊 -慌 -莓 -羟 -阙 -萁 -磐 -另 -蕹 -辱 -鳐 -湮 -吡 -吩 -唐 -睦 -垠 -舒 -圜 -冗 -瞿 -溺 -芾 -囱 -匠 -僳 -汐 -菩 -饬 -漓 -黑 -霰 -浸 -濡 -窥 -毂 -蒡 -兢 -驻 -鹉 -芮 -诙 -迫 -雳 -厂 -忐 -臆 -猴 -鸣 -蚪 -栈 -箕 -羡 -渐 -莆 -捍 -眈 -哓 -趴 -蹼 -埕 -嚣 -骛 -宏 -淄 -斑 -噜 -严 -瑛 -垃 -椎 -诱 -压 -庾 -绞 -焘 -廿 -抡 -迄 -棘 -夫 -纬 -锹 -眨 -瞌 -侠 -脐 -竞 -瀑 -孳 -骧 -遁 -姜 -颦 -荪 -滚 -萦 -伪 -逸 -粳 -爬 -锁 -矣 -役 -趣 -洒 -颔 -诏 -逐 -奸 -甭 -惠 -攀 -蹄 -泛 -尼 -拼 -阮 -鹰 -亚 -颈 -惑 -勒 -〉 -际 -肛 -爷 -刚 -钨 -丰 -养 -冶 -鲽 -辉 -蔻 -画 -覆 -皴 -妊 -麦 -返 -醉 -皂 -擀 -〗 -酶 -凑 -粹 -悟 -诀 -硖 -港 -卜 -z -杀 -涕 -± -舍 -铠 -抵 -弛 -段 -敝 -镐 -奠 -拂 -轴 -跛 -袱 -e -t -沉 -菇 -俎 -薪 -峦 -秭 -蟹 -历 -盟 -菠 -寡 -液 -肢 -喻 -染 -裱 -悱 -抱 -氙 -赤 -捅 -猛 -跑 -氮 -谣 -仁 -尺 -辊 -窍 -烙 -衍 -架 -擦 -倏 -璐 -瑁 -币 -楞 -胖 -夔 -趸 -邛 -惴 -饕 -虔 -蝎 -§ -哉 -贝 -宽 -辫 -炮 -扩 -饲 -籽 -魏 -菟 -锰 -伍 -猝 -末 -琳 -哚 -蛎 -邂 -呀 -姿 -鄞 -却 -歧 -仙 -恸 -椐 -森 -牒 -寤 -袒 -婆 -虢 -雅 -钉 -朵 -贼 -欲 -苞 -寰 -故 -龚 -坭 -嘘 -咫 -礼 -硷 -兀 -睢 -汶 -’ -铲 -烧 -绕 -诃 -浃 -钿 -哺 -柜 -讼 -颊 -璁 -腔 -洽 -咐 -脲 -簌 -筠 -镣 -玮 -鞠 -谁 -兼 -姆 -挥 -梯 -蝴 -谘 -漕 -刷 -躏 -宦 -弼 -b -垌 -劈 -麟 -莉 -揭 -笙 -渎 -仕 -嗤 -仓 -配 -怏 -抬 -错 -泯 -镊 -孰 -猿 -邪 -仍 -秋 -鼬 -壹 -歇 -吵 -炼 -< -尧 -射 -柬 -廷 -胧 -霾 -凳 -隋 -肚 -浮 -梦 -祥 -株 -堵 -退 -L -鹫 -跎 -凶 -毽 -荟 -炫 -栩 -玳 -甜 -沂 -鹿 -顽 -伯 -爹 -赔 -蛴 -徐 -匡 -欣 -狰 -缸 -雹 -蟆 -疤 -默 -沤 -啜 -痂 -衣 -禅 -w -i -h -辽 -葳 -黝 -钗 -停 -沽 -棒 -馨 -颌 -肉 -吴 -硫 -悯 -劾 -娈 -马 -啧 -吊 -悌 -镑 -峭 -帆 -瀣 -涉 -咸 -疸 -滋 -泣 -翦 -拙 -癸 -钥 -蜒 -+ -尾 -庄 -凝 -泉 -婢 -渴 -谊 -乞 -陆 -锉 -糊 -鸦 -淮 -I -B -N -晦 -弗 -乔 -庥 -葡 -尻 -席 -橡 -傣 -渣 -拿 -惩 -麋 -斛 -缃 -矮 -蛏 -岘 -鸽 -姐 -膏 -催 -奔 -镒 -喱 -蠡 -摧 -钯 -胤 -柠 -拐 -璋 -鸥 -卢 -荡 -倾 -^ -_ -珀 -逄 -萧 -塾 -掇 -贮 -笆 -聂 -圃 -冲 -嵬 -M -滔 -笕 -值 
-炙 -偶 -蜱 -搐 -梆 -汪 -蔬 -腑 -鸯 -蹇 -敞 -绯 -仨 -祯 -谆 -梧 -糗 -鑫 -啸 -豺 -囹 -猾 -巢 -柄 -瀛 -筑 -踌 -沭 -暗 -苁 -鱿 -蹉 -脂 -蘖 -牢 -热 -木 -吸 -溃 -宠 -序 -泞 -偿 -拜 -檩 -厚 -朐 -毗 -螳 -吞 -媚 -朽 -担 -蝗 -橘 -畴 -祈 -糟 -盱 -隼 -郜 -惜 -珠 -裨 -铵 -焙 -琚 -唯 -咚 -噪 -骊 -丫 -滢 -勤 -棉 -呸 -咣 -淀 -隔 -蕾 -窈 -饨 -挨 -煅 -短 -匙 -粕 -镜 -赣 -撕 -墩 -酬 -馁 -豌 -颐 -抗 -酣 -氓 -佑 -搁 -哭 -递 -耷 -涡 -桃 -贻 -碣 -截 -瘦 -昭 -镌 -蔓 -氚 -甲 -猕 -蕴 -蓬 -散 -拾 -纛 -狼 -猷 -铎 -埋 -旖 -矾 -讳 -囊 -糜 -迈 -粟 -蚂 -紧 -鲳 -瘢 -栽 -稼 -羊 -锄 -斟 -睁 -桥 -瓮 -蹙 -祉 -醺 -鼻 -昱 -剃 -跳 -篱 -跷 -蒜 -翎 -宅 -晖 -嗑 -壑 -峻 -癫 -屏 -狠 -陋 -袜 -途 -憎 -祀 -莹 -滟 -佶 -溥 -臣 -约 -盛 -峰 -磁 -慵 -婪 -拦 -莅 -朕 -鹦 -粲 -裤 -哎 -疡 -嫖 -琵 -窟 -堪 -谛 -嘉 -儡 -鳝 -斩 -郾 -驸 -酊 -妄 -胜 -贺 -徙 -傅 -噌 -钢 -栅 -庇 -恋 -匝 -巯 -邈 -尸 -锚 -粗 -佟 -蛟 -薹 -纵 -蚊 -郅 -绢 -锐 -苗 -俞 -篆 -淆 -膀 -鲜 -煎 -诶 -秽 -寻 -涮 -刺 -怀 -噶 -巨 -褰 -魅 -灶 -灌 -桉 -藕 -谜 -舸 -薄 -搀 -恽 -借 -牯 -痉 -渥 -愿 -亓 -耘 -杠 -柩 -锔 -蚶 -钣 -珈 -喘 -蹒 -幽 -赐 -稗 -晤 -莱 -泔 -扯 -肯 -菪 -裆 -腩 -豉 -疆 -骜 -腐 -倭 -珏 -唔 -粮 -亡 -润 -慰 -伽 -橄 -玄 -誉 -醐 -胆 -龊 -粼 -塬 -陇 -彼 -削 -嗣 -绾 -芽 -妗 -垭 -瘴 -爽 -薏 -寨 -龈 -泠 -弹 -赢 -漪 -猫 -嘧 -涂 -恤 -圭 -茧 -烽 -屑 -痕 -巾 -赖 -荸 -凰 -腮 -畈 -亵 -蹲 -偃 -苇 -澜 -艮 -换 -骺 -烘 -苕 -梓 -颉 -肇 -哗 -悄 -氤 -涠 -葬 -屠 -鹭 -植 -竺 -佯 -诣 -鲇 -瘀 -鲅 -邦 -移 -滁 -冯 -耕 -癔 -戌 -茬 -沁 -巩 -悠 -湘 -洪 -痹 -锟 -循 -谋 -腕 -鳃 -钠 -捞 -焉 -迎 -碱 -伫 -急 -榷 -奈 -邝 -卯 -辄 -皲 -卟 -醛 -畹 -忧 -稳 -雄 -昼 -缩 -阈 -睑 -扌 -耗 -曦 -涅 -捏 -瞧 -邕 -淖 -漉 -铝 -耦 -禹 -湛 -喽 -莼 -琅 -诸 -苎 -纂 -硅 -始 -嗨 -傥 -燃 -臂 -赅 -嘈 -呆 -贵 -屹 -壮 -肋 -亍 -蚀 -卅 -豹 -腆 -邬 -迭 -浊 -} -童 -螂 -捐 -圩 -勐 -触 -寞 -汊 -壤 -荫 -膺 -渌 -芳 -懿 -遴 -螈 -泰 -蓼 -蛤 -茜 -舅 -枫 -朔 -膝 -眙 -避 -梅 -判 -鹜 -璜 -牍 -缅 -垫 -藻 -黔 -侥 -惚 -懂 -踩 -腰 -腈 -札 -丞 -唾 -慈 -顿 -摹 -荻 -琬 -~ -斧 -沈 -滂 -胁 -胀 -幄 -莜 -Z -匀 -鄄 -掌 -绰 -茎 -焚 -赋 -萱 -谑 -汁 -铒 -瞎 -夺 -蜗 -野 -娆 -冀 -弯 -篁 -懵 -灞 -隽 -芡 -脘 -俐 -辩 -芯 -掺 -喏 -膈 -蝈 -觐 -悚 -踹 -蔗 -熠 -鼠 -呵 -抓 -橼 -峨 -畜 -缔 -禾 -崭 -弃 -熊 -摒 -凸 -拗 -穹 -蒙 -抒 -祛 -劝 -闫 -扳 -阵 -醌 -踪 -喵 -侣 -搬 -仅 -荧 -赎 -蝾 -琦 -买 -婧 -瞄 -寓 -皎 -冻 -赝 -箩 -莫 -瞰 -郊 -笫 -姝 -筒 -枪 -遣 -煸 -袋 -舆 -痱 -涛 -母 -〇 -启 -践 -耙 -绲 -盘 -遂 -昊 -搞 -槿 -诬 -纰 -泓 -惨 -檬 -亻 -越 -C -o -憩 -熵 -祷 -钒 -暧 -塔 -阗 -胰 -咄 -娶 -魔 -琶 -钞 -邻 -扬 -杉 -殴 -咽 -弓 -〆 -髻 -】 -吭 -揽 -霆 -拄 -殖 -脆 -彻 -岩 -芝 -勃 -辣 -剌 -钝 -嘎 -甄 -佘 -皖 -伦 -授 -徕 -憔 -挪 -皇 -庞 -稔 -芜 -踏 -溴 -兖 -卒 -擢 -饥 -鳞 -煲 -‰ -账 -颗 -叻 -斯 -捧 -鳍 -琮 -讹 -蛙 -纽 -谭 -酸 -兔 -莒 -睇 -伟 -觑 -羲 -嗜 -宜 -褐 -旎 -辛 -卦 -诘 -筋 -鎏 -溪 -挛 -熔 -阜 -晰 -鳅 -丢 -奚 -灸 -呱 -献 -陉 -黛 -鸪 -甾 -萨 -疮 -拯 -洲 -疹 -辑 -叙 -恻 -谒 -允 -柔 -烂 -氏 -逅 -漆 -拎 -惋 -扈 -湟 -纭 -啕 -掬 -擞 -哥 -忽 -涤 -鸵 -靡 -郗 -瓷 -扁 -廊 -怨 -雏 -钮 -敦 -E -懦 -憋 -汀 -拚 -啉 -腌 -岸 -f -痼 -瞅 -尊 -咀 -眩 -飙 -忌 -仝 -迦 -熬 -毫 -胯 -篑 -茄 -腺 -凄 -舛 -碴 -锵 -诧 -羯 -後 -漏 -汤 -宓 -仞 -蚁 -壶 -谰 -皑 -铄 -棰 -罔 -辅 -晶 -苦 -牟 -闽 -\ -烃 -饮 -聿 -丙 -蛳 -朱 -煤 -涔 -鳖 -犁 -罐 -荼 -砒 -淦 -妤 -黏 -戎 -孑 -婕 -瑾 -戢 -钵 -枣 -捋 -砥 -衩 -狙 -桠 -稣 -阎 -肃 -梏 -诫 -孪 -昶 -婊 -衫 -嗔 -侃 -塞 -蜃 -樵 -峒 -貌 -屿 -欺 -缫 -阐 -栖 -诟 -珞 -荭 -吝 -萍 -嗽 -恂 -啻 -蜴 -磬 -峋 -俸 -豫 -谎 -徊 -镍 -韬 -魇 -晴 -U -囟 -猜 -蛮 -坐 -囿 -伴 -亭 -肝 -佗 -蝠 -妃 -胞 -滩 -榴 -氖 -垩 -苋 -砣 -扪 -馏 -姓 -轩 -厉 -夥 -侈 -禀 -垒 -岑 -赏 -钛 -辐 -痔 -披 -纸 -碳 -“ -坞 -蠓 -挤 -荥 -沅 -悔 -铧 -帼 -蒌 -蝇 -a -p -y -n -g -哀 -浆 -瑶 -凿 -桶 -馈 -皮 -奴 -苜 -佤 -伶 -晗 -铱 -炬 -优 -弊 -氢 -恃 -甫 -攥 -端 -锌 -灰 -稹 -炝 -曙 -邋 -亥 -眶 -碾 -拉 -萝 -绔 -捷 -浍 -腋 -姑 -菖 -凌 -涞 -麽 -锢 -桨 -潢 -绎 -镰 -殆 -锑 -渝 -铬 -困 -绽 -觎 -匈 -糙 -暑 -裹 -鸟 -盔 -肽 -迷 -綦 -『 -亳 -佝 -俘 -钴 -觇 -骥 -仆 -疝 -跪 -婶 -郯 -瀹 -唉 -脖 -踞 -针 -晾 -忒 -扼 -瞩 -叛 -椒 -疟 -嗡 -邗 -肆 -跆 -玫 -忡 -捣 -咧 -唆 -艄 -蘑 -潦 -笛 -阚 -沸 -泻 -掊 -菽 -贫 -斥 -髂 -孢 -镂 -赂 -麝 -鸾 -屡 -衬 -苷 -恪 -叠 -希 -粤 -爻 -喝 -茫 -惬 -郸 -绻 -庸 -撅 -碟 -宄 -妹 -膛 -叮 -饵 -崛 -嗲 -椅 -冤 -搅 -咕 -敛 -尹 -垦 -闷 -蝉 -霎 -勰 -败 -蓑 -泸 -肤 -鹌 -幌 -焦 -浠 -鞍 -刁 -舰 -乙 -竿 -裔 -。 -茵 -函 -伊 -兄 -丨 -娜 -匍 -謇 -莪 -宥 -似 -蝽 -翳 -酪 -翠 -粑 -薇 -祢 -骏 -赠 -叫 -Q -噤 -噻 -竖 -芗 -莠 -潭 -俊 -羿 -耜 -O -郫 -趁 -嗪 -囚 -蹶 -芒 -洁 -笋 -鹑 -敲 -硝 -啶 -堡 -渲 -揩 -』 -携 -宿 -遒 -颍 -扭 -棱 -割 -萜 -蔸 -葵 -琴 -捂 -饰 -衙 -耿 -掠 -募 -岂 -窖 -涟 -蔺 -瘤 -柞 -瞪 -怜 -匹 -距 -楔 -炜 -哆 -秦 -缎 -幼 -茁 -绪 -痨 -恨 -楸 -娅 -瓦 -桩 -雪 -嬴 -伏 -榔 -妥 -铿 -拌 -眠 -雍 -缇 -‘ -卓 -搓 -哌 -觞 -噩 -屈 -哧 -髓 -咦 -巅 -娑 -侑 -淫 -膳 -祝 -勾 -姊 -莴 
-胄 -疃 -薛 -蜷 -胛 -巷 -芙 -芋 -熙 -闰 -勿 -窃 -狱 -剩 -钏 -幢 -陟 -铛 -慧 -靴 -耍 -k -浙 -浇 -飨 -惟 -绗 -祜 -澈 -啼 -咪 -磷 -摞 -诅 -郦 -抹 -跃 -壬 -吕 -肖 -琏 -颤 -尴 -剡 -抠 -凋 -赚 -泊 -津 -宕 -殷 -倔 -氲 -漫 -邺 -涎 -怠 -$ -垮 -荬 -遵 -俏 -叹 -噢 -饽 -蜘 -孙 -筵 -疼 -鞭 -羧 -牦 -箭 -潴 -c -眸 -祭 -髯 -啖 -坳 -愁 -芩 -驮 -倡 -巽 -穰 -沃 -胚 -怒 -凤 -槛 -剂 -趵 -嫁 -v -邢 -灯 -鄢 -桐 -睽 -檗 -锯 -槟 -婷 -嵋 -圻 -诗 -蕈 -颠 -遭 -痢 -芸 -怯 -馥 -竭 -锗 -徜 -恭 -遍 -籁 -剑 -嘱 -苡 -龄 -僧 -桑 -潸 -弘 -澶 -楹 -悲 -讫 -愤 -腥 -悸 -谍 -椹 -呢 -桓 -葭 -攫 -阀 -翰 -躲 -敖 -柑 -郎 -笨 -橇 -呃 -魁 -燎 -脓 -葩 -磋 -垛 -玺 -狮 -沓 -砜 -蕊 -锺 -罹 -蕉 -翱 -虐 -闾 -巫 -旦 -茱 -嬷 -枯 -鹏 -贡 -芹 -汛 -矫 -绁 -拣 -禺 -佃 -讣 -舫 -惯 -乳 -趋 -疲 -挽 -岚 -虾 -衾 -蠹 -蹂 -飓 -氦 -铖 -孩 -稞 -瑜 -壅 -掀 -勘 -妓 -畅 -髋 -W -庐 -牲 -蓿 -榕 -练 -垣 -唱 -邸 -菲 -昆 -婺 -穿 -绡 -麒 -蚱 -掂 -愚 -泷 -涪 -漳 -妩 -娉 -榄 -讷 -觅 -旧 -藤 -煮 -呛 -柳 -腓 -叭 -庵 -烷 -阡 -罂 -蜕 -擂 -猖 -咿 -媲 -脉 -【 -沏 -貅 -黠 -熏 -哲 -烁 -坦 -酵 -兜 -× -潇 -撒 -剽 -珩 -圹 -乾 -摸 -樟 -帽 -嗒 -襄 -魂 -轿 -憬 -锡 -〕 -喃 -皆 -咖 -隅 -脸 -残 -泮 -袂 -鹂 -珊 -囤 -捆 -咤 -误 -徨 -闹 -淙 -芊 -淋 -怆 -囗 -拨 -梳 -渤 -R -G -绨 -蚓 -婀 -幡 -狩 -麾 -谢 -唢 -裸 -旌 -伉 -纶 -裂 -驳 -砼 -咛 -澄 -樨 -蹈 -宙 -澍 -倍 -貔 -操 -勇 -蟠 -摈 -砧 -虬 -够 -缁 -悦 -藿 -撸 -艹 -摁 -淹 -豇 -虎 -榭 -ˉ -吱 -d -° -喧 -荀 -踱 -侮 -奋 -偕 -饷 -犍 -惮 -坑 -璎 -徘 -宛 -妆 -袈 -倩 -窦 -昂 -荏 -乖 -K -怅 -撰 -鳙 -牙 -袁 -酞 -X -痿 -琼 -闸 -雁 -趾 -荚 -虻 -涝 -《 -杏 -韭 -偈 -烤 -绫 -鞘 -卉 -症 -遢 -蓥 -诋 -杭 -荨 -匆 -竣 -簪 -辙 -敕 -虞 -丹 -缭 -咩 -黟 -m -淤 -瑕 -咂 -铉 -硼 -茨 -嶂 -痒 -畸 -敬 -涿 -粪 -窘 -熟 -叔 -嫔 -盾 -忱 -裘 -憾 -梵 -赡 -珙 -咯 -娘 -庙 -溯 -胺 -葱 -痪 -摊 -荷 -卞 -乒 -髦 -寐 -铭 -坩 -胗 -枷 -爆 -溟 -嚼 -羚 -砬 -轨 -惊 -挠 -罄 -竽 -菏 -氧 -浅 -楣 -盼 -枢 -炸 -阆 -杯 -谏 -噬 -淇 -渺 -俪 -秆 -墓 -泪 -跻 -砌 -痰 -垡 -渡 -耽 -釜 -讶 -鳎 -煞 -呗 -韶 -舶 -绷 -鹳 -缜 -旷 -铊 -皱 -龌 -檀 -霖 -奄 -槐 -艳 -蝶 -旋 -哝 -赶 -骞 -蚧 -腊 -盈 -丁 -` -蜚 -矸 -蝙 -睨 -嚓 -僻 -鬼 -醴 -夜 -彝 -磊 -笔 -拔 -栀 -糕 -厦 -邰 -纫 -逭 -纤 -眦 -膊 -馍 -躇 -烯 -蘼 -冬 -诤 -暄 -骶 -哑 -瘠 -」 -臊 -丕 -愈 -咱 -螺 -擅 -跋 -搏 -硪 -谄 -笠 -淡 -嘿 -骅 -谧 -鼎 -皋 -姚 -歼 -蠢 -驼 -耳 -胬 -挝 -涯 -狗 -蒽 -孓 -犷 -凉 -芦 -箴 -铤 -孤 -嘛 -坤 -V -茴 -朦 -挞 -尖 -橙 -诞 -搴 -碇 -洵 -浚 -帚 -蜍 -漯 -柘 -嚎 -讽 -芭 -荤 -咻 -祠 -秉 -跖 -埃 -吓 -糯 -眷 -馒 -惹 -娼 -鲑 -嫩 -讴 -轮 -瞥 -靶 -褚 -乏 -缤 -宋 -帧 -删 -驱 -碎 -扑 -俩 -俄 -偏 -涣 -竹 -噱 -皙 -佰 -渚 -唧 -斡 -# -镉 -刀 -崎 -筐 -佣 -夭 -贰 -肴 -峙 -哔 -艿 -匐 -牺 -镛 -缘 -仡 -嫡 -劣 -枸 -堀 -梨 -簿 -鸭 -蒸 -亦 -稽 -浴 -{ -衢 -束 -槲 -j -阁 -揍 -疥 -棋 -潋 -聪 -窜 -乓 -睛 -插 -冉 -阪 -苍 -搽 -「 -蟾 -螟 -幸 -仇 -樽 -撂 -慢 -跤 -幔 -俚 -淅 -覃 -觊 -溶 -妖 -帛 -侨 -曰 -妾 -泗 -· -: -瀘 -風 -Ë -( -) -∶ -紅 -紗 -瑭 -雲 -頭 -鶏 -財 -許 -• -¥ -樂 -焗 -麗 -— -; -滙 -東 -榮 -繪 -興 -… -門 -業 -π -楊 -國 -顧 -é -盤 -寳 -Λ -龍 -鳳 -島 -誌 -緣 -結 -銭 -萬 -勝 -祎 -璟 -優 -歡 -臨 -時 -購 -= -★ -藍 -昇 -鐵 -觀 -勅 -農 -聲 -畫 -兿 -術 -發 -劉 -記 -專 -耑 -園 -書 -壴 -種 -Ο -● -褀 -號 -銀 -匯 -敟 -锘 -葉 -橪 -廣 -進 -蒄 -鑽 -阝 -祙 -貢 -鍋 -豊 -夬 -喆 -團 -閣 -開 -燁 -賓 -館 -酡 -沔 -順 -+ -硚 -劵 -饸 -陽 -車 -湓 -復 -萊 -氣 -軒 -華 -堃 -迮 -纟 -戶 -馬 -學 -裡 -電 -嶽 -獨 -マ -シ -サ -ジ -燘 -袪 -環 -❤ -臺 -灣 -専 -賣 -孖 -聖 -攝 -線 -▪ -α -傢 -俬 -夢 -達 -莊 -喬 -貝 -薩 -劍 -羅 -壓 -棛 -饦 -尃 -璈 -囍 -醫 -G -I -A -# -N -鷄 -髙 -嬰 -啓 -約 -隹 -潔 -賴 -藝 -~ -寶 -籣 -麺 -  -嶺 -√ -義 -網 -峩 -長 -∧ -魚 -機 -構 -② -鳯 -偉 -L -B -㙟 -畵 -鴿 -' -詩 -溝 -嚞 -屌 -藔 -佧 -玥 -蘭 -織 -1 -3 -9 -0 -7 -點 -砭 -鴨 -鋪 -銘 -廳 -弍 -‧ -創 -湯 -坶 -℃ -卩 -骝 -& -烜 -荘 -當 -潤 -扞 -係 -懷 -碶 -钅 -蚨 -讠 -☆ -叢 -爲 -埗 -涫 -塗 -→ -楽 -現 -鯨 -愛 -瑪 -鈺 -忄 -悶 -藥 -飾 -樓 -視 -孬 -ㆍ -燚 -苪 -師 -① -丼 -锽 -│ -韓 -標 -è -兒 -閏 -匋 -張 -漢 -Ü -髪 -會 -閑 -檔 -習 -裝 -の -峯 -菘 -輝 -И -雞 -釣 -億 -浐 -K -O -R -8 -H -E -P -T -W -D -S -C -M -F -姌 -饹 -» -晞 -廰 -ä -嵯 -鷹 -負 -飲 -絲 -冚 -楗 -澤 -綫 -區 -❋ -← -質 -靑 -揚 -③ -滬 -統 -産 -協 -﹑ -乸 -畐 -經 -運 -際 -洺 -岽 -為 -粵 -諾 -崋 -豐 -碁 -ɔ -V -2 -6 -齋 -誠 -訂 -´ -勑 -雙 -陳 -無 -í -泩 -媄 -夌 -刂 -i -c -t -o -r -a -嘢 -耄 -燴 -暃 -壽 -媽 -靈 -抻 -體 -唻 -É -冮 -甹 -鎮 -錦 -ʌ -蜛 -蠄 -尓 -駕 -戀 -飬 -逹 -倫 -貴 -極 -Я -Й -寬 -磚 -嶪 -郎 -職 -| -間 -n -d -剎 -伈 -課 -飛 -橋 -瘊 -№ -譜 -骓 -圗 -滘 -縣 -粿 -咅 -養 -濤 -彳 -® -% -Ⅱ -啰 -㴪 -見 -矞 -薬 -糁 -邨 -鲮 -顔 -罱 -З -選 -話 -贏 -氪 -俵 -競 -瑩 -繡 -枱 -β -綉 -á -獅 -爾 -™ -麵 -戋 -淩 -徳 -個 -劇 -場 -務 -簡 -寵 -h -實 -膠 -轱 -圖 -築 -嘣 -樹 -㸃 -營 -耵 -孫 -饃 -鄺 -飯 -麯 -遠 -輸 -坫 -孃 -乚 
-閃 -鏢 -㎡ -題 -廠 -關 -↑ -爺 -將 -軍 -連 -篦 -覌 -參 -箸 -- -窠 -棽 -寕 -夀 -爰 -歐 -呙 -閥 -頡 -熱 -雎 -垟 -裟 -凬 -勁 -帑 -馕 -夆 -疌 -枼 -馮 -貨 -蒤 -樸 -彧 -旸 -靜 -龢 -暢 -㐱 -鳥 -珺 -鏡 -灡 -爭 -堷 -廚 -Ó -騰 -診 -┅ -蘇 -褔 -凱 -頂 -豕 -亞 -帥 -嘬 -⊥ -仺 -桖 -複 -饣 -絡 -穂 -顏 -棟 -納 -▏ -濟 -親 -設 -計 -攵 -埌 -烺 -ò -頤 -燦 -蓮 -撻 -節 -講 -濱 -濃 -娽 -洳 -朿 -燈 -鈴 -護 -膚 -铔 -過 -補 -Z -U -5 -4 -坋 -闿 -䖝 -餘 -缐 -铞 -貿 -铪 -桼 -趙 -鍊 -[ -㐂 -垚 -菓 -揸 -捲 -鐘 -滏 -𣇉 -爍 -輪 -燜 -鴻 -鮮 -動 -鹞 -鷗 -丄 -慶 -鉌 -翥 -飮 -腸 -⇋ -漁 -覺 -來 -熘 -昴 -翏 -鲱 -圧 -鄉 -萭 -頔 -爐 -嫚 -г -貭 -類 -聯 -幛 -輕 -訓 -鑒 -夋 -锨 -芃 -珣 -䝉 -扙 -嵐 -銷 -處 -ㄱ -語 -誘 -苝 -歸 -儀 -燒 -楿 -內 -粢 -葒 -奧 -麥 -礻 -滿 -蠔 -穵 -瞭 -態 -鱬 -榞 -硂 -鄭 -黃 -煙 -祐 -奓 -逺 -* -瑄 -獲 -聞 -薦 -讀 -這 -樣 -決 -問 -啟 -們 -執 -説 -轉 -單 -隨 -唘 -帶 -倉 -庫 -還 -贈 -尙 -皺 -■ -餅 -產 -○ -∈ -報 -狀 -楓 -賠 -琯 -嗮 -禮 -` -傳 -> -≤ -嗞 -Φ -≥ -換 -咭 -∣ -↓ -曬 -ε -応 -寫 -″ -終 -様 -純 -費 -療 -聨 -凍 -壐 -郵 -ü -黒 -∫ -製 -塊 -調 -軽 -確 -撃 -級 -馴 -Ⅲ -涇 -繹 -數 -碼 -證 -狒 -処 -劑 -< -晧 -賀 -衆 -] -櫥 -兩 -陰 -絶 -對 -鯉 -憶 -◎ -p -e -Y -蕒 -煖 -頓 -測 -試 -鼽 -僑 -碩 -妝 -帯 -≈ -鐡 -舖 -權 -喫 -倆 -ˋ -該 -悅 -ā -俫 -. -f -s -b -m -k -g -u -j -貼 -淨 -濕 -針 -適 -備 -l -/ -給 -謢 -強 -觸 -衛 -與 -⊙ -$ -緯 -變 -⑴ -⑵ -⑶ -㎏ -殺 -∩ -幚 -─ -價 -▲ -離 -ú -ó -飄 -烏 -関 -閟 -﹝ -﹞ -邏 -輯 -鍵 -驗 -訣 -導 -歷 -屆 -層 -▼ -儱 -錄 -熳 -ē -艦 -吋 -錶 -辧 -飼 -顯 -④ -禦 -販 -気 -対 -枰 -閩 -紀 -幹 -瞓 -貊 -淚 -△ -眞 -墊 -Ω -獻 -褲 -縫 -緑 -亜 -鉅 -餠 -{ -} -◆ -蘆 -薈 -█ -◇ -溫 -彈 -晳 -粧 -犸 -穩 -訊 -崬 -凖 -熥 -П -舊 -條 -紋 -圍 -Ⅳ -筆 -尷 -難 -雜 -錯 -綁 -識 -頰 -鎖 -艶 -□ -殁 -殼 -⑧ -├ -▕ -鵬 -ǐ -ō -ǒ -糝 -綱 -▎ -μ -盜 -饅 -醬 -籤 -蓋 -釀 -鹽 -據 -à -ɡ -辦 -◥ -彐 -┌ -婦 -獸 -鲩 -伱 -ī -蒟 -蒻 -齊 -袆 -腦 -寧 -凈 -妳 -煥 -詢 -偽 -謹 -啫 -鯽 -騷 -鱸 -損 -傷 -鎻 -髮 -買 -冏 -儥 -両 -﹢ -∞ -載 -喰 -z -羙 -悵 -燙 -曉 -員 -組 -徹 -艷 -痠 -鋼 -鼙 -縮 -細 -嚒 -爯 -≠ -維 -" -鱻 -壇 -厍 -帰 -浥 -犇 -薡 -軎 -² -應 -醜 -刪 -緻 -鶴 -賜 -噁 -軌 -尨 -镔 -鷺 -槗 -彌 -葚 -濛 -請 -溇 -緹 -賢 -訪 -獴 -瑅 -資 -縤 -陣 -蕟 -栢 -韻 -祼 -恁 -伢 -謝 -劃 -涑 -總 -衖 -踺 -砋 -凉 -籃 -駿 -苼 -瘋 -昽 -紡 -驊 -腎 -﹗ -響 -杋 -剛 -嚴 -禪 -歓 -槍 -傘 -檸 -檫 -炣 -勢 -鏜 -鎢 -銑 -尐 -減 -奪 -惡 -θ -僮 -婭 -臘 -ū -ì -殻 -鉄 -∑ -蛲 -焼 -緖 -續 -紹 -懮 \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt deleted file mode 100644 index 09e275bae943431ae75f583b9f4519c96161eb85..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv4_doc_dict.txt +++ /dev/null @@ -1,15629 +0,0 @@ -' -疗 -绚 -诚 -娇 -溜 -题 -贿 -者 -廖 -更 -纳 -加 -奉 -公 -一 -就 -汴 -计 -与 -路 -房 -原 -妇 -2 -0 -8 -- -7 -其 -> -: -] -, -, -骑 -刈 -全 -消 -昏 -傈 -安 -久 -钟 -嗅 -不 -影 -处 -驽 -蜿 -资 -关 -椤 -地 -瘸 -专 -问 -忖 -票 -嫉 -炎 -韵 -要 -月 -田 -节 -陂 -鄙 -捌 -备 -拳 -伺 -眼 -网 -盎 -大 -傍 -心 -东 -愉 -汇 -蹿 -科 -每 -业 -里 -航 -晏 -字 -平 -录 -先 -1 -3 -彤 -鲶 -产 -稍 -督 -腴 -有 -象 -岳 -注 -绍 -在 -泺 -文 -定 -核 -名 -水 -过 -理 -让 -偷 -率 -等 -这 -发 -” -为 -含 -肥 -酉 -相 -鄱 -七 -编 -猥 -锛 -日 -镀 -蒂 -掰 -倒 -辆 -栾 -栗 -综 -涩 -州 -雌 -滑 -馀 -了 -机 -块 -司 -宰 -甙 -兴 -矽 -抚 -保 -用 -沧 -秩 -如 -收 -息 -滥 -页 -疑 -埠 -! -! 
-姥 -异 -橹 -钇 -向 -下 -跄 -的 -椴 -沫 -国 -绥 -獠 -报 -开 -民 -蜇 -何 -分 -凇 -长 -讥 -藏 -掏 -施 -羽 -中 -讲 -派 -嘟 -人 -提 -浼 -间 -世 -而 -古 -多 -倪 -唇 -饯 -控 -庚 -首 -赛 -蜓 -味 -断 -制 -觉 -技 -替 -艰 -溢 -潮 -夕 -钺 -外 -摘 -枋 -动 -双 -单 -啮 -户 -枇 -确 -锦 -曜 -杜 -或 -能 -效 -霜 -盒 -然 -侗 -电 -晁 -放 -步 -鹃 -新 -杖 -蜂 -吒 -濂 -瞬 -评 -总 -隍 -对 -独 -合 -也 -是 -府 -青 -天 -诲 -墙 -组 -滴 -级 -邀 -帘 -示 -已 -时 -骸 -仄 -泅 -和 -遨 -店 -雇 -疫 -持 -巍 -踮 -境 -只 -亨 -目 -鉴 -崤 -闲 -体 -泄 -杂 -作 -般 -轰 -化 -解 -迂 -诿 -蛭 -璀 -腾 -告 -版 -服 -省 -师 -小 -规 -程 -线 -海 -办 -引 -二 -桧 -牌 -砺 -洄 -裴 -修 -图 -痫 -胡 -许 -犊 -事 -郛 -基 -柴 -呼 -食 -研 -奶 -律 -蛋 -因 -葆 -察 -戏 -褒 -戒 -再 -李 -骁 -工 -貂 -油 -鹅 -章 -啄 -休 -场 -给 -睡 -纷 -豆 -器 -捎 -说 -敏 -学 -会 -浒 -设 -诊 -格 -廓 -查 -来 -霓 -室 -溆 -¢ -诡 -寥 -焕 -舜 -柒 -狐 -回 -戟 -砾 -厄 -实 -翩 -尿 -五 -入 -径 -惭 -喹 -股 -宇 -篝 -| -; -美 -期 -云 -九 -祺 -扮 -靠 -锝 -槌 -系 -企 -酰 -阊 -暂 -蚕 -忻 -豁 -本 -羹 -执 -条 -钦 -H -獒 -限 -进 -季 -楦 -于 -芘 -玖 -铋 -茯 -未 -答 -粘 -括 -样 -精 -欠 -矢 -甥 -帷 -嵩 -扣 -令 -仔 -风 -皈 -行 -支 -部 -蓉 -刮 -站 -蜡 -救 -钊 -汗 -松 -嫌 -成 -可 -. -鹤 -院 -从 -交 -政 -怕 -活 -调 -球 -局 -验 -髌 -第 -韫 -谗 -串 -到 -圆 -年 -米 -/ -* -友 -忿 -检 -区 -看 -自 -敢 -刃 -个 -兹 -弄 -流 -留 -同 -没 -齿 -星 -聆 -轼 -湖 -什 -三 -建 -蛔 -儿 -椋 -汕 -震 -颧 -鲤 -跟 -力 -情 -璺 -铨 -陪 -务 -指 -族 -训 -滦 -鄣 -濮 -扒 -商 -箱 -十 -召 -慷 -辗 -所 -莞 -管 -护 -臭 -横 -硒 -嗓 -接 -侦 -六 -露 -党 -馋 -驾 -剖 -高 -侬 -妪 -幂 -猗 -绺 -骐 -央 -酐 -孝 -筝 -课 -徇 -缰 -门 -男 -西 -项 -句 -谙 -瞒 -秃 -篇 -教 -碲 -罚 -声 -呐 -景 -前 -富 -嘴 -鳌 -稀 -免 -朋 -啬 -睐 -去 -赈 -鱼 -住 -肩 -愕 -速 -旁 -波 -厅 -健 -茼 -厥 -鲟 -谅 -投 -攸 -炔 -数 -方 -击 -呋 -谈 -绩 -别 -愫 -僚 -躬 -鹧 -胪 -炳 -招 -喇 -膨 -泵 -蹦 -毛 -结 -5 -4 -谱 -识 -陕 -粽 -婚 -拟 -构 -且 -搜 -任 -潘 -比 -郢 -妨 -醪 -陀 -桔 -碘 -扎 -选 -哈 -骷 -楷 -亿 -明 -缆 -脯 -监 -睫 -逻 -婵 -共 -赴 -淝 -凡 -惦 -及 -达 -揖 -谩 -澹 -减 -焰 -蛹 -番 -祁 -柏 -员 -禄 -怡 -峤 -龙 -白 -叽 -生 -闯 -起 -细 -装 -谕 -竟 -聚 -钙 -上 -导 -渊 -按 -艾 -辘 -挡 -耒 -盹 -饪 -臀 -记 -邮 -蕙 -受 -各 -医 -搂 -普 -滇 -朗 -茸 -带 -翻 -酚 -( -光 -堤 -墟 -蔷 -万 -幻 -〓 -瑙 -辈 -昧 -盏 -亘 -蛀 -吉 -铰 -请 -子 -假 -闻 -税 -井 -诩 -哨 -嫂 -好 -面 -琐 -校 -馊 -鬣 -缂 -营 -访 -炖 -占 -农 -缀 -否 -经 -钚 -棵 -趟 -张 -亟 -吏 -茶 -谨 -捻 -论 -迸 -堂 -玉 -信 -吧 -瞠 -乡 -姬 -寺 -咬 -溏 -苄 -皿 -意 -赉 -宝 -尔 -钰 -艺 -特 -唳 -踉 -都 -荣 -倚 -登 -荐 -丧 -奇 -涵 -批 -炭 -近 -符 -傩 -感 -道 -着 -菊 -虹 -仲 -众 -懈 -濯 -颞 -眺 -南 -释 -北 -缝 -标 -既 -茗 -整 -撼 -迤 -贲 -挎 -耱 -拒 -某 -妍 -卫 -哇 -英 -矶 -藩 -治 -他 -元 -领 -膜 -遮 -穗 -蛾 -飞 -荒 -棺 -劫 -么 -市 -火 -温 -拈 -棚 -洼 -转 -果 -奕 -卸 -迪 -伸 -泳 -斗 -邡 -侄 -涨 -屯 -萋 -胭 -氡 -崮 -枞 -惧 -冒 -彩 -斜 -手 -豚 -随 -旭 -淑 -妞 -形 -菌 -吲 -沱 -争 -驯 -歹 -挟 -兆 -柱 -传 -至 -包 -内 -响 -临 -红 -功 -弩 -衡 -寂 -禁 -老 -棍 -耆 -渍 -织 -害 -氵 -渑 -布 -载 -靥 -嗬 -虽 -苹 -咨 -娄 -库 -雉 -榜 -帜 -嘲 -套 -瑚 -亲 -簸 -欧 -边 -6 -腿 -旮 -抛 -吹 -瞳 -得 -镓 -梗 -厨 -继 -漾 -愣 -憨 -士 -策 -窑 -抑 -躯 -襟 -脏 -参 -贸 -言 -干 -绸 -鳄 -穷 -藜 -音 -折 -详 -) -举 -悍 -甸 -癌 -黎 -谴 -死 -罩 -迁 -寒 -驷 -袖 -媒 -蒋 -掘 -模 -纠 -恣 -观 -祖 -蛆 -碍 -位 -稿 -主 -澧 -跌 -筏 -京 -锏 -帝 -贴 -证 -糠 -才 -黄 -鲸 -略 -炯 -饱 -四 -出 -园 -犀 -牧 -容 -汉 -杆 -浈 -汰 -瑷 -造 -虫 -瘩 -怪 -驴 -济 -应 -花 -沣 -谔 -夙 -旅 -价 -矿 -以 -考 -s -u -呦 -晒 -巡 -茅 -准 -肟 -瓴 -詹 -仟 -褂 -译 -桌 -混 -宁 -怦 -郑 -抿 -些 -余 -鄂 -饴 -攒 -珑 -群 -阖 -岔 -琨 -藓 -预 -环 -洮 -岌 -宀 -杲 -瀵 -最 -常 -囡 -周 -踊 -女 -鼓 -袭 -喉 -简 -范 -薯 -遐 -疏 -粱 -黜 -禧 -法 -箔 -斤 -遥 -汝 -奥 -直 -贞 -撑 -置 -绱 -集 -她 -馅 -逗 -钧 -橱 -魉 -[ -恙 -躁 -唤 -9 -旺 -膘 -待 -脾 -惫 -购 -吗 -依 -盲 -度 -瘿 -蠖 -俾 -之 -镗 -拇 -鲵 -厝 -簧 -续 -款 -展 -啃 -表 -剔 -品 -钻 -腭 -损 -清 -锶 -统 -涌 -寸 -滨 -贪 -链 -吠 -冈 -伎 -迥 -咏 -吁 -览 -防 -迅 -失 -汾 -阔 -逵 -绀 -蔑 -列 -川 -凭 -努 -熨 -揪 -利 -俱 -绉 -抢 -鸨 -我 -即 -责 -膦 -易 -毓 -鹊 -刹 -玷 -岿 -空 -嘞 -绊 -排 -术 -估 -锷 -违 -们 -苟 -铜 -播 -肘 -件 -烫 -审 -鲂 -广 -像 -铌 -惰 -铟 -巳 -胍 -鲍 -康 -憧 -色 -恢 -想 -拷 -尤 -疳 -知 -S -Y -F -D -A -峄 -裕 -帮 -握 -搔 -氐 -氘 -难 -墒 -沮 -雨 -叁 -缥 -悴 -藐 -湫 -娟 -苑 -稠 -颛 -簇 -后 -阕 -闭 -蕤 -缚 -怎 -佞 -码 -嘤 -蔡 -痊 -舱 -螯 -帕 -赫 -昵 -升 -烬 -岫 -、 -疵 -蜻 -髁 -蕨 -隶 -烛 -械 -丑 -盂 -梁 -强 -鲛 -由 -拘 -揉 -劭 -龟 -撤 -钩 -呕 -孛 -费 -妻 -漂 -求 -阑 -崖 -秤 -甘 -通 -深 -补 -赃 -坎 -床 -啪 -承 -吼 -量 -暇 -钼 -烨 -阂 -擎 -脱 -逮 -称 -P -神 -属 -矗 -华 -届 -狍 -葑 -汹 -育 -患 -窒 -蛰 -佼 -静 -槎 -运 -鳗 -庆 -逝 -曼 -疱 -克 -代 -官 -此 -麸 -耧 -蚌 -晟 -例 -础 -榛 -副 -测 -唰 -缢 -迹 -灬 -霁 -身 -岁 -赭 
-扛 -又 -菡 -乜 -雾 -板 -读 -陷 -徉 -贯 -郁 -虑 -变 -钓 -菜 -圾 -现 -琢 -式 -乐 -维 -渔 -浜 -左 -吾 -脑 -钡 -警 -T -啵 -拴 -偌 -漱 -湿 -硕 -止 -骼 -魄 -积 -燥 -联 -踢 -玛 -则 -窿 -见 -振 -畿 -送 -班 -钽 -您 -赵 -刨 -印 -讨 -踝 -籍 -谡 -舌 -崧 -汽 -蔽 -沪 -酥 -绒 -怖 -财 -帖 -肱 -私 -莎 -勋 -羔 -霸 -励 -哼 -帐 -将 -帅 -渠 -纪 -婴 -娩 -岭 -厘 -滕 -吻 -伤 -坝 -冠 -戊 -隆 -瘁 -介 -涧 -物 -黍 -并 -姗 -奢 -蹑 -掣 -垸 -锴 -命 -箍 -捉 -病 -辖 -琰 -眭 -迩 -艘 -绌 -繁 -寅 -若 -毋 -思 -诉 -类 -诈 -燮 -轲 -酮 -狂 -重 -反 -职 -筱 -县 -委 -磕 -绣 -奖 -晋 -濉 -志 -徽 -肠 -呈 -獐 -坻 -口 -片 -碰 -几 -村 -柿 -劳 -料 -获 -亩 -惕 -晕 -厌 -号 -罢 -池 -正 -鏖 -煨 -家 -棕 -复 -尝 -懋 -蜥 -锅 -岛 -扰 -队 -坠 -瘾 -钬 -@ -卧 -疣 -镇 -譬 -冰 -彷 -频 -黯 -据 -垄 -采 -八 -缪 -瘫 -型 -熹 -砰 -楠 -襁 -箐 -但 -嘶 -绳 -啤 -拍 -盥 -穆 -傲 -洗 -盯 -塘 -怔 -筛 -丿 -台 -恒 -喂 -葛 -永 -¥ -烟 -酒 -桦 -书 -砂 -蚝 -缉 -态 -瀚 -袄 -圳 -轻 -蛛 -超 -榧 -遛 -姒 -奘 -铮 -右 -荽 -望 -偻 -卡 -丶 -氰 -附 -做 -革 -索 -戚 -坨 -桷 -唁 -垅 -榻 -岐 -偎 -坛 -莨 -山 -殊 -微 -骇 -陈 -爨 -推 -嗝 -驹 -澡 -藁 -呤 -卤 -嘻 -糅 -逛 -侵 -郓 -酌 -德 -摇 -※ -鬃 -被 -慨 -殡 -羸 -昌 -泡 -戛 -鞋 -河 -宪 -沿 -玲 -鲨 -翅 -哽 -源 -铅 -语 -照 -邯 -址 -荃 -佬 -顺 -鸳 -町 -霭 -睾 -瓢 -夸 -椁 -晓 -酿 -痈 -咔 -侏 -券 -噎 -湍 -签 -嚷 -离 -午 -尚 -社 -锤 -背 -孟 -使 -浪 -缦 -潍 -鞅 -军 -姹 -驶 -笑 -鳟 -鲁 -》 -孽 -钜 -绿 -洱 -礴 -焯 -椰 -颖 -囔 -乌 -孔 -巴 -互 -性 -椽 -哞 -聘 -昨 -早 -暮 -胶 -炀 -隧 -低 -彗 -昝 -铁 -呓 -氽 -藉 -喔 -癖 -瑗 -姨 -权 -胱 -韦 -堑 -蜜 -酋 -楝 -砝 -毁 -靓 -歙 -锲 -究 -屋 -喳 -骨 -辨 -碑 -武 -鸠 -宫 -辜 -烊 -适 -坡 -殃 -培 -佩 -供 -走 -蜈 -迟 -翼 -况 -姣 -凛 -浔 -吃 -飘 -债 -犟 -金 -促 -苛 -崇 -坂 -莳 -畔 -绂 -兵 -蠕 -斋 -根 -砍 -亢 -欢 -恬 -崔 -剁 -餐 -榫 -快 -扶 -‖ -濒 -缠 -鳜 -当 -彭 -驭 -浦 -篮 -昀 -锆 -秸 -钳 -弋 -娣 -瞑 -夷 -龛 -苫 -拱 -致 -% -嵊 -障 -隐 -弑 -初 -娓 -抉 -汩 -累 -蓖 -" -唬 -助 -苓 -昙 -押 -毙 -破 -城 -郧 -逢 -嚏 -獭 -瞻 -溱 -婿 -赊 -跨 -恼 -璧 -萃 -姻 -貉 -灵 -炉 -密 -氛 -陶 -砸 -谬 -衔 -点 -琛 -沛 -枳 -层 -岱 -诺 -脍 -榈 -埂 -征 -冷 -裁 -打 -蹴 -素 -瘘 -逞 -蛐 -聊 -激 -腱 -萘 -踵 -飒 -蓟 -吆 -取 -咙 -簋 -涓 -矩 -曝 -挺 -揣 -座 -你 -史 -舵 -焱 -尘 -苏 -笈 -脚 -溉 -榨 -诵 -樊 -邓 -焊 -义 -庶 -儋 -蟋 -蒲 -赦 -呷 -杞 -诠 -豪 -还 -试 -颓 -茉 -太 -除 -紫 -逃 -痴 -草 -充 -鳕 -珉 -祗 -墨 -渭 -烩 -蘸 -慕 -璇 -镶 -穴 -嵘 -恶 -骂 -险 -绋 -幕 -碉 -肺 -戳 -刘 -潞 -秣 -纾 -潜 -銮 -洛 -须 -罘 -销 -瘪 -汞 -兮 -屉 -r -林 -厕 -质 -探 -划 -狸 -殚 -善 -煊 -烹 -〒 -锈 -逯 -宸 -辍 -泱 -柚 -袍 -远 -蹋 -嶙 -绝 -峥 -娥 -缍 -雀 -徵 -认 -镱 -谷 -= -贩 -勉 -撩 -鄯 -斐 -洋 -非 -祚 -泾 -诒 -饿 -撬 -威 -晷 -搭 -芍 -锥 -笺 -蓦 -候 -琊 -档 -礁 -沼 -卵 -荠 -忑 -朝 -凹 -瑞 -头 -仪 -弧 -孵 -畏 -铆 -突 -衲 -车 -浩 -气 -茂 -悖 -厢 -枕 -酝 -戴 -湾 -邹 -飚 -攘 -锂 -写 -宵 -翁 -岷 -无 -喜 -丈 -挑 -嗟 -绛 -殉 -议 -槽 -具 -醇 -淞 -笃 -郴 -阅 -饼 -底 -壕 -砚 -弈 -询 -缕 -庹 -翟 -零 -筷 -暨 -舟 -闺 -甯 -撞 -麂 -茌 -蔼 -很 -珲 -捕 -棠 -角 -阉 -媛 -娲 -诽 -剿 -尉 -爵 -睬 -韩 -诰 -匣 -危 -糍 -镯 -立 -浏 -阳 -少 -盆 -舔 -擘 -匪 -申 -尬 -铣 -旯 -抖 -赘 -瓯 -居 -ˇ -哮 -游 -锭 -茏 -歌 -坏 -甚 -秒 -舞 -沙 -仗 -劲 -潺 -阿 -燧 -郭 -嗖 -霏 -忠 -材 -奂 -耐 -跺 -砀 -输 -岖 -媳 -氟 -极 -摆 -灿 -今 -扔 -腻 -枝 -奎 -药 -熄 -吨 -话 -q -额 -慑 -嘌 -协 -喀 -壳 -埭 -视 -著 -於 -愧 -陲 -翌 -峁 -颅 -佛 -腹 -聋 -侯 -咎 -叟 -秀 -颇 -存 -较 -罪 -哄 -岗 -扫 -栏 -钾 -羌 -己 -璨 -枭 -霉 -煌 -涸 -衿 -键 -镝 -益 -岢 -奏 -连 -夯 -睿 -冥 -均 -糖 -狞 -蹊 -稻 -爸 -刿 -胥 -煜 -丽 -肿 -璃 -掸 -跚 -灾 -垂 -樾 -濑 -乎 -莲 -窄 -犹 -撮 -战 -馄 -软 -络 -显 -鸢 -胸 -宾 -妲 -恕 -埔 -蝌 -份 -遇 -巧 -瞟 -粒 -恰 -剥 -桡 -博 -讯 -凯 -堇 -阶 -滤 -卖 -斌 -骚 -彬 -兑 -磺 -樱 -舷 -两 -娱 -福 -仃 -差 -找 -桁 -÷ -净 -把 -阴 -污 -戬 -雷 -碓 -蕲 -楚 -罡 -焖 -抽 -妫 -咒 -仑 -闱 -尽 -邑 -菁 -爱 -贷 -沥 -鞑 -牡 -嗉 -崴 -骤 -塌 -嗦 -订 -拮 -滓 -捡 -锻 -次 -坪 -杩 -臃 -箬 -融 -珂 -鹗 -宗 -枚 -降 -鸬 -妯 -阄 -堰 -盐 -毅 -必 -杨 -崃 -俺 -甬 -状 -莘 -货 -耸 -菱 -腼 -铸 -唏 -痤 -孚 -澳 -懒 -溅 -翘 -疙 -杷 -淼 -缙 -骰 -喊 -悉 -砻 -坷 -艇 -赁 -界 -谤 -纣 -宴 -晃 -茹 -归 -饭 -梢 -铡 -街 -抄 -肼 -鬟 -苯 -颂 -撷 -戈 -炒 -咆 -茭 -瘙 -负 -仰 -客 -琉 -铢 -封 -卑 -珥 -椿 -镧 -窨 -鬲 -寿 -御 -袤 -铃 -萎 -砖 -餮 -脒 -裳 -肪 -孕 -嫣 -馗 -嵇 -恳 -氯 -江 -石 -褶 -冢 -祸 -阻 -狈 -羞 -银 -靳 -透 -咳 -叼 -敷 -芷 -啥 -它 -瓤 -兰 -痘 -懊 -逑 -肌 -往 -捺 -坊 -甩 -呻 -〃 -沦 -忘 -膻 -祟 -菅 -剧 -崆 -智 -坯 -臧 -霍 -墅 -攻 -眯 -倘 -拢 -骠 -铐 -庭 -岙 -瓠 -′ -缺 -泥 -迢 -捶 -? -? 
-郏 -喙 -掷 -沌 -纯 -秘 -种 -听 -绘 -固 -螨 -团 -香 -盗 -妒 -埚 -蓝 -拖 -旱 -荞 -铀 -血 -遏 -汲 -辰 -叩 -拽 -幅 -硬 -惶 -桀 -漠 -措 -泼 -唑 -齐 -肾 -念 -酱 -虚 -屁 -耶 -旗 -砦 -闵 -婉 -馆 -拭 -绅 -韧 -忏 -窝 -醋 -葺 -顾 -辞 -倜 -堆 -辋 -逆 -玟 -贱 -疾 -董 -惘 -倌 -锕 -淘 -嘀 -莽 -俭 -笏 -绑 -鲷 -杈 -择 -蟀 -粥 -嗯 -驰 -逾 -案 -谪 -褓 -胫 -哩 -昕 -颚 -鲢 -绠 -躺 -鹄 -崂 -儒 -俨 -丝 -尕 -泌 -啊 -萸 -彰 -幺 -吟 -骄 -苣 -弦 -脊 -瑰 -〈 -诛 -镁 -析 -闪 -剪 -侧 -哟 -框 -螃 -守 -嬗 -燕 -狭 -铈 -缮 -概 -迳 -痧 -鲲 -俯 -售 -笼 -痣 -扉 -挖 -满 -咋 -援 -邱 -扇 -歪 -便 -玑 -绦 -峡 -蛇 -叨 -〖 -泽 -胃 -斓 -喋 -怂 -坟 -猪 -该 -蚬 -炕 -弥 -赞 -棣 -晔 -娠 -挲 -狡 -创 -疖 -铕 -镭 -稷 -挫 -弭 -啾 -翔 -粉 -履 -苘 -哦 -楼 -秕 -铂 -土 -锣 -瘟 -挣 -栉 -习 -享 -桢 -袅 -磨 -桂 -谦 -延 -坚 -蔚 -噗 -署 -谟 -猬 -钎 -恐 -嬉 -雒 -倦 -衅 -亏 -璩 -睹 -刻 -殿 -王 -算 -雕 -麻 -丘 -柯 -骆 -丸 -塍 -谚 -添 -鲈 -垓 -桎 -蚯 -芥 -予 -飕 -镦 -谌 -窗 -醚 -菀 -亮 -搪 -莺 -蒿 -羁 -足 -J -真 -轶 -悬 -衷 -靛 -翊 -掩 -哒 -炅 -掐 -冼 -妮 -l -谐 -稚 -荆 -擒 -犯 -陵 -虏 -浓 -崽 -刍 -陌 -傻 -孜 -千 -靖 -演 -矜 -钕 -煽 -杰 -酗 -渗 -伞 -栋 -俗 -泫 -戍 -罕 -沾 -疽 -灏 -煦 -芬 -磴 -叱 -阱 -榉 -湃 -蜀 -叉 -醒 -彪 -租 -郡 -篷 -屎 -良 -垢 -隗 -弱 -陨 -峪 -砷 -掴 -颁 -胎 -雯 -绵 -贬 -沐 -撵 -隘 -篙 -暖 -曹 -陡 -栓 -填 -臼 -彦 -瓶 -琪 -潼 -哪 -鸡 -摩 -啦 -俟 -锋 -域 -耻 -蔫 -疯 -纹 -撇 -毒 -绶 -痛 -酯 -忍 -爪 -赳 -歆 -嘹 -辕 -烈 -册 -朴 -钱 -吮 -毯 -癜 -娃 -谀 -邵 -厮 -炽 -璞 -邃 -丐 -追 -词 -瓒 -忆 -轧 -芫 -谯 -喷 -弟 -半 -冕 -裙 -掖 -墉 -绮 -寝 -苔 -势 -顷 -褥 -切 -衮 -君 -佳 -嫒 -蚩 -霞 -佚 -洙 -逊 -镖 -暹 -唛 -& -殒 -顶 -碗 -獗 -轭 -铺 -蛊 -废 -恹 -汨 -崩 -珍 -那 -杵 -曲 -纺 -夏 -薰 -傀 -闳 -淬 -姘 -舀 -拧 -卷 -楂 -恍 -讪 -厩 -寮 -篪 -赓 -乘 -灭 -盅 -鞣 -沟 -慎 -挂 -饺 -鼾 -杳 -树 -缨 -丛 -絮 -娌 -臻 -嗳 -篡 -侩 -述 -衰 -矛 -圈 -蚜 -匕 -筹 -匿 -濞 -晨 -叶 -骋 -郝 -挚 -蚴 -滞 -增 -侍 -描 -瓣 -吖 -嫦 -蟒 -匾 -圣 -赌 -毡 -癞 -恺 -百 -曳 -需 -篓 -肮 -庖 -帏 -卿 -驿 -遗 -蹬 -鬓 -骡 -歉 -芎 -胳 -屐 -禽 -烦 -晌 -寄 -媾 -狄 -翡 -苒 -船 -廉 -终 -痞 -殇 -々 -畦 -饶 -改 -拆 -悻 -萄 -£ -瓿 -乃 -訾 -桅 -匮 -溧 -拥 -纱 -铍 -骗 -蕃 -龋 -缬 -父 -佐 -疚 -栎 -醍 -掳 -蓄 -x -惆 -颜 -鲆 -榆 -〔 -猎 -敌 -暴 -谥 -鲫 -贾 -罗 -玻 -缄 -扦 -芪 -癣 -落 -徒 -臾 -恿 -猩 -托 -邴 -肄 -牵 -春 -陛 -耀 -刊 -拓 -蓓 -邳 -堕 -寇 -枉 -淌 -啡 -湄 -兽 -酷 -萼 -碚 -濠 -萤 -夹 -旬 -戮 -梭 -琥 -椭 -昔 -勺 -蜊 -绐 -晚 -孺 -僵 -宣 -摄 -冽 -旨 -萌 -忙 -蚤 -眉 -噼 -蟑 -付 -契 -瓜 -悼 -颡 -壁 -曾 -窕 -颢 -澎 -仿 -俑 -浑 -嵌 -浣 -乍 -碌 -褪 -乱 -蔟 -隙 -玩 -剐 -葫 -箫 -纲 -围 -伐 -决 -伙 -漩 -瑟 -刑 -肓 -镳 -缓 -蹭 -氨 -皓 -典 -畲 -坍 -铑 -檐 -塑 -洞 -倬 -储 -胴 -淳 -戾 -吐 -灼 -惺 -妙 -毕 -珐 -缈 -虱 -盖 -羰 -鸿 -磅 -谓 -髅 -娴 -苴 -唷 -蚣 -霹 -抨 -贤 -唠 -犬 -誓 -逍 -庠 -逼 -麓 -籼 -釉 -呜 -碧 -秧 -氩 -摔 -霄 -穸 -纨 -辟 -妈 -映 -完 -牛 -缴 -嗷 -炊 -恩 -荔 -茆 -掉 -紊 -慌 -莓 -羟 -阙 -萁 -磐 -另 -蕹 -辱 -鳐 -湮 -吡 -吩 -唐 -睦 -垠 -舒 -圜 -冗 -瞿 -溺 -芾 -囱 -匠 -僳 -汐 -菩 -饬 -漓 -黑 -霰 -浸 -濡 -窥 -毂 -蒡 -兢 -驻 -鹉 -芮 -诙 -迫 -雳 -厂 -忐 -臆 -猴 -鸣 -蚪 -栈 -箕 -羡 -渐 -莆 -捍 -眈 -哓 -趴 -蹼 -埕 -嚣 -骛 -宏 -淄 -斑 -噜 -严 -瑛 -垃 -椎 -诱 -压 -庾 -绞 -焘 -廿 -抡 -迄 -棘 -夫 -纬 -锹 -眨 -瞌 -侠 -脐 -竞 -瀑 -孳 -骧 -遁 -姜 -颦 -荪 -滚 -萦 -伪 -逸 -粳 -爬 -锁 -矣 -役 -趣 -洒 -颔 -诏 -逐 -奸 -甭 -惠 -攀 -蹄 -泛 -尼 -拼 -阮 -鹰 -亚 -颈 -惑 -勒 -〉 -际 -肛 -爷 -刚 -钨 -丰 -养 -冶 -鲽 -辉 -蔻 -画 -覆 -皴 -妊 -麦 -返 -醉 -皂 -擀 -〗 -酶 -凑 -粹 -悟 -诀 -硖 -港 -卜 -z -杀 -涕 -± -舍 -铠 -抵 -弛 -段 -敝 -镐 -奠 -拂 -轴 -跛 -袱 -e -t -沉 -菇 -俎 -薪 -峦 -秭 -蟹 -历 -盟 -菠 -寡 -液 -肢 -喻 -染 -裱 -悱 -抱 -氙 -赤 -捅 -猛 -跑 -氮 -谣 -仁 -尺 -辊 -窍 -烙 -衍 -架 -擦 -倏 -璐 -瑁 -币 -楞 -胖 -夔 -趸 -邛 -惴 -饕 -虔 -蝎 -§ -哉 -贝 -宽 -辫 -炮 -扩 -饲 -籽 -魏 -菟 -锰 -伍 -猝 -末 -琳 -哚 -蛎 -邂 -呀 -姿 -鄞 -却 -歧 -仙 -恸 -椐 -森 -牒 -寤 -袒 -婆 -虢 -雅 -钉 -朵 -贼 -欲 -苞 -寰 -故 -龚 -坭 -嘘 -咫 -礼 -硷 -兀 -睢 -汶 -’ -铲 -烧 -绕 -诃 -浃 -钿 -哺 -柜 -讼 -颊 -璁 -腔 -洽 -咐 -脲 -簌 -筠 -镣 -玮 -鞠 -谁 -兼 -姆 -挥 -梯 -蝴 -谘 -漕 -刷 -躏 -宦 -弼 -b -垌 -劈 -麟 -莉 -揭 -笙 -渎 -仕 -嗤 -仓 -配 -怏 -抬 -错 -泯 -镊 -孰 -猿 -邪 -仍 -秋 -鼬 -壹 -歇 -吵 -炼 -< -尧 -射 -柬 -廷 -胧 -霾 -凳 -隋 -肚 -浮 -梦 -祥 -株 -堵 -退 -L -鹫 -跎 -凶 -毽 -荟 -炫 -栩 -玳 -甜 -沂 -鹿 -顽 -伯 -爹 -赔 -蛴 -徐 -匡 -欣 -狰 -缸 -雹 -蟆 -疤 -默 -沤 -啜 -痂 -衣 -禅 -w -i -h -辽 -葳 -黝 -钗 -停 -沽 -棒 -馨 -颌 -肉 -吴 -硫 -悯 -劾 -娈 -马 -啧 -吊 -悌 -镑 -峭 -帆 -瀣 -涉 -咸 -疸 -滋 -泣 -翦 -拙 -癸 -钥 -蜒 -+ -尾 -庄 -凝 -泉 -婢 -渴 -谊 -乞 -陆 -锉 -糊 -鸦 -淮 -I -B -N -晦 -弗 -乔 -庥 -葡 -尻 -席 -橡 -傣 -渣 -拿 -惩 -麋 -斛 -缃 -矮 -蛏 -岘 -鸽 -姐 -膏 -催 -奔 -镒 -喱 -蠡 -摧 -钯 -胤 -柠 -拐 -璋 -鸥 -卢 -荡 -倾 -^ -_ -珀 -逄 -萧 -塾 -掇 -贮 -笆 -聂 -圃 -冲 -嵬 -M -滔 -笕 -值 
-炙 -偶 -蜱 -搐 -梆 -汪 -蔬 -腑 -鸯 -蹇 -敞 -绯 -仨 -祯 -谆 -梧 -糗 -鑫 -啸 -豺 -囹 -猾 -巢 -柄 -瀛 -筑 -踌 -沭 -暗 -苁 -鱿 -蹉 -脂 -蘖 -牢 -热 -木 -吸 -溃 -宠 -序 -泞 -偿 -拜 -檩 -厚 -朐 -毗 -螳 -吞 -媚 -朽 -担 -蝗 -橘 -畴 -祈 -糟 -盱 -隼 -郜 -惜 -珠 -裨 -铵 -焙 -琚 -唯 -咚 -噪 -骊 -丫 -滢 -勤 -棉 -呸 -咣 -淀 -隔 -蕾 -窈 -饨 -挨 -煅 -短 -匙 -粕 -镜 -赣 -撕 -墩 -酬 -馁 -豌 -颐 -抗 -酣 -氓 -佑 -搁 -哭 -递 -耷 -涡 -桃 -贻 -碣 -截 -瘦 -昭 -镌 -蔓 -氚 -甲 -猕 -蕴 -蓬 -散 -拾 -纛 -狼 -猷 -铎 -埋 -旖 -矾 -讳 -囊 -糜 -迈 -粟 -蚂 -紧 -鲳 -瘢 -栽 -稼 -羊 -锄 -斟 -睁 -桥 -瓮 -蹙 -祉 -醺 -鼻 -昱 -剃 -跳 -篱 -跷 -蒜 -翎 -宅 -晖 -嗑 -壑 -峻 -癫 -屏 -狠 -陋 -袜 -途 -憎 -祀 -莹 -滟 -佶 -溥 -臣 -约 -盛 -峰 -磁 -慵 -婪 -拦 -莅 -朕 -鹦 -粲 -裤 -哎 -疡 -嫖 -琵 -窟 -堪 -谛 -嘉 -儡 -鳝 -斩 -郾 -驸 -酊 -妄 -胜 -贺 -徙 -傅 -噌 -钢 -栅 -庇 -恋 -匝 -巯 -邈 -尸 -锚 -粗 -佟 -蛟 -薹 -纵 -蚊 -郅 -绢 -锐 -苗 -俞 -篆 -淆 -膀 -鲜 -煎 -诶 -秽 -寻 -涮 -刺 -怀 -噶 -巨 -褰 -魅 -灶 -灌 -桉 -藕 -谜 -舸 -薄 -搀 -恽 -借 -牯 -痉 -渥 -愿 -亓 -耘 -杠 -柩 -锔 -蚶 -钣 -珈 -喘 -蹒 -幽 -赐 -稗 -晤 -莱 -泔 -扯 -肯 -菪 -裆 -腩 -豉 -疆 -骜 -腐 -倭 -珏 -唔 -粮 -亡 -润 -慰 -伽 -橄 -玄 -誉 -醐 -胆 -龊 -粼 -塬 -陇 -彼 -削 -嗣 -绾 -芽 -妗 -垭 -瘴 -爽 -薏 -寨 -龈 -泠 -弹 -赢 -漪 -猫 -嘧 -涂 -恤 -圭 -茧 -烽 -屑 -痕 -巾 -赖 -荸 -凰 -腮 -畈 -亵 -蹲 -偃 -苇 -澜 -艮 -换 -骺 -烘 -苕 -梓 -颉 -肇 -哗 -悄 -氤 -涠 -葬 -屠 -鹭 -植 -竺 -佯 -诣 -鲇 -瘀 -鲅 -邦 -移 -滁 -冯 -耕 -癔 -戌 -茬 -沁 -巩 -悠 -湘 -洪 -痹 -锟 -循 -谋 -腕 -鳃 -钠 -捞 -焉 -迎 -碱 -伫 -急 -榷 -奈 -邝 -卯 -辄 -皲 -卟 -醛 -畹 -忧 -稳 -雄 -昼 -缩 -阈 -睑 -扌 -耗 -曦 -涅 -捏 -瞧 -邕 -淖 -漉 -铝 -耦 -禹 -湛 -喽 -莼 -琅 -诸 -苎 -纂 -硅 -始 -嗨 -傥 -燃 -臂 -赅 -嘈 -呆 -贵 -屹 -壮 -肋 -亍 -蚀 -卅 -豹 -腆 -邬 -迭 -浊 -} -童 -螂 -捐 -圩 -勐 -触 -寞 -汊 -壤 -荫 -膺 -渌 -芳 -懿 -遴 -螈 -泰 -蓼 -蛤 -茜 -舅 -枫 -朔 -膝 -眙 -避 -梅 -判 -鹜 -璜 -牍 -缅 -垫 -藻 -黔 -侥 -惚 -懂 -踩 -腰 -腈 -札 -丞 -唾 -慈 -顿 -摹 -荻 -琬 -~ -斧 -沈 -滂 -胁 -胀 -幄 -莜 -Z -匀 -鄄 -掌 -绰 -茎 -焚 -赋 -萱 -谑 -汁 -铒 -瞎 -夺 -蜗 -野 -娆 -冀 -弯 -篁 -懵 -灞 -隽 -芡 -脘 -俐 -辩 -芯 -掺 -喏 -膈 -蝈 -觐 -悚 -踹 -蔗 -熠 -鼠 -呵 -抓 -橼 -峨 -畜 -缔 -禾 -崭 -弃 -熊 -摒 -凸 -拗 -穹 -蒙 -抒 -祛 -劝 -闫 -扳 -阵 -醌 -踪 -喵 -侣 -搬 -仅 -荧 -赎 -蝾 -琦 -买 -婧 -瞄 -寓 -皎 -冻 -赝 -箩 -莫 -瞰 -郊 -笫 -姝 -筒 -枪 -遣 -煸 -袋 -舆 -痱 -涛 -母 -〇 -启 -践 -耙 -绲 -盘 -遂 -昊 -搞 -槿 -诬 -纰 -泓 -惨 -檬 -亻 -越 -C -o -憩 -熵 -祷 -钒 -暧 -塔 -阗 -胰 -咄 -娶 -魔 -琶 -钞 -邻 -扬 -杉 -殴 -咽 -弓 -〆 -髻 -】 -吭 -揽 -霆 -拄 -殖 -脆 -彻 -岩 -芝 -勃 -辣 -剌 -钝 -嘎 -甄 -佘 -皖 -伦 -授 -徕 -憔 -挪 -皇 -庞 -稔 -芜 -踏 -溴 -兖 -卒 -擢 -饥 -鳞 -煲 -‰ -账 -颗 -叻 -斯 -捧 -鳍 -琮 -讹 -蛙 -纽 -谭 -酸 -兔 -莒 -睇 -伟 -觑 -羲 -嗜 -宜 -褐 -旎 -辛 -卦 -诘 -筋 -鎏 -溪 -挛 -熔 -阜 -晰 -鳅 -丢 -奚 -灸 -呱 -献 -陉 -黛 -鸪 -甾 -萨 -疮 -拯 -洲 -疹 -辑 -叙 -恻 -谒 -允 -柔 -烂 -氏 -逅 -漆 -拎 -惋 -扈 -湟 -纭 -啕 -掬 -擞 -哥 -忽 -涤 -鸵 -靡 -郗 -瓷 -扁 -廊 -怨 -雏 -钮 -敦 -E -懦 -憋 -汀 -拚 -啉 -腌 -岸 -f -痼 -瞅 -尊 -咀 -眩 -飙 -忌 -仝 -迦 -熬 -毫 -胯 -篑 -茄 -腺 -凄 -舛 -碴 -锵 -诧 -羯 -後 -漏 -汤 -宓 -仞 -蚁 -壶 -谰 -皑 -铄 -棰 -罔 -辅 -晶 -苦 -牟 -闽 -\ -烃 -饮 -聿 -丙 -蛳 -朱 -煤 -涔 -鳖 -犁 -罐 -荼 -砒 -淦 -妤 -黏 -戎 -孑 -婕 -瑾 -戢 -钵 -枣 -捋 -砥 -衩 -狙 -桠 -稣 -阎 -肃 -梏 -诫 -孪 -昶 -婊 -衫 -嗔 -侃 -塞 -蜃 -樵 -峒 -貌 -屿 -欺 -缫 -阐 -栖 -诟 -珞 -荭 -吝 -萍 -嗽 -恂 -啻 -蜴 -磬 -峋 -俸 -豫 -谎 -徊 -镍 -韬 -魇 -晴 -U -囟 -猜 -蛮 -坐 -囿 -伴 -亭 -肝 -佗 -蝠 -妃 -胞 -滩 -榴 -氖 -垩 -苋 -砣 -扪 -馏 -姓 -轩 -厉 -夥 -侈 -禀 -垒 -岑 -赏 -钛 -辐 -痔 -披 -纸 -碳 -“ -坞 -蠓 -挤 -荥 -沅 -悔 -铧 -帼 -蒌 -蝇 -a -p -y -n -g -哀 -浆 -瑶 -凿 -桶 -馈 -皮 -奴 -苜 -佤 -伶 -晗 -铱 -炬 -优 -弊 -氢 -恃 -甫 -攥 -端 -锌 -灰 -稹 -炝 -曙 -邋 -亥 -眶 -碾 -拉 -萝 -绔 -捷 -浍 -腋 -姑 -菖 -凌 -涞 -麽 -锢 -桨 -潢 -绎 -镰 -殆 -锑 -渝 -铬 -困 -绽 -觎 -匈 -糙 -暑 -裹 -鸟 -盔 -肽 -迷 -綦 -『 -亳 -佝 -俘 -钴 -觇 -骥 -仆 -疝 -跪 -婶 -郯 -瀹 -唉 -脖 -踞 -针 -晾 -忒 -扼 -瞩 -叛 -椒 -疟 -嗡 -邗 -肆 -跆 -玫 -忡 -捣 -咧 -唆 -艄 -蘑 -潦 -笛 -阚 -沸 -泻 -掊 -菽 -贫 -斥 -髂 -孢 -镂 -赂 -麝 -鸾 -屡 -衬 -苷 -恪 -叠 -希 -粤 -爻 -喝 -茫 -惬 -郸 -绻 -庸 -撅 -碟 -宄 -妹 -膛 -叮 -饵 -崛 -嗲 -椅 -冤 -搅 -咕 -敛 -尹 -垦 -闷 -蝉 -霎 -勰 -败 -蓑 -泸 -肤 -鹌 -幌 -焦 -浠 -鞍 -刁 -舰 -乙 -竿 -裔 -。 -茵 -函 -伊 -兄 -丨 -娜 -匍 -謇 -莪 -宥 -似 -蝽 -翳 -酪 -翠 -粑 -薇 -祢 -骏 -赠 -叫 -Q -噤 -噻 -竖 -芗 -莠 -潭 -俊 -羿 -耜 -O -郫 -趁 -嗪 -囚 -蹶 -芒 -洁 -笋 -鹑 -敲 -硝 -啶 -堡 -渲 -揩 -』 -携 -宿 -遒 -颍 -扭 -棱 -割 -萜 -蔸 -葵 -琴 -捂 -饰 -衙 -耿 -掠 -募 -岂 -窖 -涟 -蔺 -瘤 -柞 -瞪 -怜 -匹 -距 -楔 -炜 -哆 -秦 -缎 -幼 -茁 -绪 -痨 -恨 -楸 -娅 -瓦 -桩 -雪 -嬴 -伏 -榔 -妥 -铿 -拌 -眠 -雍 -缇 -‘ -卓 -搓 -哌 -觞 -噩 -屈 -哧 -髓 -咦 -巅 -娑 -侑 -淫 -膳 -祝 -勾 -姊 -莴 
-胄 -疃 -薛 -蜷 -胛 -巷 -芙 -芋 -熙 -闰 -勿 -窃 -狱 -剩 -钏 -幢 -陟 -铛 -慧 -靴 -耍 -k -浙 -浇 -飨 -惟 -绗 -祜 -澈 -啼 -咪 -磷 -摞 -诅 -郦 -抹 -跃 -壬 -吕 -肖 -琏 -颤 -尴 -剡 -抠 -凋 -赚 -泊 -津 -宕 -殷 -倔 -氲 -漫 -邺 -涎 -怠 -$ -垮 -荬 -遵 -俏 -叹 -噢 -饽 -蜘 -孙 -筵 -疼 -鞭 -羧 -牦 -箭 -潴 -c -眸 -祭 -髯 -啖 -坳 -愁 -芩 -驮 -倡 -巽 -穰 -沃 -胚 -怒 -凤 -槛 -剂 -趵 -嫁 -v -邢 -灯 -鄢 -桐 -睽 -檗 -锯 -槟 -婷 -嵋 -圻 -诗 -蕈 -颠 -遭 -痢 -芸 -怯 -馥 -竭 -锗 -徜 -恭 -遍 -籁 -剑 -嘱 -苡 -龄 -僧 -桑 -潸 -弘 -澶 -楹 -悲 -讫 -愤 -腥 -悸 -谍 -椹 -呢 -桓 -葭 -攫 -阀 -翰 -躲 -敖 -柑 -郎 -笨 -橇 -呃 -魁 -燎 -脓 -葩 -磋 -垛 -玺 -狮 -沓 -砜 -蕊 -锺 -罹 -蕉 -翱 -虐 -闾 -巫 -旦 -茱 -嬷 -枯 -鹏 -贡 -芹 -汛 -矫 -绁 -拣 -禺 -佃 -讣 -舫 -惯 -乳 -趋 -疲 -挽 -岚 -虾 -衾 -蠹 -蹂 -飓 -氦 -铖 -孩 -稞 -瑜 -壅 -掀 -勘 -妓 -畅 -髋 -W -庐 -牲 -蓿 -榕 -练 -垣 -唱 -邸 -菲 -昆 -婺 -穿 -绡 -麒 -蚱 -掂 -愚 -泷 -涪 -漳 -妩 -娉 -榄 -讷 -觅 -旧 -藤 -煮 -呛 -柳 -腓 -叭 -庵 -烷 -阡 -罂 -蜕 -擂 -猖 -咿 -媲 -脉 -【 -沏 -貅 -黠 -熏 -哲 -烁 -坦 -酵 -兜 -× -潇 -撒 -剽 -珩 -圹 -乾 -摸 -樟 -帽 -嗒 -襄 -魂 -轿 -憬 -锡 -〕 -喃 -皆 -咖 -隅 -脸 -残 -泮 -袂 -鹂 -珊 -囤 -捆 -咤 -误 -徨 -闹 -淙 -芊 -淋 -怆 -囗 -拨 -梳 -渤 -R -G -绨 -蚓 -婀 -幡 -狩 -麾 -谢 -唢 -裸 -旌 -伉 -纶 -裂 -驳 -砼 -咛 -澄 -樨 -蹈 -宙 -澍 -倍 -貔 -操 -勇 -蟠 -摈 -砧 -虬 -够 -缁 -悦 -藿 -撸 -艹 -摁 -淹 -豇 -虎 -榭 -ˉ -吱 -d -° -喧 -荀 -踱 -侮 -奋 -偕 -饷 -犍 -惮 -坑 -璎 -徘 -宛 -妆 -袈 -倩 -窦 -昂 -荏 -乖 -K -怅 -撰 -鳙 -牙 -袁 -酞 -X -痿 -琼 -闸 -雁 -趾 -荚 -虻 -涝 -《 -杏 -韭 -偈 -烤 -绫 -鞘 -卉 -症 -遢 -蓥 -诋 -杭 -荨 -匆 -竣 -簪 -辙 -敕 -虞 -丹 -缭 -咩 -黟 -m -淤 -瑕 -咂 -铉 -硼 -茨 -嶂 -痒 -畸 -敬 -涿 -粪 -窘 -熟 -叔 -嫔 -盾 -忱 -裘 -憾 -梵 -赡 -珙 -咯 -娘 -庙 -溯 -胺 -葱 -痪 -摊 -荷 -卞 -乒 -髦 -寐 -铭 -坩 -胗 -枷 -爆 -溟 -嚼 -羚 -砬 -轨 -惊 -挠 -罄 -竽 -菏 -氧 -浅 -楣 -盼 -枢 -炸 -阆 -杯 -谏 -噬 -淇 -渺 -俪 -秆 -墓 -泪 -跻 -砌 -痰 -垡 -渡 -耽 -釜 -讶 -鳎 -煞 -呗 -韶 -舶 -绷 -鹳 -缜 -旷 -铊 -皱 -龌 -檀 -霖 -奄 -槐 -艳 -蝶 -旋 -哝 -赶 -骞 -蚧 -腊 -盈 -丁 -` -蜚 -矸 -蝙 -睨 -嚓 -僻 -鬼 -醴 -夜 -彝 -磊 -笔 -拔 -栀 -糕 -厦 -邰 -纫 -逭 -纤 -眦 -膊 -馍 -躇 -烯 -蘼 -冬 -诤 -暄 -骶 -哑 -瘠 -」 -臊 -丕 -愈 -咱 -螺 -擅 -跋 -搏 -硪 -谄 -笠 -淡 -嘿 -骅 -谧 -鼎 -皋 -姚 -歼 -蠢 -驼 -耳 -胬 -挝 -涯 -狗 -蒽 -孓 -犷 -凉 -芦 -箴 -铤 -孤 -嘛 -坤 -V -茴 -朦 -挞 -尖 -橙 -诞 -搴 -碇 -洵 -浚 -帚 -蜍 -漯 -柘 -嚎 -讽 -芭 -荤 -咻 -祠 -秉 -跖 -埃 -吓 -糯 -眷 -馒 -惹 -娼 -鲑 -嫩 -讴 -轮 -瞥 -靶 -褚 -乏 -缤 -宋 -帧 -删 -驱 -碎 -扑 -俩 -俄 -偏 -涣 -竹 -噱 -皙 -佰 -渚 -唧 -斡 -# -镉 -刀 -崎 -筐 -佣 -夭 -贰 -肴 -峙 -哔 -艿 -匐 -牺 -镛 -缘 -仡 -嫡 -劣 -枸 -堀 -梨 -簿 -鸭 -蒸 -亦 -稽 -浴 -{ -衢 -束 -槲 -j -阁 -揍 -疥 -棋 -潋 -聪 -窜 -乓 -睛 -插 -冉 -阪 -苍 -搽 -「 -蟾 -螟 -幸 -仇 -樽 -撂 -慢 -跤 -幔 -俚 -淅 -覃 -觊 -溶 -妖 -帛 -侨 -曰 -妾 -泗 -· -: -瀘 -風 -Ë -( -) -∶ -紅 -紗 -瑭 -雲 -頭 -鶏 -財 -許 -• -¥ -樂 -焗 -麗 -— -; -滙 -東 -榮 -繪 -興 -… -門 -業 -π -楊 -國 -顧 -é -盤 -寳 -Λ -龍 -鳳 -島 -誌 -緣 -結 -銭 -萬 -勝 -祎 -璟 -優 -歡 -臨 -時 -購 -= -★ -藍 -昇 -鐵 -觀 -勅 -農 -聲 -畫 -兿 -術 -發 -劉 -記 -專 -耑 -園 -書 -壴 -種 -Ο -● -褀 -號 -銀 -匯 -敟 -锘 -葉 -橪 -廣 -進 -蒄 -鑽 -阝 -祙 -貢 -鍋 -豊 -夬 -喆 -團 -閣 -開 -燁 -賓 -館 -酡 -沔 -順 -+ -硚 -劵 -饸 -陽 -車 -湓 -復 -萊 -氣 -軒 -華 -堃 -迮 -纟 -戶 -馬 -學 -裡 -電 -嶽 -獨 -マ -シ -サ -ジ -燘 -袪 -環 -❤ -臺 -灣 -専 -賣 -孖 -聖 -攝 -線 -▪ -α -傢 -俬 -夢 -達 -莊 -喬 -貝 -薩 -劍 -羅 -壓 -棛 -饦 -尃 -璈 -囍 -醫 -G -I -A -# -N -鷄 -髙 -嬰 -啓 -約 -隹 -潔 -賴 -藝 -~ -寶 -籣 -麺 -  -嶺 -√ -義 -網 -峩 -長 -∧ -魚 -機 -構 -② -鳯 -偉 -L -B -㙟 -畵 -鴿 -' -詩 -溝 -嚞 -屌 -藔 -佧 -玥 -蘭 -織 -1 -3 -9 -0 -7 -點 -砭 -鴨 -鋪 -銘 -廳 -弍 -‧ -創 -湯 -坶 -℃ -卩 -骝 -& -烜 -荘 -當 -潤 -扞 -係 -懷 -碶 -钅 -蚨 -讠 -☆ -叢 -爲 -埗 -涫 -塗 -→ -楽 -現 -鯨 -愛 -瑪 -鈺 -忄 -悶 -藥 -飾 -樓 -視 -孬 -ㆍ -燚 -苪 -師 -① -丼 -锽 -│ -韓 -標 -è -兒 -閏 -匋 -張 -漢 -Ü -髪 -會 -閑 -檔 -習 -裝 -の -峯 -菘 -輝 -И -雞 -釣 -億 -浐 -K -O -R -8 -H -E -P -T -W -D -S -C -M -F -姌 -饹 -» -晞 -廰 -ä -嵯 -鷹 -負 -飲 -絲 -冚 -楗 -澤 -綫 -區 -❋ -← -質 -靑 -揚 -③ -滬 -統 -産 -協 -﹑ -乸 -畐 -經 -運 -際 -洺 -岽 -為 -粵 -諾 -崋 -豐 -碁 -ɔ -V -2 -6 -齋 -誠 -訂 -´ -勑 -雙 -陳 -無 -í -泩 -媄 -夌 -刂 -i -c -t -o -r -a -嘢 -耄 -燴 -暃 -壽 -媽 -靈 -抻 -體 -唻 -É -冮 -甹 -鎮 -錦 -ʌ -蜛 -蠄 -尓 -駕 -戀 -飬 -逹 -倫 -貴 -極 -Я -Й -寬 -磚 -嶪 -郎 -職 -| -間 -n -d -剎 -伈 -課 -飛 -橋 -瘊 -№ -譜 -骓 -圗 -滘 -縣 -粿 -咅 -養 -濤 -彳 -® -% -Ⅱ -啰 -㴪 -見 -矞 -薬 -糁 -邨 -鲮 -顔 -罱 -З -選 -話 -贏 -氪 -俵 -競 -瑩 -繡 -枱 -β -綉 -á -獅 -爾 -™ -麵 -戋 -淩 -徳 -個 -劇 -場 -務 -簡 -寵 -h -實 -膠 -轱 -圖 -築 -嘣 -樹 -㸃 -營 -耵 -孫 -饃 -鄺 -飯 -麯 -遠 -輸 -坫 -孃 -乚 
-閃 -鏢 -㎡ -題 -廠 -關 -↑ -爺 -將 -軍 -連 -篦 -覌 -參 -箸 -- -窠 -棽 -寕 -夀 -爰 -歐 -呙 -閥 -頡 -熱 -雎 -垟 -裟 -凬 -勁 -帑 -馕 -夆 -疌 -枼 -馮 -貨 -蒤 -樸 -彧 -旸 -靜 -龢 -暢 -㐱 -鳥 -珺 -鏡 -灡 -爭 -堷 -廚 -Ó -騰 -診 -┅ -蘇 -褔 -凱 -頂 -豕 -亞 -帥 -嘬 -⊥ -仺 -桖 -複 -饣 -絡 -穂 -顏 -棟 -納 -▏ -濟 -親 -設 -計 -攵 -埌 -烺 -ò -頤 -燦 -蓮 -撻 -節 -講 -濱 -濃 -娽 -洳 -朿 -燈 -鈴 -護 -膚 -铔 -過 -補 -Z -U -5 -4 -坋 -闿 -䖝 -餘 -缐 -铞 -貿 -铪 -桼 -趙 -鍊 -[ -㐂 -垚 -菓 -揸 -捲 -鐘 -滏 -𣇉 -爍 -輪 -燜 -鴻 -鮮 -動 -鹞 -鷗 -丄 -慶 -鉌 -翥 -飮 -腸 -⇋ -漁 -覺 -來 -熘 -昴 -翏 -鲱 -圧 -鄉 -萭 -頔 -爐 -嫚 -г -貭 -類 -聯 -幛 -輕 -訓 -鑒 -夋 -锨 -芃 -珣 -䝉 -扙 -嵐 -銷 -處 -ㄱ -語 -誘 -苝 -歸 -儀 -燒 -楿 -內 -粢 -葒 -奧 -麥 -礻 -滿 -蠔 -穵 -瞭 -態 -鱬 -榞 -硂 -鄭 -黃 -煙 -祐 -奓 -逺 -* -瑄 -獲 -聞 -薦 -讀 -這 -樣 -決 -問 -啟 -們 -執 -説 -轉 -單 -隨 -唘 -帶 -倉 -庫 -還 -贈 -尙 -皺 -■ -餅 -產 -○ -∈ -報 -狀 -楓 -賠 -琯 -嗮 -禮 -` -傳 -> -≤ -嗞 -Φ -≥ -換 -咭 -∣ -↓ -曬 -ε -応 -寫 -″ -終 -様 -純 -費 -療 -聨 -凍 -壐 -郵 -ü -黒 -∫ -製 -塊 -調 -軽 -確 -撃 -級 -馴 -Ⅲ -涇 -繹 -數 -碼 -證 -狒 -処 -劑 -< -晧 -賀 -衆 -] -櫥 -兩 -陰 -絶 -對 -鯉 -憶 -◎ -p -e -Y -蕒 -煖 -頓 -測 -試 -鼽 -僑 -碩 -妝 -帯 -≈ -鐡 -舖 -權 -喫 -倆 -ˋ -該 -悅 -ā -俫 -. -f -s -b -m -k -g -u -j -貼 -淨 -濕 -針 -適 -備 -l -/ -給 -謢 -強 -觸 -衛 -與 -⊙ -$ -緯 -變 -⑴ -⑵ -⑶ -㎏ -殺 -∩ -幚 -─ -價 -▲ -離 -ú -ó -飄 -烏 -関 -閟 -﹝ -﹞ -邏 -輯 -鍵 -驗 -訣 -導 -歷 -屆 -層 -▼ -儱 -錄 -熳 -ē -艦 -吋 -錶 -辧 -飼 -顯 -④ -禦 -販 -気 -対 -枰 -閩 -紀 -幹 -瞓 -貊 -淚 -△ -眞 -墊 -Ω -獻 -褲 -縫 -緑 -亜 -鉅 -餠 -{ -} -◆ -蘆 -薈 -█ -◇ -溫 -彈 -晳 -粧 -犸 -穩 -訊 -崬 -凖 -熥 -П -舊 -條 -紋 -圍 -Ⅳ -筆 -尷 -難 -雜 -錯 -綁 -識 -頰 -鎖 -艶 -□ -殁 -殼 -⑧ -├ -▕ -鵬 -ǐ -ō -ǒ -糝 -綱 -▎ -μ -盜 -饅 -醬 -籤 -蓋 -釀 -鹽 -據 -à -ɡ -辦 -◥ -彐 -┌ -婦 -獸 -鲩 -伱 -ī -蒟 -蒻 -齊 -袆 -腦 -寧 -凈 -妳 -煥 -詢 -偽 -謹 -啫 -鯽 -騷 -鱸 -損 -傷 -鎻 -髮 -買 -冏 -儥 -両 -﹢ -∞ -載 -喰 -z -羙 -悵 -燙 -曉 -員 -組 -徹 -艷 -痠 -鋼 -鼙 -縮 -細 -嚒 -爯 -≠ -維 -" -鱻 -壇 -厍 -帰 -浥 -犇 -薡 -軎 -² -應 -醜 -刪 -緻 -鶴 -賜 -噁 -軌 -尨 -镔 -鷺 -槗 -彌 -葚 -濛 -請 -溇 -緹 -賢 -訪 -獴 -瑅 -資 -縤 -陣 -蕟 -栢 -韻 -祼 -恁 -伢 -謝 -劃 -涑 -總 -衖 -踺 -砋 -凉 -籃 -駿 -苼 -瘋 -昽 -紡 -驊 -腎 -﹗ -響 -杋 -剛 -嚴 -禪 -歓 -槍 -傘 -檸 -檫 -炣 -勢 -鏜 -鎢 -銑 -尐 -減 -奪 -惡 -θ -僮 -婭 -臘 -ū -ì -殻 -鉄 -∑ -蛲 -焼 -緖 -續 -紹 -懮! 
-䰾 -䲁 -丌 -丏 -丟 -並 -乂 -乗 -乩 -乭 -乹 -亀 -亂 -亅 -亊 -亠 -亰 -亶 -亹 -仂 -仉 -仏 -仛 -仫 -仮 -仳 -仵 -仼 -伃 -伋 -伕 -伝 -伷 -伾 -佀 -佁 -佇 -佈 -佉 -佋 -佔 -併 -佹 -佺 -佾 -侁 -侅 -侊 -侖 -侘 -侚 -侞 -価 -侶 -侷 -侹 -俁 -俅 -俋 -俌 -俍 -俛 -俠 -俳 -俴 -俶 -俽 -倈 -倓 -倖 -倗 -倞 -倢 -倣 -値 -倧 -倮 -倻 -偁 -偊 -偍 -偓 -偪 -偲 -側 -偵 -偸 -傃 -傉 -傑 -傒 -傕 -傖 -傜 -傭 -債 -傾 -僅 -僉 -僊 -働 -僔 -僕 -僖 -僙 -僜 -僡 -僩 -僭 -僰 -僱 -僴 -儁 -儂 -儆 -儇 -儈 -儉 -儐 -儔 -儕 -儘 -儚 -儞 -償 -儦 -儫 -儲 -儷 -儺 -儻 -儼 -兌 -児 -兕 -兗 -兪 -冂 -円 -冇 -冊 -冑 -冖 -冧 -冨 -冪 -冫 -冴 -凃 -凜 -凞 -凪 -凵 -刄 -刎 -別 -刦 -刧 -刼 -則 -剋 -剏 -剝 -剣 -剮 -劄 -劊 -劌 -劔 -劬 -効 -劼 -勔 -勖 -勗 -勛 -勞 -勣 -勦 -勱 -勲 -勳 -勵 -勷 -勸 -勻 -匂 -匄 -匏 -匚 -匱 -匸 -卋 -卍 -卐 -卣 -卬 -卮 -卲 -卹 -卺 -卻 -卽 -厓 -厔 -厙 -厭 -厰 -厲 -厴 -厶 -叄 -収 -叕 -叡 -叵 -吔 -吥 -吳 -吶 -呂 -呉 -呎 -呾 -咁 -咑 -咗 -咘 -咟 -咥 -咲 -咼 -咾 -哂 -哏 -哐 -哖 -哱 -唃 -唄 -唫 -唭 -唵 -唸 -啁 -啍 -啚 -啞 -啣 -啯 -啱 -啲 -啷 -喈 -喚 -喢 -喦 -喪 -喲 -喼 -嗄 -嗆 -嗇 -嗊 -嗎 -嗚 -嗢 -嗩 -嗶 -嗹 -嘅 -嘆 -嘍 -嘏 -嘔 -嘗 -嘚 -嘜 -嘥 -嘩 -嘮 -嘯 -嘰 -嘸 -噍 -噏 -噓 -噝 -噠 -噥 -噦 -噯 -噰 -噲 -噴 -噸 -噹 -嚇 -嚈 -嚐 -嚕 -嚗 -嚙 -嚟 -嚤 -嚦 -嚧 -嚨 -嚩 -嚮 -嚳 -嚶 -嚿 -囀 -囂 -囃 -囉 -囑 -囒 -囓 -囝 -団 -囧 -囪 -囮 -囯 -囲 -図 -囶 -囷 -圂 -圄 -圉 -圏 -圓 -圪 -圯 -坌 -坖 -坣 -坬 -坮 -坵 -垈 -垍 -垕 -垞 -垯 -垰 -垵 -垻 -垿 -埅 -埇 -埈 -埏 -埒 -埜 -埡 -埤 -埧 -埨 -埪 -埮 -埴 -埵 -埻 -埼 -堅 -堈 -堉 -堊 -堍 -堖 -堝 -堦 -堮 -堯 -堺 -塀 -塅 -塆 -塋 -塏 -塙 -塜 -塡 -塢 -塤 -塨 -塩 -塭 -塰 -塱 -塲 -塵 -塹 -塽 -墀 -墎 -増 -墘 -墜 -墡 -墣 -墫 -墬 -墮 -墱 -墳 -墺 -墼 -墾 -壄 -壆 -壋 -壌 -壎 -壔 -壘 -壙 -壞 -壟 -壠 -壢 -壩 -壯 -壱 -壺 -変 -夊 -夠 -夤 -夾 -奀 -奐 -奣 -奩 -奫 -奭 -奮 -妀 -妁 -妏 -妑 -妠 -妧 -妭 -妸 -妺 -姀 -姁 -姃 -姈 -姉 -姍 -姦 -姪 -姫 -姮 -姵 -姶 -姸 -娋 -娍 -娎 -娖 -娛 -娫 -娳 -娸 -婁 -婑 -婯 -婻 -婼 -媃 -媊 -媐 -媓 -媖 -媗 -媜 -媞 -媧 -媭 -媯 -媺 -媼 -媿 -嫄 -嫈 -嫘 -嫪 -嫲 -嫳 -嫵 -嫺 -嫻 -嬅 -嬈 -嬋 -嬌 -嬛 -嬝 -嬡 -嬤 -嬨 -嬪 -嬬 -嬭 -嬸 -嬾 -嬿 -孀 -孆 -孋 -孌 -孮 -孻 -孿 -宍 -実 -宧 -宮 -寀 -寁 -寈 -寊 -寔 -寖 -寗 -寘 -寛 -寜 -寢 -審 -寯 -尋 -尗 -尢 -尪 -屄 -屇 -屍 -屓 -屚 -屜 -屢 -屬 -屭 -屺 -屻 -岀 -岈 -岡 -岣 -岧 -岪 -岬 -岰 -岵 -岻 -峅 -峇 -峍 -峘 -峚 -峠 -峴 -峼 -峽 -崁 -崈 -崍 -崐 -崑 -崒 -崗 -崘 -崙 -崚 -崞 -崟 -崠 -崢 -崱 -崵 -崶 -嵎 -嵒 -嵕 -嵖 -嵗 -嵙 -嵛 -嵜 -嵨 -嵮 -嵰 -嵴 -嵻 -嵿 -嶁 -嶃 -嶄 -嶇 -嶋 -嶌 -嶍 -嶒 -嶔 -嶗 -嶝 -嶠 -嶢 -嶦 -嶧 -嶬 -嶰 -嶲 -嶴 -嶷 -嶸 -嶼 -巂 -巄 -巆 -巋 -巌 -巎 -巑 -巒 -巔 -巖 -巘 -巛 -巰 -巶 -巻 -巿 -帔 -帙 -帡 -帢 -帳 -幀 -幃 -幗 -幟 -幣 -幪 -幫 -幵 -幷 -幾 -庀 -庁 -広 -庢 -庲 -庼 -廁 -廂 -廄 -廆 -廈 -廋 -廌 -廍 -廑 -廔 -廕 -廙 -廝 -廞 -廟 -廡 -廢 -廧 -廨 -廩 -廬 -廱 -廸 -廻 -廼 -弁 -弅 -弇 -弉 -弐 -弒 -弔 -弖 -弢 -弨 -弸 -弾 -彀 -彄 -彅 -彆 -彊 -彎 -彔 -彖 -彘 -彙 -彜 -彞 -彠 -彡 -彣 -彥 -彫 -彿 -徂 -徑 -從 -徠 -徧 -徫 -徬 -徭 -徴 -徸 -忉 -忝 -忞 -忬 -忯 -忳 -怍 -怙 -怛 -怵 -恆 -恊 -恥 -恵 -悆 -悛 -悝 -悞 -悧 -悪 -悰 -悳 -惇 -惔 -惣 -惱 -惲 -愃 -愆 -愍 -愐 -愒 -愔 -愜 -愨 -愭 -愴 -愷 -愼 -愾 -慄 -慘 -慚 -慜 -慟 -慣 -慥 -慮 -慳 -慾 -憂 -憊 -憍 -憐 -憑 -憓 -憕 -憙 -憚 -憤 -憫 -憲 -憺 -憻 -懃 -懇 -懌 -懍 -懐 -懣 -懮 -懲 -懶 -懸 -懺 -懼 -懽 -懾 -戇 -戔 -戕 -戙 -戡 -戥 -戦 -戩 -戰 -戱 -戲 -戸 -戻 -戽 -扆 -扥 -抃 -抇 -抦 -拋 -拏 -拝 -拡 -拺 -挙 -挵 -挹 -挻 -挾 -捒 -捜 -捦 -捨 -捩 -捫 -捭 -捱 -掃 -掄 -掙 -掛 -掞 -掟 -採 -掾 -揀 -揄 -揆 -揔 -揮 -揺 -搖 -搗 -搠 -搢 -搳 -搵 -搶 -搾 -摂 -摜 -摟 -摠 -摭 -摯 -摳 -摴 -摵 -摶 -摺 -摻 -摽 -撈 -撐 -撓 -撖 -撙 -撚 -撣 -撥 -撫 -撲 -撳 -撾 -撿 -擁 -擇 -擊 -擋 -擔 -擠 -擥 -擬 -擯 -擰 -擱 -擲 -擴 -擷 -擺 -擼 -擾 -攏 -攔 -攖 -攜 -攞 -攢 -攣 -攤 -攪 -攬 -攴 -攷 -攽 -敍 -敎 -敔 -敗 -敘 -敫 -敭 -敵 -敻 -敾 -斂 -斃 -斎 -斕 -斖 -斝 -斬 -斷 -斿 -旂 -旃 -旄 -旉 -旙 -旛 -旡 -旲 -旳 -旻 -旼 -旽 -旾 -旿 -昃 -昉 -昍 -昐 -昚 -昛 -昜 -昞 -昡 -昣 -昤 -昪 -昫 -昰 -昺 -晈 -晉 -晊 -晙 -晛 -晝 -晩 -晪 -晫 -晭 -晸 -暅 -暈 -暉 -暊 -暌 -暎 -暏 -暐 -暕 -暘 -暝 -暟 -暠 -暦 -暫 -暱 -暲 -暸 -暻 -暾 -曄 -曅 -曆 -曇 -曌 -曔 -曖 -曠 -曧 -曨 -曩 -曮 -曶 -曷 -曺 -曽 -朊 -朏 -朓 -朖 -朧 -朶 -杁 -杌 -杓 -杙 -杣 -杤 -杧 -杬 -杴 -杻 -杼 -枏 -枖 -枛 -枠 -枡 -枲 -枹 -柁 -柃 -柉 -柊 -柎 -柝 -柟 -柰 -柵 -柶 -柷 -査 -柾 -栃 -栄 -栐 -栒 -栜 -栝 -栞 -栨 -栲 -栴 -栻 -桄 -桕 -桙 -桜 -桝 -桫 -桱 -桲 -桴 -桿 -梀 -梂 -梃 -梉 -梔 -梘 -梟 -梠 -梣 -梫 -梱 -梶 -梽 -棄 -棆 -棐 -棓 -棖 -棗 -棡 -棧 -棨 -棩 -棪 -棫 -棲 -棶 -棹 -棻 -棼 -椆 -椇 -椏 -椙 -椥 -椪 -椲 -椵 -楙 -楡 -楢 -楤 -楧 -楨 -楫 -楮 -楯 -楳 -榊 -榍 -榎 -榑 -榖 -榗 -榘 -榢 -榣 -榤 -榦 -榲 -榿 -槀 -槁 -槃 -槊 -槓 -槔 -槙 -槤 -槩 -槭 -槰 -槱 -槳 -槺 -槻 -槼 -樀 -樁 -樅 -樆 -樋 -樑 -樗 -樘 -樞 -権 -樫 -樺 -樻 -橈 -橐 -橒 -橓 -橚 -橢 -橫 -橿 -檄 -檇 -檉 -檊 -檎 -檜 -檞 -檠 -檡 -檢 -檣 -檦 -檨 -檯 
-檳 -檵 -檻 -檽 -櫂 -櫃 -櫆 -櫈 -櫓 -櫚 -櫛 -櫞 -櫟 -櫨 -櫪 -櫱 -櫸 -櫻 -櫾 -櫿 -欄 -欉 -欏 -欒 -欖 -欞 -欥 -欸 -欹 -欽 -歊 -歎 -歛 -歩 -歲 -歳 -歴 -歿 -殂 -殄 -殑 -殘 -殛 -殞 -殟 -殤 -殭 -殮 -殯 -殲 -殳 -毀 -毆 -毉 -毌 -毎 -毐 -毖 -毘 -毬 -毴 -毸 -毿 -氂 -氈 -氍 -氫 -氬 -氷 -氹 -氻 -氾 -汎 -汜 -汧 -汭 -沄 -沆 -沇 -沍 -沒 -沖 -沘 -沚 -沜 -沢 -沨 -沯 -沺 -況 -泂 -泆 -泇 -泐 -泖 -泚 -洌 -洎 -洢 -洣 -洤 -洨 -洩 -洸 -洹 -浄 -浛 -浞 -浟 -浡 -浤 -浯 -浵 -浹 -涙 -涼 -淍 -淎 -淏 -淓 -淛 -淠 -淥 -淪 -淯 -淰 -淵 -淶 -淸 -淺 -淽 -渃 -済 -渉 -渋 -渕 -渙 -渟 -渦 -渫 -渼 -渽 -渾 -湉 -湊 -湔 -湜 -湞 -湣 -湥 -湧 -湳 -湴 -湼 -満 -溁 -溈 -溋 -溎 -準 -溙 -溦 -溲 -溵 -溼 -滀 -滄 -滅 -滈 -滉 -滌 -滎 -滝 -滯 -滲 -滷 -滸 -滹 -滻 -滽 -滾 -漇 -漈 -漎 -漚 -漣 -漬 -漲 -漴 -漵 -漷 -漸 -漼 -漿 -潁 -潑 -潛 -潟 -潯 -潰 -潲 -潽 -潾 -潿 -澀 -澁 -澂 -澆 -澇 -澉 -澋 -澌 -澔 -澗 -澠 -澣 -澥 -澪 -澮 -澯 -澱 -澻 -濁 -濊 -濋 -濘 -濙 -濫 -濬 -濰 -濲 -濶 -濺 -濼 -濾 -瀁 -瀅 -瀆 -瀉 -瀍 -瀏 -瀔 -瀕 -瀝 -瀞 -瀟 -瀠 -瀦 -瀧 -瀨 -瀬 -瀰 -瀲 -瀴 -瀶 -瀾 -灃 -灊 -灑 -灘 -灝 -灤 -灧 -灴 -災 -炁 -炆 -炘 -炟 -炤 -炱 -炲 -炷 -炻 -烉 -烋 -烒 -烔 -烝 -烱 -烴 -焃 -焄 -焌 -焓 -焜 -焞 -焴 -焻 -焿 -煇 -煉 -煐 -煒 -煔 -煕 -煚 -煠 -煩 -煬 -煳 -煵 -煶 -熅 -熇 -熈 -熒 -熖 -熗 -熜 -熤 -熯 -熲 -熺 -熼 -熾 -熿 -燄 -燉 -燊 -燏 -燐 -燔 -燝 -燫 -燬 -燭 -燹 -燻 -燼 -燾 -燿 -爀 -爌 -爔 -爚 -爛 -爝 -爿 -牁 -牂 -牆 -牕 -牖 -牘 -牝 -牠 -牻 -牼 -牽 -犂 -犎 -犖 -犛 -犢 -犧 -犨 -犰 -犴 -犽 -狎 -狓 -狛 -狟 -狦 -狨 -狳 -狶 -狷 -狹 -狻 -猁 -猄 -猇 -猊 -猙 -猞 -猢 -猨 -猳 -猶 -猺 -猻 -獁 -獃 -獄 -獇 -獎 -獏 -獢 -獣 -獬 -獮 -獯 -獰 -獵 -獷 -獺 -獼 -獾 -玀 -玆 -玎 -玏 -玓 -玕 -玗 -玘 -玙 -玠 -玡 -玢 -玧 -玨 -玭 -玶 -玹 -玾 -珅 -珌 -珎 -珖 -珝 -珡 -珤 -珦 -珧 -珪 -珮 -珵 -珹 -珽 -琁 -琄 -琇 -琍 -琎 -琡 -琤 -琱 -琹 -琺 -琿 -瑀 -瑂 -瑆 -瑈 -瑊 -瑋 -瑑 -瑒 -瑝 -瑠 -瑢 -瑣 -瑤 -瑥 -瑧 -瑨 -瑯 -瑱 -瑳 -瑴 -瑺 -璄 -璆 -璉 -璌 -璕 -璘 -璙 -璚 -璠 -璡 -璣 -璥 -璦 -璪 -璫 -璬 -璮 -璱 -璵 -璸 -璹 -璽 -璿 -瓈 -瓊 -瓌 -瓏 -瓑 -瓔 -瓖 -瓘 -瓚 -瓛 -瓞 -甂 -甌 -甍 -甑 -甕 -甡 -甦 -甪 -畀 -畇 -畊 -畋 -畎 -畑 -畝 -畠 -畢 -畧 -畬 -畯 -異 -畳 -畷 -疇 -疊 -疋 -疍 -疒 -疕 -痍 -痙 -痟 -痩 -痲 -痺 -瘍 -瘓 -瘜 -瘞 -瘡 -瘧 -瘰 -瘺 -癀 -癆 -癇 -癒 -癘 -癟 -癡 -癢 -癤 -癥 -癩 -癬 -癭 -癮 -癯 -癰 -癱 -癲 -発 -皐 -皚 -皛 -皝 -皞 -皰 -皷 -皸 -盃 -盋 -盌 -盞 -盡 -監 -盦 -盧 -盨 -盩 -盪 -盫 -盷 -盺 -眀 -県 -眛 -眜 -眥 -眵 -眾 -睜 -睞 -睥 -睪 -睭 -睺 -瞋 -瞞 -瞢 -瞫 -瞼 -瞽 -矇 -矍 -矚 -矧 -矯 -砢 -砩 -砫 -砮 -砯 -砲 -砳 -砵 -硃 -硇 -硏 -硐 -硓 -硜 -硤 -硨 -硭 -硯 -碕 -碡 -碪 -碭 -碸 -碻 -碽 -磔 -磘 -磙 -磜 -磡 -磪 -磯 -磱 -磲 -磵 -磻 -磾 -礄 -礎 -礐 -礑 -礒 -礙 -礠 -礦 -礪 -礫 -礬 -礮 -礱 -礽 -祂 -祆 -祇 -祋 -祏 -祓 -祕 -祧 -祹 -祿 -禃 -禇 -禍 -禎 -禑 -禓 -禔 -禕 -禘 -禛 -禟 -禠 -禤 -禨 -禩 -禰 -禱 -禵 -禼 -禿 -秈 -秠 -秳 -稅 -稈 -稉 -稑 -稘 -稙 -稜 -稟 -稱 -稲 -稺 -稾 -穀 -穈 -穉 -穌 -積 -穎 -穟 -穠 -穡 -穢 -穣 -穫 -窅 -窋 -窣 -窩 -窪 -窮 -窯 -窰 -窶 -窺 -竄 -竅 -竇 -竈 -竊 -竑 -竜 -竦 -竩 -竻 -笄 -笘 -笞 -笥 -笩 -笪 -笭 -笮 -笯 -笱 -笳 -笹 -筅 -筊 -筌 -筍 -筘 -筥 -筦 -筧 -筬 -筭 -筲 -筳 -筶 -筻 -箆 -箇 -箋 -箏 -箑 -箒 -箜 -範 -篊 -篋 -篌 -篔 -篠 -篤 -篥 -篩 -篭 -篯 -篳 -簀 -簃 -簉 -簍 -簑 -簕 -簗 -簞 -簠 -簫 -簷 -簹 -簺 -簽 -簾 -籀 -籌 -籐 -籙 -籛 -籜 -籝 -籟 -籠 -籥 -籪 -籬 -籮 -籲 -籾 -粄 -粍 -粦 -粩 -糀 -糌 -糎 -糞 -糢 -糧 -糬 -糰 -糴 -糶 -糸 -糹 -糺 -糾 -紂 -紆 -紇 -紈 -紉 -紐 -紑 -紓 -紕 -紘 -紙 -紛 -紜 -紝 -紞 -紮 -紱 -紲 -紳 -紵 -紺 -紿 -絃 -絆 -経 -絎 -絕 -絛 -絜 -絞 -絢 -絨 -絪 -絳 -絵 -絹 -絺 -綃 -綈 -綎 -綏 -綖 -継 -続 -綜 -綝 -綞 -綠 -綢 -綣 -綧 -綬 -綮 -綰 -綳 -綴 -綸 -綺 -綻 -綽 -綾 -綿 -緁 -緃 -緄 -緈 -緊 -緋 -総 -緒 -緘 -緜 -緝 -緞 -締 -緡 -緤 -編 -緩 -緬 -緱 -緲 -練 -縂 -縄 -縈 -縉 -縊 -縕 -縛 -縝 -縞 -縠 -縡 -縯 -縱 -縴 -縵 -縷 -縹 -縻 -績 -繃 -繆 -繇 -繒 -繕 -繖 -繙 -繚 -繞 -繩 -繫 -繭 -繰 -繳 -繻 -繼 -繽 -繾 -纁 -纈 -纍 -纏 -纓 -纔 -纕 -纖 -纘 -纜 -缶 -缽 -罃 -罅 -罈 -罉 -罌 -罍 -罟 -罨 -罰 -罳 -罵 -罶 -罷 -罽 -羂 -羆 -羈 -羋 -羕 -羗 -羣 -羥 -羨 -羱 -翀 -翂 -翃 -翕 -翙 -翜 -翬 -翮 -翹 -耎 -耔 -耨 -耬 -聃 -聒 -聟 -聰 -聱 -聳 -聴 -聶 -聽 -聾 -肅 -肏 -肜 -肫 -肸 -肹 -胂 -胅 -胇 -胊 -胙 -胝 -胼 -脅 -脇 -脈 -脛 -脣 -脩 -脫 -脬 -脭 -脳 -脷 -脹 -腧 -腫 -腳 -膂 -膣 -膥 -膩 -膮 -膽 -膾 -膿 -臉 -臍 -臏 -臚 -臞 -臟 -臠 -臯 -舂 -舉 -舎 -舘 -舢 -舥 -舨 -舩 -舲 -舺 -艅 -艉 -艋 -艎 -艏 -艔 -艙 -艚 -艱 -艸 -艽 -芑 -芛 -芨 -芴 -芻 -苅 -苤 -苧 -苳 -苺 -苻 -苾 -茀 -茇 -茈 -茘 -茚 -茛 -茝 -茮 -茲 -茷 -茺 -荅 -荇 -荊 -荎 -荖 -荳 -莕 -莖 -莙 -莛 -莢 -莧 -莩 -莿 -菈 -菉 -菍 -菑 -菔 -菝 -菥 -菫 -菰 -菴 -菶 -菸 -菹 -菺 -菼 -菾 -萇 -萐 -萠 -萡 -萣 -萩 -萵 -萹 -葃 -葊 -葎 -葙 -葜 -葝 -葦 -葯 -葰 -葶 -葷 -蒍 -蒎 -蒐 -蒓 -蒔 -蒗 -蒞 -蒢 -蒧 -蒨 -蒭 -蒯 -蒴 -蒹 -蒺 -蒼 -蒾 -蓀 -蓁 -蓂 -蓆 -蓍 -蓘 -蓚 -蓧 -蓨 -蓪 -蓭 -蓯 -蓳 -蓽 -蔆 -蔎 -蔔 -蔕 -蔘 -蔝 -蔞 -蔣 -蔥 -蔦 -蔭 
-蔴 -蔵 -蕁 -蕅 -蕎 -蕑 -蕖 -蕘 -蕚 -蕡 -蕢 -蕩 -蕪 -蕭 -蕷 -蕺 -蕻 -薀 -薆 -薊 -薌 -薐 -薑 -薔 -薗 -薘 -薙 -薜 -薞 -薟 -薨 -薫 -薲 -薷 -薸 -薺 -薾 -薿 -藎 -藟 -藦 -藨 -藪 -藶 -藸 -藹 -藺 -蘂 -蘄 -蘅 -蘊 -蘋 -蘐 -蘓 -蘗 -蘘 -蘚 -蘞 -蘢 -蘧 -蘩 -蘵 -蘶 -蘿 -虉 -虓 -虖 -虛 -虜 -虧 -虨 -虯 -虵 -虺 -蚆 -蚋 -蚍 -蚖 -蚡 -蚢 -蚵 -蚺 -蚼 -蛄 -蛉 -蛍 -蛑 -蛞 -蛯 -蛸 -蛺 -蛻 -蜆 -蜉 -蜑 -蜞 -蜢 -蜣 -蜨 -蜮 -蜯 -蜾 -蝀 -蝍 -蝓 -蝕 -蝘 -蝚 -蝟 -蝣 -蝤 -蝦 -蝨 -蝮 -蝯 -蝰 -蝲 -蝸 -螄 -螅 -螋 -螐 -螔 -螞 -螠 -螢 -螣 -螥 -螫 -螭 -螶 -螻 -螽 -螾 -蟄 -蟅 -蟊 -蟌 -蟎 -蟜 -蟥 -蟪 -蟫 -蟬 -蟯 -蟲 -蟳 -蟴 -蟶 -蟻 -蠂 -蠃 -蠅 -蠆 -蠊 -蠋 -蠍 -蠐 -蠑 -蠘 -蠙 -蠟 -蠣 -蠱 -蠲 -蠵 -蠶 -蠷 -蠻 -衂 -衎 -衕 -衚 -衜 -衝 -衞 -衽 -袓 -袛 -袞 -袴 -袾 -裊 -裎 -裒 -裖 -裬 -裵 -裾 -裿 -褌 -褍 -褎 -褘 -褙 -褞 -褧 -褫 -褭 -褸 -褻 -襌 -襖 -襞 -襠 -襤 -襦 -襪 -襯 -襲 -襴 -襶 -襻 -襾 -覇 -覈 -規 -覓 -覚 -覡 -覦 -覧 -覬 -覲 -観 -覽 -覿 -觔 -觙 -觚 -觜 -觭 -觱 -觴 -觶 -觿 -訁 -訃 -訇 -訌 -討 -訏 -訐 -訒 -訔 -訕 -訖 -託 -訛 -訝 -訟 -訥 -訴 -訶 -註 -証 -詁 -詆 -詈 -詐 -詒 -詔 -評 -詛 -詞 -詠 -詡 -詣 -詥 -詧 -詫 -詭 -詮 -詰 -詳 -詵 -詼 -誄 -誅 -誇 -認 -誒 -誕 -誡 -誣 -誤 -誥 -誦 -誨 -說 -読 -誰 -誴 -誹 -誼 -誾 -談 -諍 -諏 -諒 -論 -諗 -諜 -諟 -諠 -諡 -諤 -諦 -諧 -諪 -諫 -諭 -諮 -諱 -諲 -諳 -諴 -諶 -諷 -諸 -諺 -諼 -謀 -謁 -謂 -謄 -謊 -謌 -謎 -謏 -謐 -謔 -謖 -謗 -謙 -謚 -謜 -謠 -謤 -謨 -謩 -謫 -謬 -謳 -謾 -譏 -譓 -譔 -譙 -譚 -譞 -譫 -譭 -譯 -議 -譲 -譳 -譴 -譽 -譿 -讃 -讌 -讎 -讓 -讖 -讙 -讚 -讜 -讞 -谿 -豈 -豎 -豔 -豢 -豨 -豬 -豳 -豸 -豿 -貐 -貒 -貓 -貘 -貞 -貤 -貧 -貪 -貫 -責 -貮 -貯 -貲 -貳 -貶 -貸 -貺 -貽 -賁 -賂 -賃 -賄 -賈 -賊 -賑 -賒 -賔 -賕 -賚 -賞 -賡 -賤 -賦 -賨 -賬 -賭 -賹 -賺 -賻 -賽 -賾 -贄 -贅 -贇 -贊 -贌 -贍 -贓 -贔 -贖 -贛 -赧 -赬 -趐 -趕 -趖 -趨 -趺 -趼 -跅 -跏 -跗 -跡 -跣 -跩 -踎 -踐 -踰 -踴 -蹕 -蹟 -蹠 -蹤 -蹯 -蹺 -蹻 -躂 -躄 -躉 -躋 -躍 -躑 -躒 -躔 -躝 -躪 -躰 -軀 -軋 -軔 -軛 -軟 -転 -軫 -軲 -軸 -軹 -軺 -軻 -軼 -軾 -較 -輄 -輅 -輋 -輒 -輓 -輔 -輛 -輞 -輟 -輥 -輦 -輩 -輬 -輭 -輶 -輻 -輾 -輿 -轀 -轂 -轄 -轅 -轆 -轍 -轎 -轘 -轝 -轟 -轤 -辭 -辮 -辯 -辵 -辺 -辻 -込 -迴 -迵 -迺 -逈 -逋 -逌 -逎 -逕 -逖 -逤 -逨 -逴 -遄 -遊 -違 -遘 -遙 -遜 -遞 -遯 -遲 -遶 -遷 -遹 -遺 -遼 -邁 -邇 -邉 -邊 -邙 -邠 -邲 -邽 -邾 -郃 -郄 -郇 -郋 -郞 -郟 -郤 -郪 -郳 -郷 -郿 -鄃 -鄆 -鄋 -鄑 -鄒 -鄔 -鄖 -鄗 -鄘 -鄚 -鄜 -鄠 -鄤 -鄧 -鄩 -鄫 -鄰 -鄲 -鄳 -鄴 -酃 -酆 -酈 -酎 -酏 -酔 -酢 -酩 -酴 -酺 -酼 -醁 -醂 -醃 -醅 -醞 -醢 -醣 -醮 -醯 -醾 -醿 -釁 -釆 -釋 -釐 -釒 -釓 -釔 -釕 -釗 -釘 -釙 -釚 -釤 -釦 -釧 -釩 -釪 -釭 -釴 -釵 -釷 -釹 -釺 -鈀 -鈁 -鈄 -鈇 -鈈 -鈉 -鈊 -鈍 -鈏 -鈐 -鈑 -鈔 -鈕 -鈖 -鈞 -鈢 -鈣 -鈥 -鈦 -鈫 -鈮 -鈰 -鈳 -鈷 -鈸 -鈹 -鈾 -鈿 -鉀 -鉆 -鉈 -鉉 -鉋 -鉍 -鉏 -鉑 -鉓 -鉗 -鉚 -鉛 -鉞 -鉟 -鉤 -鉦 -鉬 -鉭 -鉲 -鉶 -鉷 -鉸 -鉻 -鉾 -鉿 -銂 -銃 -銅 -銋 -銍 -銓 -銕 -銖 -銚 -銜 -銠 -銣 -銥 -銦 -銨 -銩 -銪 -銫 -銬 -銱 -銲 -銳 -銶 -銹 -銻 -銼 -銾 -鋁 -鋅 -鋆 -鋇 -鋌 -鋏 -鋐 -鋒 -鋕 -鋗 -鋙 -鋡 -鋤 -鋥 -鋦 -鋨 -鋮 -鋯 -鋰 -鋱 -鋳 -鋶 -鋸 -鋹 -錀 -錏 -錐 -錒 -錕 -錘 -錚 -錞 -錟 -錠 -錡 -錢 -錨 -錫 -錬 -錮 -錳 -錸 -錻 -鍀 -鍇 -鍈 -鍉 -鍍 -鍏 -鍔 -鍘 -鍛 -鍝 -鍟 -鍠 -鍥 -鍩 -鍬 -鍱 -鍳 -鍶 -鍷 -鍺 -鍼 -鍾 -鎂 -鎅 -鎊 -鎌 -鎓 -鎔 -鎗 -鎘 -鎚 -鎛 -鎣 -鎦 -鎧 -鎪 -鎬 -鎭 -鎰 -鎳 -鎵 -鏃 -鏇 -鏈 -鏊 -鏌 -鏐 -鏑 -鏓 -鏗 -鏘 -鏝 -鏞 -鏟 -鏤 -鏦 -鏳 -鏴 -鏵 -鏷 -鏻 -鏽 -鐃 -鐇 -鐈 -鐓 -鐔 -鐙 -鐠 -鐤 -鐦 -鐧 -鐫 -鐬 -鐭 -鐮 -鐲 -鐳 -鐸 -鐺 -鐽 -鐿 -鑀 -鑁 -鑂 -鑄 -鑅 -鑊 -鑌 -鑑 -鑛 -鑠 -鑣 -鑨 -鑪 -鑭 -鑰 -鑲 -鑴 -鑷 -鑼 -鑾 -鑿 -閂 -閆 -閉 -閎 -閒 -閔 -閘 -閜 -閞 -閦 -閨 -閬 -閭 -閰 -閱 -閶 -閹 -閻 -閼 -閾 -閿 -闆 -闇 -闈 -闊 -闋 -闌 -闍 -闐 -闓 -闔 -闕 -闖 -闘 -闞 -闡 -闢 -闥 -阭 -阯 -陁 -陔 -陘 -陜 -陝 -陞 -陬 -陸 -険 -隄 -隈 -隊 -階 -隕 -隣 -險 -隰 -隱 -隲 -隳 -隴 -隷 -隸 -隻 -雋 -雑 -雖 -雛 -雝 -雩 -雫 -雱 -霅 -霈 -霊 -霑 -霙 -霤 -霧 -霨 -霶 -霽 -靁 -靂 -靄 -靉 -靚 -靫 -靬 -靭 -靺 -靼 -鞆 -鞏 -鞞 -鞥 -鞦 -鞨 -鞮 -鞴 -韁 -韃 -韆 -韋 -韌 -韑 -韙 -韜 -韞 -韠 -韡 -韮 -韺 -韾 -頁 -頃 -項 -須 -頊 -頌 -頍 -頎 -頏 -預 -頑 -頒 -頗 -領 -頜 -頠 -頦 -頫 -頴 -頵 -頷 -頸 -頹 -頻 -頼 -顆 -額 -顎 -顒 -顓 -顕 -顗 -願 -顙 -顛 -顥 -顫 -顰 -顱 -顳 -顴 -颮 -颯 -颱 -颶 -颺 -颼 -飆 -飈 -飠 -飡 -飢 -飥 -飩 -飪 -飫 -飭 -飴 -飽 -餃 -餄 -餉 -餌 -餎 -餒 -餓 -餗 -餚 -餛 -餞 -餡 -餵 -餺 -餾 -餿 -饋 -饌 -饑 -饒 -饗 -饞 -饟 -饢 -馘 -馛 -馦 -馭 -馯 -馱 -馳 -馼 -駁 -駄 -駅 -駆 -駐 -駑 -駒 -駔 -駘 -駙 -駛 -駝 -駟 -駢 -駭 -駰 -駱 -騁 -騂 -騄 -騅 -騋 -騎 -騏 -験 -騖 -騙 -騤 -騨 -騫 -騭 -騮 -騶 -騾 -驁 -驃 -驄 -驅 -驌 -驍 -驎 -驒 -驕 -驚 -驛 -驟 -驢 -驤 -驥 -驩 -驪 -骯 -髀 -髎 -髏 -髑 -髒 -髡 -髭 -髲 -髷 -髹 -鬄 -鬅 -鬆 -鬍 -鬚 -鬢 -鬥 -鬧 -鬨 -鬩 -鬪 -鬬 -鬮 -鬯 -鬱 -鬹 -鬻 -魃 -魈 -魋 -魍 -魎 -魕 -魘 -魛 -魞 -魟 -魣 -魨 -魩 -魮 -魯 -魴 -魷 -鮀 -鮁 -鮃 -鮄 -鮊 -鮋 -鮍 -鮐 -鮑 -鮒 -鮓 -鮗 -鮜 -鮟 -鮠 -鮡 -鮣 -鮨 -鮪 -鮫 -鮭 -鮰 -鮸 -鮹 -鮻 -鯀 -鯁 -鯃 -鯇 -鯊 
-鯏 -鯒 -鯓 -鯔 -鯕 -鯖 -鯗 -鯙 -鯛 -鯡 -鯢 -鯤 -鯧 -鯪 -鯭 -鯮 -鯰 -鯶 -鯷 -鯻 -鯿 -鰂 -鰃 -鰆 -鰈 -鰉 -鰍 -鰏 -鰒 -鰓 -鰕 -鰗 -鰛 -鰜 -鰟 -鰣 -鰤 -鰧 -鰨 -鰩 -鰭 -鰮 -鰱 -鰲 -鰳 -鰶 -鰷 -鰹 -鰺 -鰻 -鰼 -鰾 -鱀 -鱂 -鱅 -鱇 -鱈 -鱉 -鱊 -鱒 -鱓 -鱔 -鱖 -鱗 -鱘 -鱚 -鱝 -鱟 -鱠 -鱣 -鱥 -鱧 -鱨 -鱮 -鱰 -鱲 -鱵 -鱷 -鱺 -鳧 -鳩 -鳰 -鳴 -鳶 -鳽 -鴆 -鴇 -鴉 -鴒 -鴓 -鴕 -鴗 -鴛 -鴝 -鴞 -鴟 -鴡 -鴣 -鴦 -鴫 -鴯 -鴰 -鴴 -鵂 -鵄 -鵎 -鵐 -鵑 -鵒 -鵓 -鵙 -鵜 -鵝 -鵞 -鵟 -鵠 -鵡 -鵪 -鵯 -鵰 -鵲 -鵵 -鵼 -鵾 -鶆 -鶇 -鶉 -鶒 -鶓 -鶘 -鶚 -鶡 -鶥 -鶩 -鶬 -鶯 -鶲 -鶹 -鶺 -鶻 -鶼 -鶿 -鷂 -鷉 -鷎 -鷓 -鷙 -鷚 -鷟 -鷥 -鷦 -鷫 -鷯 -鷲 -鷳 -鷸 -鸊 -鸌 -鸐 -鸑 -鸕 -鸘 -鸚 -鸛 -鸜 -鸝 -鸞 -鹮 -鹵 -鹹 -鹼 -麅 -麇 -麈 -麊 -麐 -麞 -麩 -麪 -麴 -麹 -麼 -麿 -黁 -黇 -黌 -黐 -黙 -黥 -黧 -黨 -黴 -黶 -黻 -黼 -黽 -黿 -鼂 -鼇 -鼈 -鼉 -鼐 -鼒 -鼕 -鼢 -鼩 -鼯 -鼱 -鼴 -鼷 -齒 -齕 -齡 -齣 -齦 -齧 -齲 -齶 -龎 -龐 -龑 -龔 -龕 -龜 -龝 -龠 -ず -梌 -叀 -晢 -媸 -錾 -鐖 -䰡 -櫬 -锱 -υ -鼗 -媪 -澴 -苈 -眴 -𝜏 -缱 -𝜶 -조 -晡 -≡ -ࠀ -н -廇 -嗛 -篚 -ώ -莰 -윤 -纚 -𢢞 -闼 -熌 -饎 -蓊 -倅 -년 -聭 -耩 -≅ - -≺ -诌 - - - -耰 -菗 -僦 -⇣ -甊 -冓 -缷 -枊 -沕 -𝐴 -❹ -형 -秾 - -щ -厹 - -˗ -疔 -䩦 -髴 -⨂ -莏 -≧ -垆 -銌 -桤 -隤 -ギ -벽 -⑸ -✘ -̣ -辶 -铼 -게 -へ -獶 -藳 -祍 -黉 -跱 -⽬ -埙 -だ -蓣 -亯 -구 - -鹎 - -⾃ -楩 -⌘ -汏 -虒 -谖 - -﹜ -劖 -じ -瑇 -㮑 -揕 -⇔ -𤔲 -薉 -𝑾 -硗 -〈 -は -盍 -狽 -ж -я -挆 -槨 -γ -阏 -襕 -𝜉 -❖ -└ -총 -시 - -ν -刲 -ด -嬲 -绤 -𝐰 -飦 -扱 -帻 -辀 -廴 -к -蔖 -– -같 -熭 -巣 - -裛 -𝑶 -蓺 -蔊 -그 -匳 -玚 -Ц -璲 -련 -𨒅 -변 -㤵 -饫 -𨚵 -X -筇 -镡 -ⅳ -𝛿 -轸 -𝑭 -鋈 -鵩 -縁 -˙ -ɿ -𝒴 -㝮 -𝜂 -栠 -橦 -緇 -肰 - -跼 -䭜 -蜅 -訸 -㻶 -𝑉 -เ - -嚢 -鼔 -𝒆 -閫 -阃 -𥞹 -杪 -誊 - -鲋 -骍 -τ -莾 -凊 -﹡ -箚 -蛱 -樯 -喾 -幞 -欕 -搡 -戉 -瘖 -᙭ -砟 -ས -∤ -ี -メ -𝝁 -穑 -渶 -𦬁 -서 -⊗ -穇 -⌊ -を -鐻 -蘤 -≫ -◐ -汙 -蒒 -⑷ -蹨 -x -裥 -嶤 -ァ -従 -침 -稂 -𪧶 -で -𝑹 -⑫ -闩 -槫 -舮 -𝑿 -戁 -간 -戯 - -ོ -æ -わ -チ -砉 -Ψ -劂 -・ -В -鬭 -钔 -盭 -黓 -⎯ -𝐏 -함 -钪 -𝑸 -澰 -래 -藒 -龃 -瞀 -伧 -♂ -¹ -ƞ -澼 -餍 -倶 -ð - -嚱 -跬 -貙 -磿 -娬 -氿 -鹘 -𝐁 -摅 -ヱ -傰 -พ -湝 -ˆ -Л -翾 -≃ -에 -滫 -С -嫕 -あ -㈣ -ⅇ -垧 -⺮ -∠ -躐 -硌 -眢 -乧 -𝑐 -泃 -轫 -↔ -㎝ -≜ -⽇ -撟 -⟹ -脿 - -㸁 -靯 -う -⁠ -懬 -搷 -瀓 -ˁ -ⅲ -훈 - -お -𝛄 -瓅 -葻 -猋 -ら -⾳ -喣 -⽿ -č -鈎 -⑤ -å -阸 -름 -て -圮 -⚫ -⻄ -胨 -琠 -戄 -箄 -𝒳 -鼍 -й -⼲ -廪 -睃 -囫 -͞ -죄 - -호 -み -饩 - -⊆ -х -欚 -瘚 -≯ -瞗 -ž -嗵 -근 -ま - -⾔ -罥 -ʹ -鼃 -д -✳ -ゃ -悊 -𝐅 -영 -@ -ɣ -𝛷 -𝜁 -ǜ -犄 -⽂ -ཆ -胒 -﹦ -谫 -є -・ -𝐻 -狺 -백 -舳 -𝑁 -ษ - -𝜓 -𝒦 -盕 -유 -𪯐 -茑 -礤 -거 -コ -肂 -鸻 -ã -⑬ -铚 -걸 -磳 -綷 -𝒚 -舭 -腚 -㈩ -榱 -𝐌 -畾 -馐 -罾 -∕ -𝔛 -𝑬 -ç -楬 -櫽 -顼 -阋 - -꺼 -諛 -̌ -้ -㮀 -乵 -沬 -⼀ -ư -鲠 -䜩 -樉 -鹈 -搧 -轾 -䟒 -등 -𝝉 -잠 -짤 -า -蘨 -愪 -ྟ -慪 -鮝 -𝛑 -び -𥞪 -𝐾 -レ -교 -ྲ -달 -𝐩 - -殹 -踇 -狥 -ベ -미 -매 -⑭ -钁 -Θ -못 -𝜇 -侂 -ę -ฟ -邶 -諣 -颃 -𡢕 -昑 -𝒖 -讱 -﹤ -緵 -骢 -朢 -骘 -ℜ - -ゞ -愬 -鹬 - -ッ -ར -급 -‚ -鸶 -蒫 -餽 -蓃 -ข -辠 -ğ -氺 -暆 -笿 -迚 -甝 -ή -徼 -旣 -ϖ -ヲ -倕 -匽 -蓱 -리 -剷 -ู -逪 - -나 -堋 -焠 -Δ -炑 -爫 -蒖 -𝒓 -悫 -𝛱 - -𝐮 -騧 -ⅴ -饾 -贠 -𝚲 -崀 -磀 -柤 -肈 -⻮ -鶄 -狲 -跫 -지 -鳇 -痖 -跂 -秫 -ʒ -합 -ไ -迨 -𝜐 - -屦 -𝐶 -; -辎 -∵ -鴁 -撏 -ς -⟶ -薮 -㟪 -犮 -ب -ビ -藡 -甏 - -眡 -訿 -鉥 -媵 - -柫 -𝒞 -ь -萏 -ค -트 -訮 -汚 -眚 -〞 -き -ほ -刖 -髄 -蘀 -や -ة -诹 -т -ན -𝒃 -掼 -䓁 -僥 -팰 -枵 -✔ -³ -ེ -鼖 -屖 -鍮 -砇 -カ -舐 -牴 -𝜎 -㡿 -攉 -⽤ -晅 -労 -蛕 -𝐽 -Ʃ -く -穽 -孥 -𝒏 - - -ɬ -玦 -檮 -ョ -∥ -중 -萯 -呲 -䰈 -새 - -釶 - -ɢ -⊂ -臮 - -梼 -デ -骖 -ス -蹩 -羼 -▽ -Π -≪ -匛 -𝐼 -稊 -่ -茠 -䢉 -秝 -茐 -齎 -そ - -芕 -噚 -癉 -蹱 -蓜 -𝐬 -ϑ -е -瀋 -ϕ -χ -镟 -霂 -隒 -▱ -ヶ -撄 -둔 -¢ -こ -跲 -莻 -𝑠 -輮 -็ -堠 -푟 -赕 -◦ -ا -런 -帒 -汘 -̱ -尥 -蘠 -𦟜 -옥 -腠 -夨 -⩾ -𝑝 -歯 -刱 -여 -け -溘 -釰 -肍 -擗 -矱 -鍌 -芧 -술 -발 -鼫 -舾 -⼯ -𝝓 -ƒ -怸 -པ -𣐼 -疎 -铷 -Η -⑺ -蒏 -림 -⃛ -゜ -褴 -𨒪 -れ -揢 -さ -櫫 -櫑 -䋎 -灋 -櫜 -诓 -❶ -𝐃 -Q -袳 -ℒ -菂 - -荙 -ℛ -⁄ -堙 -贋 -̅ -鳏 -̂ -、 -茍 -泜 -𝑈 -즉 -噔 - - -迓 -Ⅸ -❷ -이 -_ -⾊ -Ö -铥 -耹 -䶮 - -무 - -セ -饳 -อ -篾 -통 -‒ -ย -덕 -말 -艨 -Ω -𝐨 -螓 -澐 -巠 -⋅ -钶 -도 -鸱 -齍 -恑 -褛 -剟 -준 -勶 -𠟠 -ß -箅 -𝑆 -悃 -蘥 - -Ξ -𝑘 -妣 -𝑖 -𝐑 -纡 -釿 -⺌ -ヴ -𝕀 -涻 -箙 -塚 - -⼠ -墈 -∷ -疴 -ク -ㄕ -𝒂 -蒪 -蓡 - -鷇 -瘏 -𣹳 -橰 -嵚 -帀 -주 -ド -盓 -爇 -φ -觋 -𝜑 -钍 -화 -표 -Ɛ -篰 -명 -週 -с -蓛 -裢 -穜 -㱃 -玊 -鲕 -蒕 -箪 -⑯ -苽 -矦 -偰 -盝 -佊 -僨 -駉 -𝑳 -머 -ª -絅 - -锒 - -苆 -ั -𝛻 -碹 -咺 -竝 -и -づ -강 -辁 -́ -铽 -纩 -齑 -𝝎 -어 -ユ -躡 -𝒄 -ซ -畛 -鸰 -ླ - -骉 -❸ -揲 -廃 -湋 -𝑲 - - -旤 - -蹷 -钌 -국 -豙 -鬳 - 
-ɛ -轳 -俜 -眄 -萮 -𝐡 -颵 -箓 -魑 -𝑅 -漍 -ℤ - -逡 -학 -浖 -ょ -¬ -怴 -𝛤 -怿 -祌 -纥 -𝒑 -⃑ -棅 -笵 -낭 -栦 -⑰ -บ -𝔽 -𝑇 -埝 -⽓ -孱 -埶 -匜 -鸼 - - -벌 -ル -锸 -斫 -妟 -뽀 -昬 -댁 -ʂ -暯 -夳 -ノ -堞 -懘 -榼 -鞫 -오 -𝑡 -偑 -戗 -∴ -伥 -끝 -𬌗 -稯 -岜 -Ε -犲 -𩓞 -연 -鹚 - -ག -诜 -嗍 -倥 -鳣 -庑 -屾 -雚 - -椄 -颏 -酤 -𝒋 -欛 - -း -려 -缋 -¾ -ゴ -籑 -笤 -鞛 -鏺 -蓒 -설 -緍 -⑩ -迀 -鼋 -ɮ -위 -锪 -∨ -滆 -€ -躅 -鋓 -柀 -䐶 -啎 -𝛵 -骃 -ć -갈 -卨 -い -𝑺 -鸲 -壻 -偯 -𝑞 -譖 -곤 -溍 - -噫 -순 - -𝑽 -ы -赑 -蓸 -鸮 -稃 -っ -詗 -으 -⨀ -屮 -俦 -伛 -畱 -늬 -𝑂 -朼 -沰 -겨 -з -骀 -鸩 -𝜈 -º -苊 -诎 -皤 - -하 -̀ -砑 -凷 -翄 -𝑛 -赪 -≮ -浗 -𝐍 -û -オ -ƹ -𝜅 -묘 -曛 -鳊 -𝛩 -癹 -磒 -ば -⑨ -礆 - -乼 - -∽ -褱 -藴 -縶 -觥 -に -식 -凫 - -佥 -槷 -阍 -䰍 -졸 -전 -葢 -㝸 -も -⻔ -遽 - -蹰 -𝛺 -裏 -། -를 - -ろ -짭 - -ぐ -싶 -渰 -⊤ -浳 -൯ -∃ -옛 -蟞 -과 -芠 -飖 -⼆ -敶 -粝 -𥃩 -坿 -䩉 -𝑯 -「 -矰 - -사 -𝛶 -𝑎 -挐 -푎 -동 -ℝ -Γ -︃ -珒 -鹍 -κ -鑓 -傁 -惓 -臿 -丣 -悒 -侔 -ñ -訳 -櫭 -賛 -觏 -辂 -覅 -濓 -堿 -擪 -฀ -𝑵 -扨 -嫫 -珰 - -寃 -𝒔 -曱 -髣 -인 -≌ -莵 -踳 -ⅱ -Ø -⌋ -¯ -挢 -̇ -﹪ -哕 -𦫳 - -襛 -昳 -铙 -铫 -軱 -汔 -ネ - -躩 -옷 -ถ -엄 -皊 -臑 -𧄝 -𝑃 - -䢅 -𝐝 -𝒍 -ℱ -𝐓 -蓾 -𝑻 -䋁 -裼 -개 -ത -𝒊 -僪 -瞂 -𦞠 -요 - ̄ -荍 -𝜔 -ф -峣 -庋 -檏 -袢 -绬 -Σ -향 -钫 -え -枅 -≝ -荦 -들 -勍 -ö -𝒕 -툰 -遬 -𝐵 -擧 -咢 -钘 - -𝒢 -Ⅷ -➢ -讧 -ω -簟 -廐 -刳 -阘 -б -⊘ -髟 -臓 -루 -⎧ -诳 -у -诮 -蠪 -梹 -耤 -パ -ن -∆ - -𝑫 -น -べ -坼 - -𝑤 -褽 -憼 -심 -∇ -迖 -휆 -叚 -없 -⼿ -钖 -斠 -䪵 -胠 -𝜋 -殽 -剜 -⾝ -− -慸 -𝛽 -椔 -⟩ -皦 -筚 -奰 -Å -물 -𝒐 -嫱 -钆 -ï -∪ -⇢ -ş - -㖞 -璗 -葸 -殢 -𝜺 -夲 -骒 -ち -회 -선 -睒 -轡 -ξ - -鲧 -镞 -碜 -놈 -Å -紴 - -⇤ -ྷ -⑪ -喟 -𦼰 - -蔩 -埦 -𝜆 -耋 -˜ -한 -舣 -馓 -⑻ - -ɐ -椘 -し -莐 -辔 -憰 -碛 -⁃ -飏 -颀 -跽 -⇥ -赀 -撺 -襜 -ɒ -袧 -л -정 -꾸 -콩 - -박 -缑 -柈 - -樲 -𝑮 -詘 -µ -𝑷 -鹪 -𝛼 -차 -讬 -掯 -硎 -𝑨 -舄 -‹ -누 -バ -ก -萀 -兇 -숙 -貍 - -踈 -친 -𝜽 -摰 -甿 -坜 -遑 -삼 -배 -Μ -을 -譊 -沩 -빈 -飑 -钹 -镨 -鐉 -宭 -桗 -ə -歺 -А -⇒ -锞 -𝒪 -棊 -愓 -莶 -琲 -འ -プ -་ -𝐿 -艟 -欬 -ิ -в -ų -纻 -㎎ -婄 -Ρ -歟 -椢 -粜 -종 -خ -ね -剞 -베 -斄 -幠 -ト -疛 -よ -╳ -醳 -군 -諂 -芰 -穋 -禆 -길 -秊 -噙 -y -锓 -⁵ - -拠 -Ĥ -𝑒 -窬 -抈 -︰ -퐶 -铳 -각 -ش -錉 -ù -臝 -闶 -𝒟 -芐 -韎 -권 -萚 - -ど -羮 -镕 -欔 -瘐 -받 -𝑚 -𢦟 -髤 -腙 -⽽ -상 -铘 -장 -𤇈 -ニ -凂 -ȷ -육 -а -살 -雠 -荑 -태 -穤 -ɯ - -圬 -楑 -단 -ง -⾯ -λ -⁰ -성 -萿 -缌 - -毣 -矅 - -푚 -˘ -貣 -∂ -은 -ė -䌛 -경 -せ - -拫 -⅞ -餕 -鐨 -翭 -ื -ɵ -⺍ -Փ -▬ -ว -희 -짐 -屙 -洫 -ေ -∏ -臜 - -剸 -芓 -운 -∓ -계 -祔 -鶵 -𝝅 -柂 -訢 -禊 -扽 -恫 -𝜙 -狢 -勠 -ི -𝜒 -จ -铯 -ྱ -𝑙 -蟇 -울 -莤 -牱 -𝒗 -詇 -靃 -殓 -栍 -踟 -ي - -鲄 -㓷 -贳 -ナ -鲓 -𝒙 -薁 -Χ -侪 -恌 -㰤 -목 -̄ -丱 -― -𝛔 -𝑔 - -鸷 -﹣ -籢 -脢 -δ -窭 -‐ -阒 -석 -아 -ォ -두 -𝐦 -浬 -搰 -褃 - -ལ -乇 -腘 -眊 -偬 -Ⅻ -ℳ -畤 -芟 -曈 -飧 -堌 -═ -谶 -櫝 -嬑 -冋 -嗌 -抜 - -腜 -공 -𝜕 -ん -鲭 -郐 -酓 -𝑍 -⾏ -⼹ -㐬 -고 -𝟑 -缯 -碤 -濩 -ʰ -佻 -Υ -∗ -賅 -집 -跹 - -ɾ -蔧 -다 -栫 -庰 -欤 -洿 -捾 -𝜍 -𝑄 - -攆 -夂 -檿 -荜 -ц -柖 -唅 -ท -ɦ -讦 -습 -锿 - -纆 -檑 -殰 -歠 -鼑 -Ä -و -☑ -緦 -悁 -偞 -ส -絭 -저 -踯 -騀 -쉰 -蒷 -揗 -儵 -ρ -薅 -ャ -‗ -犒 -旟 - -승 -ང -소 -𝛴 -瀜 -锜 -𣱼 -谳 - -軑 -ポ -楁 -𝑜 -袚 -ྐ -Á -𝑑 -鲀 -牾 -鬌 -푥 -¤ -呴 -‑ -✓ -민 -⼦ -ⅰ -⽉ -擿 -ч -➝ -가 -≳ -漥 -踖 -枧 -莝 -⻘ -傧 -𝑢 -ю -赍 -q -絫 -о -ア -ℐ -髫 -齢 -湎 -甓 -揿 - -ℋ -怹 -자 -⑦ -져 -椟 -鶟 -浕 -ー -𝛂 -偾 -⃗ -喑 -鹡 -≦ -磽 -ⅆ - -葂 -鶱 -ン -貇 -褡 -▴ -것 -喿 -つ -闚 - -盳 -𝟒 -雔 -洭 -殫 -楎 -£ -^ -葲 -𝟖 -眗 -棸 -潏 -熕 -𝟐 -품 -သ -樳 -⁴ -イ -㈢ -렴 -脰 -돈 -⑮ -钲 -𝒘 -訽 -爟 -幨 -枻 -亷 -猃 -σ -黩 -嘑 -榹 -⁡ -鍧 -𝑋 -枘 -𝑥 -원 -睚 -饔 -酲 - -顸 -람 -𝐫 -脁 -½ -긴 -ʔ -Ⅰ -旆 -죽 - -궐 - -奡 -㭃 -杝 -忾 -ม -掮 -饍 -摛 -쓰 -慊 -踣 -푅 -悽 -礅 -毄 -俓 -데 -冣 -만 -驖 -𤉣 -̃ -廾 -匵 -阇 -𤸫 -戣 -刌 -剕 -杅 -ο -蒥 -ː -癃 -蒬 -â -À -嗥 -우 -ケ -」 -聩 -ë -吽 -檌 -苰 -⑹ -Ÿ - -⑥ -노 - -˄ -鸫 -廛 -㱿 -鹛 -赟 -℅ -菿 -辳 -阼 -𝒇 -哋 -♀ -氕 -砤 -† -舡 -偝 -飜 -넓 -鈜 -ầ -닝 -禚 -匲 -〉 -Ф -锊 -ϵ -∙ -惛 -箧 -𝑦 -宬 -𝑀 -臙 -𩡶 - -¡ -潀 -수 -敃 -か -أ - -苌 -饘 -咝 -𝑼 -∘ -涷 -樍 -厣 -蝼 -墻 -Ñ -秅 -︒ -∅ -↵ -葹 -ỽ -𝑗 -た -일 -蒊 -치 -竢 -¨ -佢 -潵 -櫼 -軵 -𧕿 -倨 -歱 -瘅 -𝐭 -黾 -脼 -ê -땋 -鶷 -ё -鹯 -掲 -\ -𨳡 - -Г -ι -탁 -溞 -殪 -菭 -𝛥 -擛 -録 -㥥 -∀ -锇 -锃 -편 -餬 -瘻 -ཟ -豤 -로 -ɸ -ℎ -랑 -ʃ -鼹 -臬 -ŋ -巵 -譁 -w -窳 -蓔 -䉜 -浧 -酂 -⒀ -མ -椠 -槖 - -衄 -𨐨 -̿ -ご -⺗ -顇 -𝒫 -搕 -ミ -𪪋 -« -䣛 -鹩 -鴈 -п -는 -䋰 -𝛹 -犕 -呌 -𝒮 -𝑪 -鋎 -嚻 -杚 -䕊 -蠜 -ザ -𝐂 -☐ -𥘔 - -赜 -Ν -廦 
-瓾 -↦ -龉 -⽅ -棂 -𝜌 -큰 -踔 -ラ -。 -剤 -황 -⅜ -僈 -骈 -ɕ -О -м -椑 -𝑟 -纇 -𝑓 -𝐖 -Ш -⎦ -旹 -삶 -ึ -囵 - -す -ⅈ -ت -踽 -陴 -餱 -ป -막 -紟 -방 -剀 -簖 -闬 -キ -鋉 -납 -タ -谵 -詑 -족 -垔 -荋 -旰 -𥘸 -窾 - -신 -𝐎 -𝛃 - - -﹒ -縰 -猲 -郘 -파 -⊕ -镘 -𠊃 - -呔 -𝜗 -ʊ -𝛬 -辏 -陭 -𝑕 -庴 -ʐ -瀌 -倄 -蕞 -ل -絷 -러 -든 -བ -柅 -› -傚 -睠 -Ⅺ -饐 -蔮 -ɟ -莈 -𤨨 -⋯ -犭 -𝜃 -𥹳 -초 -⎞ -遌 -眇 -蓗 -綅 -토 -裰 - -⼼ -虘 -𝑌 -觯 -漶 -钤 -讒 -げ -螬 -鲹 -咷 -蓞 -僂 -𝒉 -が -桮 -포 -쟈 -柽 -ウ -綟 -缟 -䁾 -钭 -烕 -厠 - -孭 -礉 -­ -谲 - -⼤ -𝒒 -旒 -㫄 - - -鳓 -挈 -재 -ད -𧊒 -蝝 -𝐺 -懱 -芢 - -ⅼ -Ú -𝑱 -翯 -芶 -厽 -遉 -鲒 -η -𝛾 -趮 -虆 -汸 -嬖 -糈 -窸 - -추 -棬 -懔 -硁 -ぶ -抟 -胕 -𝑧 -⌦ -碫 -Ⅵ -속 -𝐚 - -Ç -행 -Ɵ -⑱ -贽 -箤 -р -敒 -衤 -풍 -⊛ -慉 -ψ -© -광 -ℕ -屣 -臌 -旵 -臁 -‡ -癎 -閡 -𡵂 -襐 -畟 - -萪 -娒 -瘼 -庳 -천 -觌 -Α -と -奁 -煃 -؟ -◯ -의 -攎 - -𝐞 -J -𢦒 -❑ -벤 -𝐒 -リ -蒉 -𝐱 -朹 -㈤ -„ -䗬 -Ι -ཀ -𡜵 -俣 -疬 - -墥 -吣 -У -榀 -絟 - -旓 -𝐛 -𝜷 -瑮 -≔ -笾 -ζ -김 -暵 -𝜹 -逶 -萙 -欇 -俧 -籴 -絰 -揶 -ǔ -宂 -伩 -Ө -菞 -梕 -エ -蹚 -제 -Š -沝 - -𦳢 -𝒱 -揠 -ℏ -𝐹 -箝 -규 -氒 -⼊ -鰌 -筮 -⼩ -대 -𝔾 -䄃 -𝐸 -﹕ -부 - -刓 -ȵ -缛 -기 -缊 -𝟎 -𨟻 -め -捃 -⽚ -鍪 -灆 -迻 -⾦ -荗 -v -𡒊 -汍 -斲 -姕 - -儴 -偒 -辤 -芀 -蝥 -ń -臥 -椀 -㪚 -š -담 -ø -䈰 -睏 -テ -﹐ - -참 -楘 -𝒌 -劓 -ɪ -醑 -绹 -諓 -𝛉 -ズ -怼 -埘 -臽 -잡 -镢 -𝜖 -진 -踬 -谠 -﹥ -髺 -腞 -현 -嘭 -ʑ -蓌 -〜 -锠 -蓶 -る - -∼ -枎 -緗 -薠 -芈 -耪 -𝒎 -謼 - -瘳 -诨 -忤 -œ -⇡ - -鲣 -ⅵ -Τ -㯮 - -㶲 -ⅹ -䙴 -坴 -馑 -缹 -椦 -ô -⼈ -フ -誏 -э -哙 -愎 - -埽 -祲 -마 -殍 -菋 -懑 - -辇 -鍤 -𝜀 -ɜ -り -𝐷 -㕞 -瑵 - -蔨 -Ⅶ -镴 -ภ -𝝀 -𢶡 -⃝ -ơ -柢 -𧴗 -ʁ -攙 - -な -𝑏 -挴 -餧 -絇 -怄 -曏 -洟 -軷 -∉ -咍 -⎪ -樛 -𝑣 -웃 -椊 -黢 -𝑩 -誩 -伓 -戠 -橥 -⟨ -豰 -懥 -涖 -綘 -詬 -ွ -˚ -刽 -ɑ -격 -稖 -𝟏 -禝 -墦 -𝑊 - -択 -檙 -∝ -颟 -诂 -𝐧 -踲 -𝜛 -𝑰 - -鲬 -⁸ -ǎ -문 - -紬 -楲 -䊭 -枨 -膑 -õ -던 -Ⅴ -秏 -馔 -拊 -缗 -隠 -牀 -淲 -鬰 -綵 -鶑 -詎 -慙 -劒 -閲 -鎡 -淒 -屨 -鉢 -扃 -鳬 -閤 -馿 -翛 -駸 -蛩 -驂 -嵲 -覩 -牋 -湲 -蓴 -賸 -遡 -翫 -嫋 -惻 -妬 -罇 -龏 -鵷 -閙 -鎸 -朅 -巉 -僞 -洶 -磧 -筯 -慇 -鷁 -斾 -斸 -酹 -幘 -羶 -閽 -薤 -泝 -覯 -怱 -缾 -氳 -躊 -檝 -擣 -虀 -誚 -甃 -艤 -櫳 -醖 -壚 -涴 -崦 -秪 -潄 -濆 -駡 -坰 -闉 -縑 -躕 -颻 -燠 -輙 -鏁 -嶮 -薖 -輈 -綆 -覷 -蹔 -攄 -鐩 -鑱 -羃 -轓 -麤 -驀 -欵 -亙 -朮 -邐 -箠 -怳 -鋩 -鷃 -篘 -蔌 -諄 -旐 -慍 -欷 -頽 -蜺 -覊 -禋 -秔 -蜩 -嚬 -櫺 -軿 -痾 -笻 -猱 -毳 -泬 -竚 -齪 -搘 -欻 -釂 -嚥 -誑 -籩 -韉 -幙 -舠 -飣 -颭 -颸 -歔 -屧 -巇 -艫 -壖 -牓 -薝 -銛 -皪 -枿 -剗 -歘 -鸂 -邅 -衒 -荄 -鴂 -嫗 -顦 -瀼 -繄 -搆 -狖 -貰 -醆 -秖 -蹀 -頳 -纒 -憇 -溽 -澦 -讐 -灩 -箎 -螿 -鰥 -飀 -澒 -矻 -枌 -擡 -鷖 -齬 -纊 -挼 -齟 -錙 -屩 -蠧 -氅 -漭 -躚 -翺 -瘵 -螘 -鵶 -㶁 -斵 -饜 -岏 -䍦 -哢 -彴 -豗 -靨 -鋋 -禳 -覘 -鞚 -擻 -涘 -溷 -沴 -嶓 -褊 -罏 -齏 -醲 -繅 -舴 -釃 -厖 -闤 -閴 -藂 -譍 -糲 -籞 -躞 -餳 -遰 -倐 -嘖 -鷀 -暍 -韝 -蘺 -齁 -醽 -醨 -憀 -䕷 -跕 -拶 -垤 -鸎 -漙 -躭 -傴 -蕕 -嘒 -晻 -糵 -閈 -嫠 -斁 -鶗 -詶 -囘 -羇 -橛 -鞬 -磈 -粃 -阨 -塿 -敧 -氊 -芼 -襆 -迍 -鬛 -憒 -釅 -蓐 -奬 -頲 -髠 -抔 -葅 -槧 -跧 -揜 -渇 -餔 -罝 -裯 -蹁 -椶 -幰 -逰 -饁 -棃 -噀 -轔 -囁 -惸 -騑 -呪 -鬐 -綌 -醤 -䆉 -艣 -鐶 -夐 -摐 -鸇 -睎 -羝 -紼 -鞿 -噉 -磑 -闗 -筩 -駮 -蹌 -縢 -闠 -鬙 -谹 -榾 -觳 -皁 -晼 -啗 -簣 -騕 -蹣 -麰 -觧 -怊 -朞 -鱍 -蟣 -畚 -蠁 -舁 -瞇 -劚 -鰐 -籯 -鬖 -柮 -飱 -帟 -酇 -崿 -霪 -緌 -踆 -欃 -縟 -搦 -琖 -搥 -倀 -觫 -遝 -嚅 -聵 -藋 -筴 -喁 -窻 -穏 -牥 -鎩 -礲 -膴 -鞵 -醵 -斚 -縲 -裀 -齷 -騃 -袠 -谺 -靦 -帬 -鯈 -曀 -灔 -崷 -趂 -徯 -闃 -洧 -獪 -稏 -煢 -歈 -呶 -壈 -襃 -旴 -檟 -簦 -谽 -箵 -謡 -慝 -餖 -稌 -朣 -禖 -嚀 -嵂 -黷 -甖 -洑 -釡 -蕝 -甆 -翣 -篸 -隮 -滃 -裌 -蔀 -籖 -秬 -鷴 -啅 -慼 -捄 -咮 -睟 -譎 -嘷 -駃 -檥 -蹐 -窊 -駞 -雘 -趯 -篲 -讋 -睍 -毰 -憗 -鳷 -嚲 -圞 -歃 -緼 -賫 -籋 -繐 -麏 -灕 -礧 -歜 -飇 -鵁 -疢 -賖 -窆 -螮 -毹 -硉 -耡 -甔 -篛 -娭 -髩 -燋 -輜 -籧 -撝 -嬙 -徃 -驦 -𡏖 -麕 -馹 -覔 -鶠 -褷 -綍 -螗 -嗈 -彯 -篨 -炰 -鄮 -噞 -尅 -鷰 -鋭 -饉 -霢 -䔩 -坱 -裓 -帨 -忺 -豅 -栱 -謦 -傯 -誵 -骭 -潩 -鬒 -嵫 -悮 -扊 -扅 -轢 -惝 -臈 -舃 -鞾 -譟 -袵 -眎 -簏 -埸 -堧 -憸 -雰 -腷 -嵓 -隩 -趄 -墐 -褦 -艑 -狴 -玿 -竪 -恧 -姱 -抆 -恓 -霣 -躓 -鞲 -晬 -簴 -唼 -曵 -褕 -罣 -縐 -衘 -巃 -攲 -輀 -貎 -哳 -醭 -鋣 -僛 -迕 -蠭 -膓 -欝 -洊 -敺 -纎 -栟 -鞓 -蛬 -灺 -痏 -恡 -踸 -霔 -濵 -昻 -鉘 -楖 -竛 -竮 -窱 -幬 -慤 -儗 -黮 -嘐 -睆 -頇 -麑 -壼 -㦸 -顋 -瘥 -苖 -韈 -盻 -袷 -矼 -塼 -鐍 -傞 -苶 -吷 -噇 -鶖 -僣 -髧 -䅉 -鯫 -襏 -縳 -蠨 -痡 -髽 -剉 -蝱 -鄽 -匼 -嚚 -襫 -缿 -鵊 -燖 -忸 -摝 -攅 -牷 -氎 -騣 -颿 -虡 -腯 -漘 -矓 -祫 -顢 -綀 -弮 -柙 -蔾 -胾 -筤 -馽 -砆 -冩 -賙 -唶 -麛 -簜 -蹏 -屼 -鞶 -煑 -踠 -愀 -蠒 -頬 -韲 -戞 -畆 -笊 -搨 -捽 -絙 -覉 -澨 -趫 -矹 -穮 -愠 -劘 -轣 -卭 -鼪 -杕 -轗 -擐 -蚿 -恚 -檛 -𩕳 -靆 -轕 -餼 -頮 
-槹 -蔉 -皜 -扄 -鮆 -轑 -蹡 -嵽 -甎 -蟈 -橅 -笴 -膰 -蕣 -澘 -髿 -樕 -褵 -蜋 -窼 -櫧 -雊 -胷 -嘵 -麄 -裋 -繢 -啐 -臛 -簁 -巓 -羜 -攧 -簮 -壊 -齩 -晹 -臲 -鬵 -齠 -媮 -幮 -壍 -蠛 -槜 -羖 -窓 -隃 -嚘 -輳 -籹 -凴 -崕 -獍 -嗸 -趦 -囅 -戺 -涬 -諉 -箯 -輊 -桹 -嵷 -㲲 -愊 -蒱 -洚 -赩 -輴 -幈 -齔 -嗁 -阽 -圠 -荈 -碔 -揎 -巀 -洏 -卼 -𨁝 -痁 -黳 -嗾 -䆗 -戃 -蕆 -頋 -悤 -掎 -㯝 -吚 -猘 -鮎 -鬴 -墁 -飋 -呿 -窀 -沲 -枒 -窌 -爼 -頞 -譡 -鶋 -湩 -㦬 -僾 -斒 -醼 -鶂 -磥 -揫 -犗 -齗 -鄶 -囏 -崪 -爞 -籓 -斮 -觝 -嵸 -驔 -䨴 -頺 -萑 -珓 -牸 -闒 -凘 -悢 -蟭 -濈 -嬄 -翽 -旍 -鶢 -罋 -輠 -怩 -頖 -趍 -壝 -嫮 -蕋 -踦 -轇 -眘 -巗 -嶭 -糓 -甽 -籺 -矟 -佖 -絏 -憮 -懡 -駈 -擕 -淟 -皡 -膋 -潨 -鳲 -趠 -麨 -頩 -漻 -輗 -墄 -賮 -㴩 -莟 -縦 -岝 -醻 -曚 -䙰 -噭 -醥 -筰 -躧 -踘 -鑕 -咈 -僶 -鶊 -鬂 -聼 -騐 -壒 -磎 -歗 -淈 -隟 -狃 -縋 -媻 -趲 -惙 -呫 -聮 -羾 -尫 -佽 -髼 -繋 -鬘 -旜 -疐 -阬 -䰐 -塈 -徤 -祊 -灂 -祅 -樷 -颾 -凟 -頀 -蠏 -塒 -衹 -婬 -裩 -粞 -憯 -匭 -筈 -盬 -霮 -黕 -靮 -伻 -緺 -瘝 -羑 -醸 -樝 -僎 -絓 -噆 -愞 -痗 -愽 -岊 -黤 -湑 -搉 -綯 -焮 -疉 -楛 -玼 -喤 -粔 -飂 -贐 -緉 -覰 -胔 -鞳 -摑 -墢 -斅 -誶 -僝 -鹺 -諌 -齅 -嵼 -讟 -冦 -脝 -婣 -緐 -茰 -飶 -欎 -慁 -抝 -瘉 -𡎺 -鈯 -瘃 -麫 -匊 -窞 -羓 -氄 -嚌 -姤 -橑 -駬 -冺 -騠 -㕙 -楶 -靸 -圎 -尀 -䙀 -鄏 -軃 -竁 -㹠 -刜 -剨 -罛 -鏹 -鬉 -簨 -藭 -藷 -僇 -瀫 -袨 -忮 -冡 -齯 -楪 -囋 -蟉 -醱 -尠 -牣 -攟 -袿 -齾 -甞 -啑 -潚 -樐 -絖 -酖 -觖 -骹 -嶅 -玃 -嫜 -廹 -儤 -矂 -艓 -挱 -骳 -嵳 -洴 -礓 -厪 -﨑 -禜 -籊 -瓻 -彛 -狁 -腪 -骾 -娯 -罻 -璅 -簳 -姢 -猰 -眹 -䴥 -堘 -搯 -怤 -緫 -聫 -涊 -熛 -輤 -䡾 -譌 -髇 -攛 -稭 -媕 -鬷 -跰 -縚 -鉧 -踧 -嚭 -襮 -藞 -滮 -颷 -荂 -蓰 -怫 -閧 -臕 -稛 -怗 -闑 -抶 -薶 -嶕 -瀺 -𥫗 -墝 -埆 -皥 -惷 -鞔 -鞺 -蟛 -瀡 -鎁 -酧 -恝 -齓 -嚄 -簔 -蟺 -㔶 -胹 -憖 -惄 -鸒 -貛 -軏 -縗 -蓻 -娵 -抺 -鼛 -虩 -歕 -矑 -繂 -襚 -倂 -廥 -諝 -虗 -弜 -兟 -繿 -偘 -翶 -肻 -棙 -斆 -碨 -醎 -蟢 -銙 -躠 -櫩 -椮 -絀 -鷾 -溳 -詖 -葓 -谼 -𦩘 -韔 -翿 -呑 -馡 -騊 -堁 -蓏 -䟃 -頟 -渢 -趑 -堄 -滛 -擫 -豭 -騩 -騘 -䍧 -彍 -忭 -餙 -馺 -忩 -芣 -矴 -噂 -滍 -慫 -𨍏 -怲 -扵 -搊 -昩 -嶻 -禬 -憃 -忼 -榰 -箾 -撁 -鈆 -袗 -脤 -騞 -哤 -螀 -靧 -梲 -囦 -魖 -褠 -䭔 -煆 -挃 -宷 -熉 -朘 -憭 -䒠 -謭 -鷤 -拕 -骫 -穾 -襭 -喓 -冞 -勩 -媢 -麚 -椓 -俙 -幐 -磝 -蜎 -灙 -漦 -㛹 -䭀 -㜷 -粻 -懟 -箳 -滣 -糉 -冐 -韤 -湱 -糭 -栳 -勌 -慱 -㸌 -罫 -筞 -霿 -躶 -玞 -磉 -罦 -祴 -媟 -猒 -擭 -恠 -嵁 -屴 -慆 -庬 -蟁 -㹀 -薧 -鷕 -渻 -朂 -愯 -齚 -蝻 -胏 -饙 -鳦 -鸃 -叅 -肧 -篂 -脗 -雺 -飰 -筀 -頥 -毶 -弌 -逓 -瞍 -絁 -鏚 -嚵 -攂 -醄 -奼 -獫 -絣 -靷 -畮 -褉 -棁 -揑 -楥 -橤 -襥 -蹮 -窔 -忪 -潠 -杇 -黲 -擄 -蚻 -蘙 -虙 -袐 -陿 -帊 -醟 -髖 -㞦 -鱭 -譸 -鮞 -栧 -扺 -脽 -擉 -岨 -黈 -餻 -佪 -遻 -鋟 -瞶 -廽 -懨 -墖 -玁 -籉 -宼 -鰋 -瑽 -垖 -酕 -漰 -戹 -蝛 -瑲 -阤 -褆 -儛 -䍽 -觕 -箘 -碯 -灨 -燀 -膇 -韀 -䳏 -詿 -禂 -韣 -踡 -碏 -尵 -莭 -庻 -篿 -狤 -㘞 -艭 -殱 -鵔 -槮 -猧 -劙 -獝 -㭊 -㾕 -蠚 -帤 -蹢 -蛚 -輼 -麀 -檃 -䰒 -䪫 -悾 -濳 -杗 -揾 -駏 -撦 -耈 -蟟 -狌 -鸖 -䨥 -餫 -鍰 -耉 -毚 -袽 -䱥 -慓 -䔿 -艖 -舋 -弰 -蠺 -嫓 -蚳 -髾 -喨 -鴐 -䍥 -韍 -柹 -掁 -薋 -攕 -飺 -凢 -麌 -嫰 -鑚 -黦 -葠 -吿 -栰 -踶 -芿 -穭 -啝 -筓 -褁 -稇 -顚 -䎘 -恇 -珷 -緪 -墠 -蛣 -蛜 -讕 -疻 -惎 -袝 -霡 -罸 -鬽 -苢 -喭 -飊 -唎 -澾 -襋 -皭 -廏 -蔿 -穊 -䝟 -駊 -獹 -夣 -褾 -慴 -軥 -讁 -軰 -瞷 -𡋯 -晜 -潗 -衋 -揵 -覼 -鱐 -醡 -䏰 -侐 -亁 -桞 -驘 -鬋 -鷽 -懞 -㵳 -儳 -豝 -傺 -搒 -縧 -硾 -䏶 -覻 -薍 -憝 -榠 -湆 -皵 -鎞 -菆 -糇 -矉 -搤 -紃 -峿 -磹 -甒 -琭 -𩥇 -菢 -禡 -渹 -刅 -迒 -敂 -蹜 -磓 -傪 -縿 -㕮 -涏 -䰀 -㡛 -韛 -犠 -餦 -圝 -焫 -㝢 -潬 -馵 -澟 -鱏 -譾 -㪍 -煼 -鍜 -窽 -紾 -堨 -䕸 -穅 -戅 -穄 -駴 -偫 -煗 -媠 -酘 -矬 -貆 -茞 -骩 -扠 -岞 -潓 -炧 -陊 -栭 -釱 -㡚 -篴 -耞 -鞉 -䋏 -𤫩 -椸 -儜 -痀 -謷 -潙 -寠 -牐 -嫭 -慅 -獧 -鈒 -欿 -薳 -蟂 -郲 -軨 -斨 -訦 -𠴲 -剺 -駪 -贙 -禫 -噣 -茢 -茙 -鄼 -揷 -魌 -䫻 -嗋 -噐 -侲 -諵 -𠺕 -挍 -䑳 -㨷 -槸 -靘 -㩧 -虣 -瑿 -衱 -襹 -餭 -㗶 -枑 -悋 -纑 -嶫 -儓 -髵 -甗 -榝 -㗭 -贗 -熸 -嬃 -礌 -偭 -樠 -栮 -鷼 -鵀 -澬 -眂 -牿 -骴 -呞 -爕 -牎 -巹 -帉 -砠 -梴 -䛏 -攃 -餁 -哿 -蹝 -崺 -閌 -醝 -臡 -麖 -駼 -賵 -夘 -骻 -愡 -俔 -諐 -觩 -莂 -饈 -殣 -溠 -冱 -埓 -厫 -虥 -芄 -慽 -竃 -埿 -仭 -褼 -倛 -韸 -牗 -幖 -禈 -穧 -蜧 -諞 -脞 -蝃 -飃 -煁 -涒 -谾 -覢 -赮 -鼘 -艗 -䶉 -鴥 -轒 -睅 -傔 -惵 -唈 -懆 -磣 -膢 -堶 -囈 -瘕 -誷 -瑘 -絝 -鬈 -嘽 -鷅 -梜 -喎 -鼟 -㟧 -劻 -眑 -剴 -痎 -餟 -庌 -菷 -梐 -吺 -躘 -慞 -罼 -穨 -摏 -釄 -莋 -呺 -砅 -鴽 -㘭 -㟅 -艴 -犉 -籕 -跐 -惏 -陗 -刋 -襘 -醹 -紽 -痌 -㗀 -撋 -陼 -駷 -艼 -踼 -癏 -慠 -趒 -邍 -姞 -䂬 -堲 -苙 -椌 -嗃 -挶 -岯 -禗 -嵔 -觡 -豜 -睩 -㒿 -塠 -燂 -扤 -恟 -鬝 -鬇 -鬡 -揳 -霠 -㗫 -苐 -蒀 -圌 -戭 -䖃 -𥈭 -勮 -耝 -轞 -胮 -墯 -枮 -罿 -浺 -綪 -爓 -蘃 -襍 -轜 -閠 -畽 -鄊 -嶆 -籭 -蠯 -陑 -瘽 -迆 -賷 -䍡 -韂 -躃 -禴 -簄 -瓟 -碐 -躨 -侜 -岍 -䃸 -趚 -髐 -榅 -粣 -屝 -鴃 -圁 -蝜 -黫 -僽 -丗 -靣 -湏 -抏 -㟽 -跙 -餤 -朙 -㹞 -瞖 -繣 -㨫 -罙 -糒 -惉 -葽 -鼮 -蕳 -豏 -𥱼 -鵮 -獦 -悕 -𠴨 -闟 -惽 -慿 -隉 -椷 -𩅰 -艛 -眽 -凓 -儃 -奨 -埀 
-瑫 -駚 -濇 -緶 -峉 -礨 -髢 -瞯 -壥 -姡 -㟯 -髬 -啀 -㶿 -歅 -殀 -縩 -疈 -鳸 -霳 -稬 -圊 -彚 -裠 -埳 -褋 -㔩 -矲 -剶 -硋 -聦 -峞 -浰 -窵 -嘂 -睘 -簵 -腒 -韘 -躣 -甈 -忲 -舽 -襂 -硠 -脃 -鐏 -奯 -脧 -矕 -䠞 -駹 -豶 -訑 -柸 -鰅 -瘨 -趿 -糦 -蟏 -饛 -尰 -諑 -汃 -毺 -鋃 -絚 -馧 -艬 -枍 -爊 -峗 -泙 -碖 -鵕 -尩 -閗 -𤧚 -幩 -塉 -箊 -覂 -玒 -橧 -謟 -庨 -籔 -欑 -厎 -尭 -氉 -蠈 -䓞 -矙 -梡 -瀩 -溔 -煴 -蔲 -僬 -嵢 -梩 -弝 -𣙙 -鞟 -敉 -鮚 -湠 -鐐 -爣 -裻 -䶎 -𦨴 -謿 -垾 -蝂 -睂 -癙 -韽 -㟳 -桒 -鳿 -樏 -峛 -瑉 -僄 -顣 -衺 -殗 -肦 -圑 -朒 -喌 -犦 -㰅 -疁 -氃 -吰 -陻 -盰 -娀 -魶 -㖃 -曒 -娿 -獱 -孏 -酅 -蝡 -齰 -莬 -鄀 -逥 -挿 -觵 -縆 -㟝 -繍 -碙 -㑂 -䎳 -兾 -壸 -賝 -桯 -跁 -跒 -蔍 -舼 -忀 -懭 -媌 -罭 -菵 -狔 -靿 -拪 -㲉 -䔲 -嬀 -鵽 -涳 -朾 -𡸣 -𢫫 -虈 -㜮 -顑 -櫋 -蔪 -旝 -湡 -蹛 -稆 -唽 -㟏 -熂 -龡 -煟 -韅 -韐 -慂 -剳 -掫 -兠 -摋 -羫 -璊 -鵻 -駓 -佌 -蜹 -晲 -矒 -玅 -剰 -斶 -紖 -懴 -駜 -羢 -麳 -㳷 -馞 -爥 -鍚 -鑢 -螵 -嗺 -鏨 -𠙶 -疪 -鷔 -鮧 -轊 -栘 -鼜 -睗 -蟘 -枓 -䖟 -剠 -瞤 -圛 -椳 -籸 -䪌 -鯹 -湌 -丳 -賧 -縭 -檾 -𦨻 -撆 -䩫 -磢 -惥 -譀 -罤 -鞸 -鉎 -㶏 -膁 -甋 -瓀 -懹 -槢 -硊 -弆 -琫 -嵠 -駻 -湢 -杮 -䌨 -訹 -藇 -穯 -蠉 -曭 -蹎 -詄 -毷 -𩃎 -熁 -灜 -蜫 -蜳 -昈 -帩 -鈋 -䐹 -顖 -鄹 -匶 -毾 -礜 -堭 -婞 -鷿 -㙞 -詀 -瘮 -䫜 -㾪 -捘 -屫 -誧 -䲔 -閍 -蒳 -㬋 -遟 -嶀 -葐 -蜼 -㻱 -曡 -䃜 -濴 -䦱 -霫 -譆 -霋 -蕰 -襓 -氋 -鴷 -魦 -㩻 -㡠 -灉 -贑 -燑 -峝 -輷 -烻 -耼 -螉 -跜 -豩 -㑃 -藙 -鋂 -胐 -𣔻 -紒 -瓓 -塯 -辴 -趷 -堛 -㒟 -㗲 -㬊 -䄡 -卄 -姧 -猓 -躗 -覤 -醊 -兎 -罯 -痯 -覸 -詉 -癿 -岋 -歝 -茟 -㘆 -㮰 -淜 -𥉌 -㫰 -鈌 -毵 -狉 -贜 -峬 -汻 -誖 -烓 -睋 -潎 -䲺 -㠓 -歖 -𠜱 -槵 -熚 -萷 -磤 -絸 -鷞 -聻 -屷 -㝵 -諕 -瘂 -㺷 -蚰 -柦 -䍐 -泿 -礰 -摎 -㜕 -㻞 -洓 -喍 -囌 -囐 -䙱 -腨 -妉 -鄛 -鄥 -㵝 -輧 -鱄 -騟 -鈚 -廜 -𨗨 -㶼 -膞 -崯 -硞 -萆 -眒 -譩 -揬 -藑 -匌 -㠾 -㥏 -㢮 -䕢 -帣 -酭 -枦 -孅 -鞙 -丷 -鍭 -䤴 -餂 -愗 -冘 -埛 -㒇 -郕 -蔯 -簰 -刔 -蠩 -耏 -鞹 -𧑅 -觹 -䐑 -磶 -蹵 -鵃 -耛 -蓤 -臄 -轙 -庤 -㒩 -翐 -榥 -晀 -輣 -蟚 -拲 -皠 -穱 -䃔 -䃧 -窡 -絍 -礿 -鑞 -栯 -㾓 -掿 -厞 -淂 -撶 -伹 -鹻 -軓 -岹 -蚷 -榸 -刾 -艂 -㤝 -塕 -蚔 -藾 -攓 -鏬 -珫 -黪 -蟧 -猭 -漑 -粺 -驆 -撘 -亾 -㼌 -蝑 -澓 -揞 -欱 -愶 -泲 -醷 -螴 -芚 -絻 -轃 -漮 -唪 -岉 -鬀 -䱹 -齖 -䂓 -趢 -荓 -覶 -鯾 -諿 -槥 -嚆 -爢 -瓬 -笐 -篢 -舝 -襵 -鎒 -𤝞 -肭 -瘇 -笓 -餑 -豋 -湗 -緎 -肐 -胲 -掤 -潫 -䖴 -𠎝 -𨺗 -諢 -毈 -寱 -唲 -䃭 -峮 -狘 -韊 -䬝 -呰 -㹱 -碞 -畞 -㠌 -黭 -蚘 -豵 -穥 -尯 -㳇 -隵 -灇 -壜 -楰 -彲 -甤 -綹 -旞 -𡏟 -曁 -喩 -𥲤 -郈 -塺 -訧 -絿 -掔 -蠮 -𡱰 -䃺 -宻 -灎 -羵 -𨠵 -糚 -摉 -壷 -勴 -瑃 -鎝 -𥜥 -婥 -鬺 -扢 -肣 -溰 -磩 -耇 -宎 -㔇 -霱 -敚 -汳 -鏄 -儹 -隥 -㿉 -膆 -崏 -𦭵 -郔 -扂 -垗 -㳂 -礛 -缻 -垜 -晱 -訩 -蘪 -珇 -怮 -垝 -㔢 -憛 -痝 -蟨 -鞁 -鶤 -肎 -傝 -䢆 -䰄 -𥊚 -㖀 -㠭 -壵 -墋 -㠔 -橜 -怓 -蚹 -塛 -憪 -鋝 -腶 -嶾 -翍 -溓 -齼 -蔂 -䃂 -鉺 -攑 -瓐 -泎 -眤 -邘 -崝 -稡 -愸 -髥 -輹 -詨 -髆 -麃 -虤 -洐 -婐 -挏 -峑 -嶣 -篬 -葄 -瑎 -瓉 -㳅 -葼 -姙 -䪜 -𩇕 -焭 -剚 -濪 -霵 -僒 - -羭 diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv5_dict.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv5_dict.txt deleted file mode 100644 index 567898b49de2707853454682f05e0c504c0085b9..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ppocrv5_dict.txt +++ /dev/null @@ -1,18383 +0,0 @@ -  -一 -乙 -二 -十 -丁 -厂 -七 -卜 -八 -人 -入 -儿 -匕 -几 -九 -刁 -了 -刀 -力 -乃 -又 -三 -干 -于 -亏 -工 -土 -士 -才 -下 -寸 -大 -丈 -与 -万 -上 -小 -口 -山 -巾 -千 -乞 -川 -亿 -个 -夕 -久 -么 -勺 -凡 -丸 -及 -广 -亡 -门 -丫 -义 -之 -尸 -己 -已 -巳 -弓 -子 -卫 -也 -女 -刃 -飞 -习 -叉 -马 -乡 -丰 -王 -开 -井 -天 -夫 -元 -无 -云 -专 -丐 -扎 -艺 -木 -五 -支 -厅 -不 -犬 -太 -区 -历 -歹 -友 -尤 -匹 -车 -巨 -牙 -屯 -戈 -比 -互 -切 -瓦 -止 -少 -曰 -日 -中 -贝 -冈 -内 -水 -见 -午 -牛 -手 -气 -毛 -壬 -升 -夭 -长 -仁 -什 -片 -仆 -化 -仇 -币 -仍 -仅 -斤 -爪 -反 -介 -父 -从 -仑 -今 -凶 -分 -乏 -公 -仓 -月 -氏 -勿 -欠 -风 -丹 -匀 -乌 -勾 -凤 -六 -文 -亢 -方 -火 -为 -斗 -忆 -计 -订 -户 -认 -冗 -讥 -心 -尺 -引 -丑 -巴 -孔 -队 -办 -以 -允 -予 -邓 -劝 -双 -书 -幻 -玉 -刊 -未 -末 -示 -击 -打 -巧 -正 -扑 -卉 -扒 -功 -扔 -去 -甘 -世 -艾 -古 -节 -本 -术 -可 -丙 -左 -厉 -石 -右 -布 -夯 -戊 -龙 -平 -灭 -轧 -东 -卡 -北 -占 -凸 -卢 -业 -旧 -帅 -归 -旦 -目 -且 -叶 -甲 -申 -叮 -电 -号 -田 -由 -只 -叭 -史 -央 -兄 -叽 -叼 -叫 -叩 -叨 -另 -叹 -冉 -皿 -凹 -囚 -四 -生 -矢 -失 -乍 -禾 -丘 -付 -仗 -代 -仙 -们 -仪 -白 -仔 -他 -斥 -瓜 -乎 -丛 -令 -用 -甩 -印 -尔 -乐 -句 -匆 -册 -卯 -犯 -外 -处 -冬 -鸟 -务 -包 -饥 -主 -市 -立 -冯 -玄 -闪 -兰 -半 -汁 -汇 -头 -汉 -宁 -穴 -它 -讨 -写 
-让 -礼 -训 -议 -必 -讯 -记 -永 -司 -尼 -民 -弗 -弘 -出 -辽 -奶 -奴 -召 -加 -皮 -边 -孕 -发 -圣 -对 -台 -矛 -纠 -母 -幼 -丝 -邦 -式 -迂 -刑 -戎 -动 -扛 -寺 -吉 -扣 -考 -托 -老 -巩 -圾 -执 -扩 -扫 -地 -场 -扬 -耳 -芋 -共 -芒 -亚 -芝 -朽 -朴 -机 -权 -过 -臣 -吏 -再 -协 -西 -压 -厌 -戌 -在 -百 -有 -存 -而 -页 -匠 -夸 -夺 -灰 -达 -列 -死 -成 -夹 -夷 -轨 -邪 -尧 -划 -迈 -毕 -至 -此 -贞 -师 -尘 -尖 -劣 -光 -当 -早 -吁 -吐 -吓 -虫 -曲 -团 -吕 -同 -吊 -吃 -因 -吸 -吗 -吆 -屿 -屹 -岁 -帆 -回 -岂 -则 -刚 -网 -肉 -年 -朱 -先 -丢 -廷 -舌 -竹 -迁 -乔 -迄 -伟 -传 -乒 -乓 -休 -伍 -伏 -优 -臼 -伐 -延 -仲 -件 -任 -伤 -价 -伦 -份 -华 -仰 -仿 -伙 -伪 -自 -伊 -血 -向 -似 -后 -行 -舟 -全 -会 -杀 -合 -兆 -企 -众 -爷 -伞 -创 -肌 -肋 -朵 -杂 -危 -旬 -旨 -旭 -负 -匈 -名 -各 -多 -争 -色 -壮 -冲 -妆 -冰 -庄 -庆 -亦 -刘 -齐 -交 -衣 -次 -产 -决 -亥 -充 -妄 -闭 -问 -闯 -羊 -并 -关 -米 -灯 -州 -汗 -污 -江 -汛 -池 -汝 -汤 -忙 -兴 -宇 -守 -宅 -字 -安 -讲 -讳 -军 -讶 -许 -讹 -论 -讼 -农 -讽 -设 -访 -诀 -寻 -那 -迅 -尽 -导 -异 -弛 -孙 -阵 -阳 -收 -阶 -阴 -防 -奸 -如 -妇 -妃 -好 -她 -妈 -戏 -羽 -观 -欢 -买 -红 -驮 -纤 -驯 -约 -级 -纪 -驰 -纫 -巡 -寿 -弄 -麦 -玖 -玛 -形 -进 -戒 -吞 -远 -违 -韧 -运 -扶 -抚 -坛 -技 -坏 -抠 -扰 -扼 -拒 -找 -批 -址 -扯 -走 -抄 -贡 -汞 -坝 -攻 -赤 -折 -抓 -扳 -抡 -扮 -抢 -孝 -坎 -均 -抑 -抛 -投 -坟 -坑 -抗 -坊 -抖 -护 -壳 -志 -块 -扭 -声 -把 -报 -拟 -却 -抒 -劫 -芙 -芜 -苇 -芽 -花 -芹 -芥 -芬 -苍 -芳 -严 -芦 -芯 -劳 -克 -芭 -苏 -杆 -杠 -杜 -材 -村 -杖 -杏 -杉 -巫 -极 -李 -杨 -求 -甫 -匣 -更 -束 -吾 -豆 -两 -酉 -丽 -医 -辰 -励 -否 -还 -尬 -歼 -来 -连 -轩 -步 -卤 -坚 -肖 -旱 -盯 -呈 -时 -吴 -助 -县 -里 -呆 -吱 -吠 -呕 -园 -旷 -围 -呀 -吨 -足 -邮 -男 -困 -吵 -串 -员 -呐 -听 -吟 -吩 -呛 -吻 -吹 -呜 -吭 -吧 -邑 -吼 -囤 -别 -吮 -岖 -岗 -帐 -财 -针 -钉 -牡 -告 -我 -乱 -利 -秃 -秀 -私 -每 -兵 -估 -体 -何 -佐 -佑 -但 -伸 -佃 -作 -伯 -伶 -佣 -低 -你 -住 -位 -伴 -身 -皂 -伺 -佛 -囱 -近 -彻 -役 -返 -余 -希 -坐 -谷 -妥 -含 -邻 -岔 -肝 -肛 -肚 -肘 -肠 -龟 -甸 -免 -狂 -犹 -狈 -角 -删 -条 -彤 -卵 -灸 -岛 -刨 -迎 -饭 -饮 -系 -言 -冻 -状 -亩 -况 -床 -库 -庇 -疗 -吝 -应 -这 -冷 -庐 -序 -辛 -弃 -冶 -忘 -闰 -闲 -间 -闷 -判 -兑 -灶 -灿 -灼 -弟 -汪 -沐 -沛 -汰 -沥 -沙 -汽 -沃 -沦 -汹 -泛 -沧 -没 -沟 -沪 -沈 -沉 -沁 -怀 -忧 -忱 -快 -完 -宋 -宏 -牢 -究 -穷 -灾 -良 -证 -启 -评 -补 -初 -社 -祀 -识 -诈 -诉 -罕 -诊 -词 -译 -君 -灵 -即 -层 -屁 -尿 -尾 -迟 -局 -改 -张 -忌 -际 -陆 -阿 -陈 -阻 -附 -坠 -妓 -妙 -妖 -姊 -妨 -妒 -努 -忍 -劲 -矣 -鸡 -纬 -驱 -纯 -纱 -纲 -纳 -驳 -纵 -纷 -纸 -纹 -纺 -驴 -纽 -奉 -玩 -环 -武 -青 -责 -现 -玫 -表 -规 -抹 -卦 -坷 -坯 -拓 -拢 -拔 -坪 -拣 -坦 -担 -坤 -押 -抽 -拐 -拖 -者 -拍 -顶 -拆 -拎 -拥 -抵 -拘 -势 -抱 -拄 -垃 -拉 -拦 -幸 -拌 -拧 -拂 -拙 -招 -坡 -披 -拨 -择 -抬 -拇 -拗 -其 -取 -茉 -苦 -昔 -苛 -若 -茂 -苹 -苗 -英 -苟 -苑 -苞 -范 -直 -茁 -茄 -茎 -苔 -茅 -枉 -林 -枝 -杯 -枢 -柜 -枚 -析 -板 -松 -枪 -枫 -构 -杭 -杰 -述 -枕 -丧 -或 -画 -卧 -事 -刺 -枣 -雨 -卖 -郁 -矾 -矿 -码 -厕 -奈 -奔 -奇 -奋 -态 -欧 -殴 -垄 -妻 -轰 -顷 -转 -斩 -轮 -软 -到 -非 -叔 -歧 -肯 -齿 -些 -卓 -虎 -虏 -肾 -贤 -尚 -旺 -具 -味 -果 -昆 -国 -哎 -咕 -昌 -呵 -畅 -明 -易 -咙 -昂 -迪 -典 -固 -忠 -呻 -咒 -咋 -咐 -呼 -鸣 -咏 -呢 -咄 -咖 -岸 -岩 -帖 -罗 -帜 -帕 -岭 -凯 -败 -账 -贩 -贬 -购 -贮 -图 -钓 -制 -知 -迭 -氛 -垂 -牧 -物 -乖 -刮 -秆 -和 -季 -委 -秉 -佳 -侍 -岳 -供 -使 -例 -侠 -侥 -版 -侄 -侦 -侣 -侧 -凭 -侨 -佩 -货 -侈 -依 -卑 -的 -迫 -质 -欣 -征 -往 -爬 -彼 -径 -所 -舍 -金 -刹 -命 -肴 -斧 -爸 -采 -觅 -受 -乳 -贪 -念 -贫 -忿 -肤 -肺 -肢 -肿 -胀 -朋 -股 -肮 -肪 -肥 -服 -胁 -周 -昏 -鱼 -兔 -狐 -忽 -狗 -狞 -备 -饰 -饱 -饲 -变 -京 -享 -庞 -店 -夜 -庙 -府 -底 -疟 -疙 -疚 -剂 -卒 -郊 -庚 -废 -净 -盲 -放 -刻 -育 -氓 -闸 -闹 -郑 -券 -卷 -单 -炬 -炒 -炊 -炕 -炎 -炉 -沫 -浅 -法 -泄 -沽 -河 -沾 -泪 -沮 -油 -泊 -沿 -泡 -注 -泣 -泞 -泻 -泌 -泳 -泥 -沸 -沼 -波 -泼 -泽 -治 -怔 -怯 -怖 -性 -怕 -怜 -怪 -怡 -学 -宝 -宗 -定 -宠 -宜 -审 -宙 -官 -空 -帘 -宛 -实 -试 -郎 -诗 -肩 -房 -诚 -衬 -衫 -视 -祈 -话 -诞 -诡 -询 -该 -详 -建 -肃 -录 -隶 -帚 -屉 -居 -届 -刷 -屈 -弧 -弥 -弦 -承 -孟 -陋 -陌 -孤 -陕 -降 -函 -限 -妹 -姑 -姐 -姓 -妮 -始 -姆 -迢 -驾 -叁 -参 -艰 -线 -练 -组 -绅 -细 -驶 -织 -驹 -终 -驻 -绊 -驼 -绍 -绎 -经 -贯 -契 -贰 -奏 -春 -帮 -玷 -珍 -玲 -玻 -毒 -型 -拭 -挂 -封 -持 -拷 -拱 -项 -垮 -挎 -城 -挟 -挠 -政 -赴 -赵 -挡 -拽 -哉 -挺 -括 -垢 -拴 -拾 -挑 -垛 -指 -垫 -挣 -挤 -拼 -挖 -按 -挥 -挪 -拯 -某 -甚 -荆 -茸 -革 -茬 -荐 -巷 -带 -草 -茧 -茵 -茶 -荒 -茫 -荡 -荣 -荤 -荧 -故 -胡 -荫 -荔 -南 -药 -标 -栈 -柑 -枯 -柄 -栋 -相 -查 -柏 -栅 -柳 -柱 -柿 -栏 -柠 -树 -勃 -要 -柬 -咸 -威 -歪 -研 -砖 -厘 -厚 -砌 -砂 -泵 -砚 -砍 -面 -耐 -耍 -牵 -鸥 -残 -殃 -轴 -轻 -鸦 -皆 -韭 -背 -战 -点 -虐 -临 -览 -竖 -省 -削 -尝 -昧 -盹 -是 -盼 -眨 -哇 -哄 -哑 -显 -冒 -映 -星 -昨 -咧 -昭 -畏 -趴 -胃 -贵 -界 -虹 -虾 -蚁 -思 -蚂 -虽 
-品 -咽 -骂 -勋 -哗 -咱 -响 -哈 -哆 -咬 -咳 -咪 -哪 -哟 -炭 -峡 -罚 -贱 -贴 -贻 -骨 -幽 -钙 -钝 -钞 -钟 -钢 -钠 -钥 -钦 -钧 -钩 -钮 -卸 -缸 -拜 -看 -矩 -毡 -氢 -怎 -牲 -选 -适 -秒 -香 -种 -秋 -科 -重 -复 -竿 -段 -便 -俩 -贷 -顺 -修 -俏 -保 -促 -俄 -俐 -侮 -俭 -俗 -俘 -信 -皇 -泉 -鬼 -侵 -禹 -侯 -追 -俊 -盾 -待 -徊 -衍 -律 -很 -须 -叙 -剑 -逃 -食 -盆 -胚 -胧 -胆 -胜 -胞 -胖 -脉 -胎 -勉 -狭 -狮 -独 -狰 -狡 -狱 -狠 -贸 -怨 -急 -饵 -饶 -蚀 -饺 -饼 -峦 -弯 -将 -奖 -哀 -亭 -亮 -度 -迹 -庭 -疮 -疯 -疫 -疤 -咨 -姿 -亲 -音 -帝 -施 -闺 -闻 -闽 -阀 -阁 -差 -养 -美 -姜 -叛 -送 -类 -迷 -籽 -娄 -前 -首 -逆 -兹 -总 -炼 -炸 -烁 -炮 -炫 -烂 -剃 -洼 -洁 -洪 -洒 -柒 -浇 -浊 -洞 -测 -洗 -活 -派 -洽 -染 -洛 -浏 -济 -洋 -洲 -浑 -浓 -津 -恃 -恒 -恢 -恍 -恬 -恤 -恰 -恼 -恨 -举 -觉 -宣 -宦 -室 -宫 -宪 -突 -穿 -窃 -客 -诫 -冠 -诬 -语 -扁 -袄 -祖 -神 -祝 -祠 -误 -诱 -诲 -说 -诵 -垦 -退 -既 -屋 -昼 -屏 -屎 -费 -陡 -逊 -眉 -孩 -陨 -除 -险 -院 -娃 -姥 -姨 -姻 -娇 -姚 -娜 -怒 -架 -贺 -盈 -勇 -怠 -癸 -蚤 -柔 -垒 -绑 -绒 -结 -绕 -骄 -绘 -给 -绚 -骆 -络 -绝 -绞 -骇 -统 -耕 -耘 -耗 -耙 -艳 -泰 -秦 -珠 -班 -素 -匿 -蚕 -顽 -盏 -匪 -捞 -栽 -捕 -埂 -捂 -振 -载 -赶 -起 -盐 -捎 -捍 -捏 -埋 -捉 -捆 -捐 -损 -袁 -捌 -都 -哲 -逝 -捡 -挫 -换 -挽 -挚 -热 -恐 -捣 -壶 -捅 -埃 -挨 -耻 -耿 -耽 -聂 -恭 -莽 -莱 -莲 -莫 -莉 -荷 -获 -晋 -恶 -莹 -莺 -真 -框 -梆 -桂 -桔 -栖 -档 -桐 -株 -桥 -桦 -栓 -桃 -格 -桩 -校 -核 -样 -根 -索 -哥 -速 -逗 -栗 -贾 -酌 -配 -翅 -辱 -唇 -夏 -砸 -砰 -砾 -础 -破 -原 -套 -逐 -烈 -殊 -殉 -顾 -轿 -较 -顿 -毙 -致 -柴 -桌 -虑 -监 -紧 -党 -逞 -晒 -眠 -晓 -哮 -唠 -鸭 -晃 -哺 -晌 -剔 -晕 -蚌 -畔 -蚣 -蚊 -蚪 -蚓 -哨 -哩 -圃 -哭 -哦 -恩 -鸯 -唤 -唁 -哼 -唧 -啊 -唉 -唆 -罢 -峭 -峨 -峰 -圆 -峻 -贼 -贿 -赂 -赃 -钱 -钳 -钻 -钾 -铁 -铃 -铅 -缺 -氧 -氨 -特 -牺 -造 -乘 -敌 -秤 -租 -积 -秧 -秩 -称 -秘 -透 -笔 -笑 -笋 -债 -借 -值 -倚 -俺 -倾 -倒 -倘 -俱 -倡 -候 -赁 -俯 -倍 -倦 -健 -臭 -射 -躬 -息 -倔 -徒 -徐 -殷 -舰 -舱 -般 -航 -途 -拿 -耸 -爹 -舀 -爱 -豺 -豹 -颁 -颂 -翁 -胰 -脆 -脂 -胸 -胳 -脏 -脐 -胶 -脑 -脓 -逛 -狸 -狼 -卿 -逢 -鸵 -留 -鸳 -皱 -饿 -馁 -凌 -凄 -恋 -桨 -浆 -衰 -衷 -高 -郭 -席 -准 -座 -症 -病 -疾 -斋 -疹 -疼 -疲 -脊 -效 -离 -紊 -唐 -瓷 -资 -凉 -站 -剖 -竞 -部 -旁 -旅 -畜 -阅 -羞 -羔 -瓶 -拳 -粉 -料 -益 -兼 -烤 -烘 -烦 -烧 -烛 -烟 -烙 -递 -涛 -浙 -涝 -浦 -酒 -涉 -消 -涡 -浩 -海 -涂 -浴 -浮 -涣 -涤 -流 -润 -涧 -涕 -浪 -浸 -涨 -烫 -涩 -涌 -悖 -悟 -悄 -悍 -悔 -悯 -悦 -害 -宽 -家 -宵 -宴 -宾 -窍 -窄 -容 -宰 -案 -请 -朗 -诸 -诺 -读 -扇 -诽 -袜 -袖 -袍 -被 -祥 -课 -冥 -谁 -调 -冤 -谅 -谆 -谈 -谊 -剥 -恳 -展 -剧 -屑 -弱 -陵 -祟 -陶 -陷 -陪 -娱 -娟 -恕 -娥 -娘 -通 -能 -难 -预 -桑 -绢 -绣 -验 -继 -骏 -球 -琐 -理 -琉 -琅 -捧 -堵 -措 -描 -域 -捺 -掩 -捷 -排 -焉 -掉 -捶 -赦 -堆 -推 -埠 -掀 -授 -捻 -教 -掏 -掐 -掠 -掂 -培 -接 -掷 -控 -探 -据 -掘 -掺 -职 -基 -聆 -勘 -聊 -娶 -著 -菱 -勒 -黄 -菲 -萌 -萝 -菌 -萎 -菜 -萄 -菊 -菩 -萍 -菠 -萤 -营 -乾 -萧 -萨 -菇 -械 -彬 -梦 -婪 -梗 -梧 -梢 -梅 -检 -梳 -梯 -桶 -梭 -救 -曹 -副 -票 -酝 -酗 -厢 -戚 -硅 -硕 -奢 -盔 -爽 -聋 -袭 -盛 -匾 -雪 -辅 -辆 -颅 -虚 -彪 -雀 -堂 -常 -眶 -匙 -晨 -睁 -眯 -眼 -悬 -野 -啪 -啦 -曼 -晦 -晚 -啄 -啡 -距 -趾 -啃 -跃 -略 -蚯 -蛀 -蛇 -唬 -累 -鄂 -唱 -患 -啰 -唾 -唯 -啤 -啥 -啸 -崖 -崎 -崭 -逻 -崔 -帷 -崩 -崇 -崛 -婴 -圈 -铐 -铛 -铝 -铜 -铭 -铲 -银 -矫 -甜 -秸 -梨 -犁 -秽 -移 -笨 -笼 -笛 -笙 -符 -第 -敏 -做 -袋 -悠 -偿 -偶 -偎 -偷 -您 -售 -停 -偏 -躯 -兜 -假 -衅 -徘 -徙 -得 -衔 -盘 -舶 -船 -舵 -斜 -盒 -鸽 -敛 -悉 -欲 -彩 -领 -脚 -脖 -脯 -豚 -脸 -脱 -象 -够 -逸 -猜 -猪 -猎 -猫 -凰 -猖 -猛 -祭 -馅 -馆 -凑 -减 -毫 -烹 -庶 -麻 -庵 -痊 -痒 -痕 -廊 -康 -庸 -鹿 -盗 -章 -竟 -商 -族 -旋 -望 -率 -阎 -阐 -着 -羚 -盖 -眷 -粘 -粗 -粒 -断 -剪 -兽 -焊 -焕 -清 -添 -鸿 -淋 -涯 -淹 -渠 -渐 -淑 -淌 -混 -淮 -淆 -渊 -淫 -渔 -淘 -淳 -液 -淤 -淡 -淀 -深 -涮 -涵 -婆 -梁 -渗 -情 -惜 -惭 -悼 -惧 -惕 -惟 -惊 -惦 -悴 -惋 -惨 -惯 -寇 -寅 -寄 -寂 -宿 -窒 -窑 -密 -谋 -谍 -谎 -谐 -袱 -祷 -祸 -谓 -谚 -谜 -逮 -敢 -尉 -屠 -弹 -隋 -堕 -随 -蛋 -隅 -隆 -隐 -婚 -婶 -婉 -颇 -颈 -绩 -绪 -续 -骑 -绰 -绳 -维 -绵 -绷 -绸 -综 -绽 -绿 -缀 -巢 -琴 -琳 -琢 -琼 -斑 -替 -揍 -款 -堪 -塔 -搭 -堰 -揩 -越 -趁 -趋 -超 -揽 -堤 -提 -博 -揭 -喜 -彭 -揣 -插 -揪 -搜 -煮 -援 -搀 -裁 -搁 -搓 -搂 -搅 -壹 -握 -搔 -揉 -斯 -期 -欺 -联 -葫 -散 -惹 -葬 -募 -葛 -董 -葡 -敬 -葱 -蒋 -蒂 -落 -韩 -朝 -辜 -葵 -棒 -棱 -棋 -椰 -植 -森 -焚 -椅 -椒 -棵 -棍 -椎 -棉 -棚 -棕 -棺 -榔 -椭 -惠 -惑 -逼 -粟 -棘 -酣 -酥 -厨 -厦 -硬 -硝 -确 -硫 -雁 -殖 -裂 -雄 -颊 -雳 -暂 -雅 -翘 -辈 -悲 -紫 -凿 -辉 -敞 -棠 -赏 -掌 -晴 -睐 -暑 -最 -晰 -量 -鼎 -喷 -喳 -晶 -喇 -遇 -喊 -遏 -晾 -景 -畴 -践 -跋 -跌 -跑 -跛 -遗 -蛙 -蛛 -蜓 -蜒 -蛤 -喝 -鹃 -喂 -喘 -喉 -喻 -啼 -喧 -嵌 -幅 -帽 -赋 -赌 -赎 -赐 -赔 -黑 -铸 -铺 -链 -销 -锁 -锄 -锅 -锈 -锋 -锌 -锐 -甥 -掰 -短 -智 -氮 -毯 -氯 -鹅 -剩 -稍 -程 -稀 -税 -筐 -等 -筑 -策 -筛 -筒 -筏 -答 
-筋 -筝 -傲 -傅 -牌 -堡 -集 -焦 -傍 -储 -皓 -皖 -粤 -奥 -街 -惩 -御 -循 -艇 -舒 -逾 -番 -释 -禽 -腊 -脾 -腋 -腔 -腕 -鲁 -猩 -猬 -猾 -猴 -惫 -然 -馈 -馋 -装 -蛮 -就 -敦 -斌 -痘 -痢 -痪 -痛 -童 -竣 -阔 -善 -翔 -羡 -普 -粪 -尊 -奠 -道 -遂 -曾 -焰 -港 -滞 -湖 -湘 -渣 -渤 -渺 -湿 -温 -渴 -溃 -溅 -滑 -湃 -渝 -湾 -渡 -游 -滋 -渲 -溉 -愤 -慌 -惰 -愕 -愣 -惶 -愧 -愉 -慨 -割 -寒 -富 -寓 -窜 -窝 -窖 -窗 -窘 -遍 -雇 -裕 -裤 -裙 -禅 -禄 -谢 -谣 -谤 -谦 -犀 -属 -屡 -强 -粥 -疏 -隔 -隙 -隘 -媒 -絮 -嫂 -媚 -婿 -登 -缅 -缆 -缉 -缎 -缓 -缔 -缕 -骗 -编 -骚 -缘 -瑟 -鹉 -瑞 -瑰 -瑙 -魂 -肆 -摄 -摸 -填 -搏 -塌 -鼓 -摆 -携 -搬 -摇 -搞 -塘 -摊 -聘 -斟 -蒜 -勤 -靴 -靶 -鹊 -蓝 -墓 -幕 -蓬 -蓄 -蒲 -蓉 -蒙 -蒸 -献 -椿 -禁 -楚 -楷 -榄 -想 -槐 -榆 -楼 -概 -赖 -酪 -酬 -感 -碍 -碘 -碑 -碎 -碰 -碗 -碌 -尴 -雷 -零 -雾 -雹 -辐 -辑 -输 -督 -频 -龄 -鉴 -睛 -睹 -睦 -瞄 -睫 -睡 -睬 -嗜 -鄙 -嗦 -愚 -暖 -盟 -歇 -暗 -暇 -照 -畸 -跨 -跷 -跳 -跺 -跪 -路 -跤 -跟 -遣 -蜈 -蜗 -蛾 -蜂 -蜕 -嗅 -嗡 -嗓 -署 -置 -罪 -罩 -蜀 -幌 -错 -锚 -锡 -锣 -锤 -锥 -锦 -键 -锯 -锰 -矮 -辞 -稚 -稠 -颓 -愁 -筹 -签 -简 -筷 -毁 -舅 -鼠 -催 -傻 -像 -躲 -魁 -衙 -微 -愈 -遥 -腻 -腰 -腥 -腮 -腹 -腺 -鹏 -腾 -腿 -鲍 -猿 -颖 -触 -解 -煞 -雏 -馍 -馏 -酱 -禀 -痹 -廓 -痴 -痰 -廉 -靖 -新 -韵 -意 -誊 -粮 -数 -煎 -塑 -慈 -煤 -煌 -满 -漠 -滇 -源 -滤 -滥 -滔 -溪 -溜 -漓 -滚 -溢 -溯 -滨 -溶 -溺 -粱 -滩 -慎 -誉 -塞 -寞 -窥 -窟 -寝 -谨 -褂 -裸 -福 -谬 -群 -殿 -辟 -障 -媳 -嫉 -嫌 -嫁 -叠 -缚 -缝 -缠 -缤 -剿 -静 -碧 -璃 -赘 -熬 -墙 -墟 -嘉 -摧 -赫 -截 -誓 -境 -摘 -摔 -撇 -聚 -慕 -暮 -摹 -蔓 -蔑 -蔡 -蔗 -蔽 -蔼 -熙 -蔚 -兢 -模 -槛 -榴 -榜 -榨 -榕 -歌 -遭 -酵 -酷 -酿 -酸 -碟 -碱 -碳 -磁 -愿 -需 -辖 -辗 -雌 -裳 -颗 -瞅 -墅 -嗽 -踊 -蜻 -蜡 -蝇 -蜘 -蝉 -嘛 -嘀 -赚 -锹 -锻 -镀 -舞 -舔 -稳 -熏 -箕 -算 -箩 -管 -箫 -舆 -僚 -僧 -鼻 -魄 -魅 -貌 -膜 -膊 -膀 -鲜 -疑 -孵 -馒 -裹 -敲 -豪 -膏 -遮 -腐 -瘩 -瘟 -瘦 -辣 -彰 -竭 -端 -旗 -精 -粹 -歉 -弊 -熄 -熔 -煽 -潇 -漆 -漱 -漂 -漫 -滴 -漾 -演 -漏 -慢 -慷 -寨 -赛 -寡 -察 -蜜 -寥 -谭 -肇 -褐 -褪 -谱 -隧 -嫩 -翠 -熊 -凳 -骡 -缩 -慧 -撵 -撕 -撒 -撩 -趣 -趟 -撑 -撮 -撬 -播 -擒 -墩 -撞 -撤 -增 -撰 -聪 -鞋 -鞍 -蕉 -蕊 -蔬 -蕴 -横 -槽 -樱 -橡 -樟 -橄 -敷 -豌 -飘 -醋 -醇 -醉 -磕 -磊 -磅 -碾 -震 -霄 -霉 -瞒 -题 -暴 -瞎 -嘻 -嘶 -嘲 -嘹 -影 -踢 -踏 -踩 -踪 -蝶 -蝴 -蝠 -蝎 -蝌 -蝗 -蝙 -嘿 -嘱 -幢 -墨 -镇 -镐 -镑 -靠 -稽 -稻 -黎 -稿 -稼 -箱 -篓 -箭 -篇 -僵 -躺 -僻 -德 -艘 -膝 -膛 -鲤 -鲫 -熟 -摩 -褒 -瘪 -瘤 -瘫 -凛 -颜 -毅 -糊 -遵 -憋 -潜 -澎 -潮 -潭 -鲨 -澳 -潘 -澈 -澜 -澄 -懂 -憔 -懊 -憎 -额 -翩 -褥 -谴 -鹤 -憨 -慰 -劈 -履 -豫 -缭 -撼 -擂 -操 -擅 -燕 -蕾 -薯 -薛 -薇 -擎 -薪 -薄 -颠 -翰 -噩 -橱 -橙 -橘 -整 -融 -瓢 -醒 -霍 -霎 -辙 -冀 -餐 -嘴 -踱 -蹄 -蹂 -蟆 -螃 -器 -噪 -鹦 -赠 -默 -黔 -镜 -赞 -穆 -篮 -篡 -篷 -篱 -儒 -邀 -衡 -膨 -雕 -鲸 -磨 -瘾 -瘸 -凝 -辨 -辩 -糙 -糖 -糕 -燃 -濒 -澡 -激 -懒 -憾 -懈 -窿 -壁 -避 -缰 -缴 -戴 -擦 -藉 -鞠 -藏 -藐 -檬 -檐 -檀 -礁 -磷 -霜 -霞 -瞭 -瞧 -瞬 -瞳 -瞩 -瞪 -曙 -蹋 -蹈 -螺 -蟋 -蟀 -嚎 -赡 -穗 -魏 -簧 -簇 -繁 -徽 -爵 -朦 -臊 -鳄 -癌 -辫 -赢 -糟 -糠 -燥 -懦 -豁 -臀 -臂 -翼 -骤 -藕 -鞭 -藤 -覆 -瞻 -蹦 -嚣 -镰 -翻 -鳍 -鹰 -瀑 -襟 -璧 -戳 -孽 -警 -蘑 -藻 -攀 -曝 -蹲 -蹭 -蹬 -巅 -簸 -簿 -蟹 -颤 -靡 -癣 -瓣 -羹 -鳖 -爆 -疆 -鬓 -壤 -馨 -耀 -躁 -蠕 -嚼 -嚷 -巍 -籍 -鳞 -魔 -糯 -灌 -譬 -蠢 -霸 -露 -霹 -躏 -黯 -髓 -赣 -囊 -镶 -瓤 -罐 -矗 -乂 -乜 -兀 -弋 -孑 -孓 -幺 -亓 -韦 -廿 -丏 -卅 -仄 -厄 -仃 -仉 -仂 -兮 -刈 -爻 -卞 -闩 -讣 -尹 -夬 -爿 -毋 -邗 -邛 -艽 -艿 -札 -叵 -匝 -丕 -匜 -劢 -卟 -叱 -叻 -仨 -仕 -仟 -仡 -仫 -仞 -卮 -氐 -犰 -刍 -邝 -邙 -汀 -讦 -讧 -讪 -讫 -尻 -阡 -尕 -弁 -驭 -匡 -耒 -玎 -玑 -邢 -圩 -圬 -圭 -扦 -圪 -圳 -圹 -扪 -圮 -圯 -芊 -芍 -芄 -芨 -芑 -芎 -芗 -亘 -厍 -夼 -戍 -尥 -乩 -旯 -曳 -岌 -屺 -凼 -囡 -钇 -缶 -氘 -氖 -牝 -伎 -伛 -伢 -佤 -仵 -伥 -伧 -伉 -伫 -囟 -汆 -刖 -夙 -旮 -刎 -犷 -犸 -舛 -凫 -邬 -饧 -汕 -汔 -汐 -汲 -汜 -汊 -忖 -忏 -讴 -讵 -祁 -讷 -聿 -艮 -厾 -阱 -阮 -阪 -丞 -妁 -牟 -纡 -纣 -纥 -纨 -玕 -玙 -抟 -抔 -圻 -坂 -坍 -坞 -抃 -抉 -㧐 -芫 -邯 -芸 -芾 -苈 -苣 -芷 -芮 -苋 -芼 -苌 -苁 -芩 -芪 -芡 -芟 -苄 -苎 -苡 -杌 -杓 -杞 -杈 -忑 -孛 -邴 -邳 -矶 -奁 -豕 -忒 -欤 -轫 -迓 -邶 -忐 -卣 -邺 -旰 -呋 -呒 -呓 -呔 -呖 -呃 -旸 -吡 -町 -虬 -呗 -吽 -吣 -吲 -帏 -岐 -岈 -岘 -岑 -岚 -兕 -囵 -囫 -钊 -钋 -钌 -迕 -氙 -氚 -牤 -佞 -邱 -攸 -佚 -佝 -佟 -佗 -伽 -彷 -佘 -佥 -孚 -豸 -坌 -肟 -邸 -奂 -劬 -狄 -狁 -鸠 -邹 -饨 -饩 -饪 -饫 -饬 -亨 -庑 -庋 -疔 -疖 -肓 -闱 -闳 -闵 -羌 -炀 -沣 -沅 -沔 -沤 -沌 -沏 -沚 -汩 -汨 -沂 -汾 -沨 -汴 -汶 -沆 -沩 -泐 -怃 -怄 -忡 -忤 -忾 -怅 -忻 -忪 -怆 -忭 -忸 -诂 -诃 -诅 -诋 -诌 -诏 -诒 -孜 -陇 -陀 -陂 -陉 -妍 -妩 -妪 -妣 -妊 -妗 -妫 -妞 -姒 -妤 -邵 -劭 -刭 -甬 -邰 -纭 -纰 -纴 -纶 -纾 -玮 -玡 -玭 -玠 -玢 -玥 -玦 -盂 -忝 -匦 -坩 -抨 -拤 -坫 -拈 -垆 -抻 -劼 -拃 -拊 -坼 -坻 -㧟 -坨 -坭 -抿 -坳 -耶 -苷 -苯 -苤 -茏 -苫 -苜 -苴 -苒 -苘 -茌 -苻 -苓 -茚 -茆 -茑 -茓 -茔 -茕 -茀 
-苕 -枥 -枇 -杪 -杳 -枧 -杵 -枨 -枞 -枋 -杻 -杷 -杼 -矸 -砀 -刳 -奄 -瓯 -殁 -郏 -轭 -郅 -鸢 -盱 -昊 -昙 -杲 -昃 -咂 -呸 -昕 -昀 -旻 -昉 -炅 -咔 -畀 -虮 -咀 -呷 -黾 -呱 -呤 -咚 -咆 -咛 -呶 -呣 -呦 -咝 -岢 -岿 -岬 -岫 -帙 -岣 -峁 -刿 -迥 -岷 -剀 -帔 -峄 -沓 -囹 -罔 -钍 -钎 -钏 -钒 -钕 -钗 -邾 -迮 -牦 -竺 -迤 -佶 -佬 -佰 -侑 -侉 -臾 -岱 -侗 -侃 -侏 -侩 -佻 -佾 -侪 -佼 -佯 -侬 -帛 -阜 -侔 -徂 -刽 -郄 -怂 -籴 -瓮 -戗 -肼 -䏝 -肽 -肱 -肫 -剁 -迩 -郇 -狙 -狎 -狍 -狒 -咎 -炙 -枭 -饯 -饴 -冽 -冼 -庖 -疠 -疝 -疡 -兖 -妾 -劾 -炜 -𬉼 -炖 -炘 -炝 -炔 -泔 -沭 -泷 -泸 -泱 -泅 -泗 -泠 -泺 -泖 -泫 -泮 -沱 -泯 -泓 -泾 -怙 -怵 -怦 -怛 -怏 -怍 -㤘 -怩 -怫 -怿 -宕 -穹 -宓 -诓 -诔 -诖 -诘 -戾 -诙 -戽 -郓 -衩 -祆 -祎 -祉 -祇 -诛 -诜 -诟 -诠 -诣 -诤 -诧 -诨 -诩 -戕 -孢 -亟 -陔 -妲 -妯 -姗 -帑 -弩 -孥 -驽 -虱 -迦 -迨 -绀 -绁 -绂 -驷 -驸 -绉 -绌 -驿 -骀 -甾 -珏 -珐 -珂 -珑 -玳 -珀 -顸 -珉 -珈 -拮 -垭 -挝 -垣 -挞 -垤 -赳 -贲 -垱 -垌 -郝 -垧 -垓 -挦 -垠 -茜 -荚 -荑 -贳 -荜 -莒 -茼 -茴 -茱 -莛 -荞 -茯 -荏 -荇 -荃 -荟 -荀 -茗 -荠 -茭 -茨 -垩 -荥 -荦 -荨 -荩 -剋 -荪 -茹 -荬 -荮 -柰 -栉 -柯 -柘 -栊 -柩 -枰 -栌 -柙 -枵 -柚 -枳 -柞 -柝 -栀 -柢 -栎 -枸 -柈 -柁 -枷 -柽 -剌 -酊 -郦 -甭 -砗 -砘 -砒 -斫 -砭 -砜 -奎 -耷 -虺 -殂 -殇 -殄 -殆 -轱 -轲 -轳 -轶 -轸 -虿 -毖 -觇 -尜 -哐 -眄 -眍 -𠳐 -郢 -眇 -眊 -眈 -禺 -哂 -咴 -曷 -昴 -昱 -昵 -咦 -哓 -哔 -畎 -毗 -呲 -胄 -畋 -畈 -虼 -虻 -盅 -咣 -哕 -剐 -郧 -咻 -囿 -咿 -哌 -哙 -哚 -咯 -咩 -咤 -哝 -哏 -哞 -峙 -峣 -罘 -帧 -峒 -峤 -峋 -峥 -贶 -钚 -钛 -钡 -钣 -钤 -钨 -钫 -钯 -氡 -氟 -牯 -郜 -秕 -秭 -竽 -笈 -笃 -俦 -俨 -俅 -俪 -叟 -垡 -牮 -俣 -俚 -皈 -俑 -俟 -逅 -徇 -徉 -舢 -俞 -郗 -俎 -郤 -爰 -郛 -瓴 -胨 -胪 -胛 -胂 -胙 -胍 -胗 -胝 -朐 -胫 -鸨 -匍 -狨 -狯 -飑 -狩 -狲 -訇 -逄 -昝 -饷 -饸 -饹 -胤 -孪 -娈 -弈 -奕 -庥 -疬 -疣 -疥 -疭 -庠 -竑 -彦 -飒 -闼 -闾 -闿 -阂 -羑 -迸 -籼 -酋 -炳 -炻 -炽 -炯 -烀 -炷 -烃 -洱 -洹 -洧 -洌 -浃 -洇 -洄 -洙 -涎 -洎 -洫 -浍 -洮 -洵 -浒 -浔 -浕 -洳 -恸 -恓 -恹 -恫 -恺 -恻 -恂 -恪 -恽 -宥 -扃 -衲 -衽 -衿 -袂 -祛 -祜 -祓 -祚 -诮 -祗 -祢 -诰 -诳 -鸩 -昶 -郡 -咫 -弭 -牁 -胥 -陛 -陟 -娅 -姮 -娆 -姝 -姣 -姘 -姹 -怼 -羿 -炱 -矜 -绔 -骁 -骅 -绗 -绛 -骈 -耖 -挈 -珥 -珙 -顼 -珰 -珩 -珧 -珣 -珞 -琤 -珲 -敖 -恚 -埔 -埕 -埘 -埙 -埚 -挹 -耆 -耄 -埒 -捋 -贽 -垸 -捃 -盍 -荸 -莆 -莳 -莴 -莪 -莠 -莓 -莜 -莅 -荼 -莩 -荽 -莸 -荻 -莘 -莎 -莞 -莨 -渇 -鸪 -莼 -栲 -栳 -郴 -桓 -桡 -桎 -桢 -桤 -梃 -栝 -桕 -桁 -桧 -桅 -栟 -桉 -栩 -逑 -逋 -彧 -鬲 -豇 -酐 -逦 -厝 -孬 -砝 -砹 -砺 -砧 -砷 -砟 -砼 -砥 -砣 -剞 -砻 -轼 -轾 -辂 -鸫 -趸 -龀 -鸬 -虔 -逍 -眬 -唛 -晟 -眩 -眙 -哧 -哽 -唔 -晁 -晏 -鸮 -趵 -趿 -畛 -蚨 -蚜 -蚍 -蚋 -蚬 -蚝 -蚧 -唢 -圄 -唣 -唏 -盎 -唑 -崂 -崃 -罡 -罟 -峪 -觊 -赅 -钰 -钲 -钴 -钵 -钹 -钺 -钽 -钼 -钿 -铀 -铂 -铄 -铆 -铈 -铉 -铊 -铋 -铌 -铍 -䥽 -铎 -氩 -氤 -氦 -毪 -舐 -秣 -秫 -盉 -笄 -笕 -笊 -笏 -笆 -俸 -倩 -俵 -偌 -俳 -俶 -倬 -倏 -恁 -倭 -倪 -俾 -倜 -隼 -隽 -倌 -倥 -臬 -皋 -郫 -倨 -衄 -颀 -徕 -舫 -釜 -奚 -衾 -胯 -胱 -胴 -胭 -脍 -胼 -朕 -脒 -胺 -鸱 -玺 -鸲 -狷 -猁 -狳 -猃 -狺 -逖 -桀 -袅 -饽 -凇 -栾 -挛 -亳 -疳 -疴 -疸 -疽 -痈 -疱 -痂 -痉 -衮 -凋 -颃 -恣 -旆 -旄 -旃 -阃 -阄 -訚 -阆 -恙 -粑 -朔 -郸 -烜 -烨 -烩 -烊 -剡 -郯 -烬 -涑 -浯 -涞 -涟 -娑 -涅 -涠 -浞 -涓 -浥 -涔 -浜 -浠 -浣 -浚 -悚 -悭 -悝 -悒 -悌 -悛 -宸 -窈 -剜 -诹 -冢 -诼 -袒 -袢 -祯 -诿 -谀 -谂 -谄 -谇 -屐 -屙 -陬 -勐 -奘 -牂 -蚩 -陲 -姬 -娠 -娌 -娉 -娲 -娩 -娴 -娣 -娓 -婀 -畚 -逡 -绠 -骊 -绡 -骋 -绥 -绦 -绨 -骎 -邕 -鸶 -彗 -耜 -焘 -舂 -琏 -琇 -麸 -揶 -埴 -埯 -捯 -掳 -掴 -埸 -埵 -赧 -埤 -捭 -逵 -埝 -堋 -堍 -掬 -鸷 -掖 -捽 -掊 -堉 -掸 -捩 -掮 -悫 -埭 -埽 -掇 -掼 -聃 -菁 -萁 -菘 -堇 -萘 -萋 -菽 -菖 -萜 -萸 -萑 -棻 -菔 -菟 -萏 -萃 -菏 -菹 -菪 -菅 -菀 -萦 -菰 -菡 -梵 -梿 -梏 -觋 -桴 -桷 -梓 -棁 -桫 -棂 -啬 -郾 -匮 -敕 -豉 -鄄 -酞 -酚 -戛 -硎 -硭 -硒 -硖 -硗 -硐 -硇 -硌 -鸸 -瓠 -匏 -厩 -龚 -殒 -殓 -殍 -赉 -雩 -辄 -堑 -眭 -眦 -啧 -晡 -晤 -眺 -眵 -眸 -圊 -喏 -喵 -啉 -勖 -晞 -唵 -晗 -冕 -啭 -畦 -趺 -啮 -跄 -蚶 -蛄 -蛎 -蛆 -蚰 -蛊 -圉 -蚱 -蛉 -蛏 -蚴 -啁 -啕 -唿 -啐 -唼 -唷 -啖 -啵 -啶 -啷 -唳 -唰 -啜 -帻 -崚 -崦 -帼 -崮 -崤 -崆 -赇 -赈 -赊 -铑 -铒 -铗 -铙 -铟 -铠 -铡 -铢 -铣 -铤 -铧 -铨 -铩 -铪 -铫 -铬 -铮 -铯 -铰 -铱 -铳 -铵 -铷 -氪 -牾 -鸹 -秾 -逶 -笺 -筇 -笸 -笪 -笮 -笠 -笥 -笤 -笳 -笾 -笞 -偾 -偃 -偕 -偈 -傀 -偬 -偻 -皑 -皎 -鸻 -徜 -舸 -舻 -舴 -舷 -龛 -翎 -脬 -脘 -脲 -匐 -猗 -猡 -猞 -猝 -斛 -猕 -馗 -馃 -馄 -鸾 -孰 -庹 -庾 -痔 -痍 -疵 -翊 -旌 -旎 -袤 -阇 -阈 -阉 -阊 -阋 -阍 -阏 -羟 -粝 -粕 -敝 -焐 -烯 -焓 -烽 -焖 -烷 -焗 -渍 -渚 -淇 -淅 -淞 -渎 -涿 -淖 -挲 -淠 -涸 -渑 -淦 -淝 -淬 -涪 -淙 -涫 -渌 -淄 -惬 -悻 -悱 -惝 -惘 -悸 -惆 -惚 -惇 -惮 -窕 -谌 -谏 -扈 -皲 -谑 -裆 -袷 -裉 -谒 -谔 -谕 -谖 -谗 -谙 -谛 -谝 -逯 -郿 -隈 -粜 -隍 -隗 -婧 -婊 -婕 -娼 -婢 -婵 -胬 -袈 -翌 -恿 -欸 -绫 -骐 -绮 -绯 -绱 -骒 -绲 -骓 -绶 -绺 -绻 -绾 -骖 -缁 -耠 -琫 -琵 -琶 -琪 -瑛 -琦 -琥 -琨 -靓 -琰 -琮 -琯 -琬 -琛 -琚 
-辇 -鼋 -揳 -堞 -搽 -揸 -揠 -堙 -趄 -揖 -颉 -塄 -揿 -耋 -揄 -蛩 -蛰 -塆 -摒 -揆 -掾 -聒 -葑 -葚 -靰 -靸 -葳 -葺 -葸 -萼 -葆 -葩 -葶 -蒌 -萱 -戟 -葭 -楮 -棼 -椟 -棹 -椤 -棰 -赍 -椋 -椁 -椪 -棣 -椐 -鹁 -覃 -酤 -酢 -酡 -鹂 -厥 -殚 -殛 -雯 -雱 -辊 -辋 -椠 -辍 -辎 -斐 -睄 -睑 -睇 -睃 -戢 -喋 -嗒 -喃 -喱 -喹 -晷 -喈 -跖 -跗 -跞 -跚 -跎 -跏 -跆 -蛱 -蛲 -蛭 -蛳 -蛐 -蛔 -蛞 -蛴 -蛟 -蛘 -喁 -喟 -啾 -嗖 -喑 -嗟 -喽 -嗞 -喀 -喔 -喙 -嵘 -嵖 -崴 -遄 -詈 -嵎 -崽 -嵬 -嵛 -嵯 -嵝 -嵫 -幄 -嵋 -赕 -铻 -铼 -铿 -锃 -锂 -锆 -锇 -锉 -锏 -锑 -锒 -锔 -锕 -掣 -矬 -氰 -毳 -毽 -犊 -犄 -犋 -鹄 -犍 -嵇 -黍 -稃 -稂 -筚 -筵 -筌 -傣 -傈 -舄 -牍 -傥 -傧 -遑 -傩 -遁 -徨 -媭 -畲 -弑 -颌 -翕 -釉 -鹆 -舜 -貂 -腈 -腌 -腓 -腆 -腴 -腑 -腚 -腱 -鱿 -鲀 -鲂 -颍 -猢 -猹 -猥 -飓 -觞 -觚 -猱 -颎 -飧 -馇 -馊 -亵 -脔 -裒 -痣 -痨 -痦 -痞 -痤 -痫 -痧 -赓 -竦 -瓿 -啻 -颏 -鹇 -阑 -阒 -阕 -粞 -遒 -孳 -焯 -焜 -焙 -焱 -鹈 -湛 -渫 -湮 -湎 -湜 -渭 -湍 -湫 -溲 -湟 -溆 -湲 -湔 -湉 -渥 -湄 -滁 -愠 -惺 -愦 -惴 -愀 -愎 -愔 -喾 -寐 -谟 -扉 -裢 -裎 -裥 -祾 -祺 -谠 -幂 -谡 -谥 -谧 -遐 -孱 -弼 -巽 -骘 -媪 -媛 -婷 -巯 -翚 -皴 -婺 -骛 -缂 -缃 -缄 -彘 -缇 -缈 -缌 -缑 -缒 -缗 -飨 -耢 -瑚 -瑁 -瑜 -瑗 -瑄 -瑕 -遨 -骜 -韫 -髡 -塬 -鄢 -趔 -趑 -摅 -摁 -蜇 -搋 -搪 -搐 -搛 -搠 -摈 -彀 -毂 -搦 -搡 -蓁 -戡 -蓍 -鄞 -靳 -蓐 -蓦 -鹋 -蒽 -蓓 -蓖 -蓊 -蒯 -蓟 -蓑 -蒿 -蒺 -蓠 -蒟 -蒡 -蒹 -蒴 -蒗 -蓥 -颐 -楔 -楠 -楂 -楝 -楫 -楸 -椴 -槌 -楯 -皙 -榈 -槎 -榉 -楦 -楣 -楹 -椽 -裘 -剽 -甄 -酮 -酰 -酯 -酩 -蜃 -碛 -碓 -硼 -碉 -碚 -碇 -碜 -鹌 -辏 -龃 -龅 -訾 -粲 -虞 -睚 -嗪 -韪 -嗷 -嗉 -睨 -睢 -雎 -睥 -嘟 -嗑 -嗫 -嗬 -嗔 -嗝 -戥 -嗄 -煦 -暄 -遢 -暌 -跬 -跶 -跸 -跐 -跣 -跹 -跻 -蛸 -蜊 -蜍 -蜉 -蜣 -畹 -蛹 -嗣 -嗯 -嗥 -嗲 -嗳 -嗌 -嗍 -嗨 -嗐 -嗤 -嗵 -罨 -嵊 -嵩 -嵴 -骰 -锗 -锛 -锜 -锝 -锞 -锟 -锢 -锨 -锩 -锭 -锱 -雉 -氲 -犏 -歃 -稞 -稗 -稔 -筠 -筢 -筮 -筲 -筱 -牒 -煲 -敫 -徭 -愆 -艄 -觎 -毹 -貊 -貅 -貉 -颔 -腠 -腩 -腼 -腭 -腧 -塍 -媵 -詹 -鲅 -鲆 -鲇 -鲈 -稣 -鲋 -鲐 -肄 -鹐 -飕 -觥 -遛 -馐 -鹑 -亶 -瘃 -痱 -痼 -痿 -瘐 -瘁 -瘆 -麂 -裔 -歆 -旒 -雍 -阖 -阗 -阙 -羧 -豢 -粳 -猷 -煳 -煜 -煨 -煅 -煊 -煸 -煺 -滟 -溱 -溘 -漭 -滢 -溥 -溧 -溽 -裟 -溻 -溷 -滗 -滫 -溴 -滏 -滃 -滦 -溏 -滂 -滓 -溟 -滪 -愫 -慑 -慊 -鲎 -骞 -窦 -窠 -窣 -裱 -褚 -裨 -裾 -裰 -禊 -谩 -谪 -媾 -嫫 -媲 -嫒 -嫔 -媸 -缙 -缜 -缛 -辔 -骝 -缟 -缡 -缢 -缣 -骟 -耥 -璈 -瑶 -瑭 -獒 -觏 -慝 -嫠 -韬 -叆 -髦 -摽 -墁 -撂 -摞 -撄 -翥 -踅 -摭 -墉 -墒 -榖 -綦 -蔫 -蔷 -靺 -靼 -鞅 -靿 -甍 -蔸 -蔟 -蔺 -戬 -蕖 -蔻 -蓿 -斡 -鹕 -蓼 -榛 -榧 -榻 -榫 -榭 -槔 -榱 -槁 -槟 -槠 -榷 -僰 -酽 -酶 -酹 -厮 -碡 -碴 -碣 -碲 -磋 -臧 -豨 -殡 -霆 -霁 -辕 -蜚 -裴 -翡 -龇 -龈 -睿 -䁖 -睽 -嘞 -嘈 -嘌 -嘁 -嘎 -暧 -暝 -踌 -踉 -蜞 -蜥 -蜮 -蝈 -蜴 -蜱 -蜩 -蜷 -蜿 -螂 -蜢 -嘘 -嘡 -鹗 -嘣 -嘤 -嘚 -嗾 -嘧 -罴 -罱 -幔 -嶂 -幛 -赙 -罂 -骷 -骶 -鹘 -锲 -锴 -锶 -锷 -锸 -锵 -镁 -镂 -犒 -箐 -箦 -箧 -箍 -箸 -箬 -箅 -箪 -箔 -箜 -箢 -箓 -毓 -僖 -儆 -僳 -僭 -劁 -僮 -魃 -魆 -睾 -艋 -鄱 -膈 -膑 -鲑 -鲔 -鲚 -鲛 -鲟 -獐 -觫 -雒 -夤 -馑 -銮 -塾 -麽 -瘌 -瘊 -瘘 -瘙 -廖 -韶 -旖 -膂 -阚 -鄯 -鲞 -粿 -粼 -粽 -糁 -槊 -鹚 -熘 -熥 -潢 -漕 -滹 -漯 -漶 -潋 -潴 -漪 -漉 -漳 -漩 -澉 -潍 -慵 -搴 -窨 -寤 -綮 -谮 -褡 -褙 -褓 -褛 -褊 -谯 -谰 -谲 -暨 -屣 -鹛 -嫣 -嫱 -嫖 -嫦 -嫚 -嫘 -嫡 -鼐 -翟 -瞀 -鹜 -骠 -缥 -缦 -缧 -缨 -骢 -缪 -缫 -耦 -耧 -瑾 -璜 -璀 -璎 -璁 -璋 -璇 -奭 -髯 -髫 -撷 -撅 -赭 -撸 -鋆 -撙 -撺 -墀 -聩 -觐 -鞑 -蕙 -鞒 -蕈 -蕨 -蕤 -蕞 -蕺 -瞢 -蕃 -蕲 -赜 -槿 -樯 -槭 -樗 -樘 -樊 -槲 -醌 -醅 -靥 -魇 -餍 -磔 -磙 -霈 -辘 -龉 -龊 -觑 -瞌 -瞋 -瞑 -嘭 -噎 -噶 -颙 -暹 -噘 -踔 -踝 -踟 -踒 -踬 -踮 -踯 -踺 -踞 -蝽 -蝾 -蝻 -蝰 -蝮 -螋 -蝓 -蝣 -蝼 -噗 -嘬 -颚 -噍 -噢 -噙 -噜 -噌 -噔 -颛 -幞 -幡 -嶙 -嶝 -骺 -骼 -骸 -镊 -镉 -镌 -镍 -镏 -镒 -镓 -镔 -稷 -箴 -篑 -篁 -篌 -篆 -牖 -儋 -徵 -磐 -虢 -鹞 -膘 -滕 -鲠 -鲡 -鲢 -鲣 -鲥 -鲧 -鲩 -獗 -獠 -觯 -馓 -馔 -麾 -廛 -瘛 -瘼 -瘢 -瘠 -齑 -羯 -羰 -𥻗 -遴 -糌 -糍 -糅 -熜 -熵 -熠 -澍 -澌 -潸 -潦 -潲 -鋈 -潟 -潼 -潺 -憬 -憧 -寮 -窳 -谳 -褴 -褟 -褫 -谵 -熨 -屦 -嬉 -勰 -戮 -蝥 -缬 -缮 -缯 -骣 -畿 -耩 -耨 -耪 -璞 -璟 -靛 -璠 -璘 -聱 -螯 -髻 -髭 -髹 -擀 -熹 -甏 -擞 -縠 -磬 -颞 -蕻 -鞘 -颟 -薤 -薨 -檠 -薏 -薮 -薜 -薅 -樾 -橛 -橇 -樵 -檎 -橹 -樽 -樨 -橼 -墼 -橐 -翮 -醛 -醐 -醍 -醚 -磲 -赝 -飙 -殪 -霖 -霏 -霓 -錾 -辚 -臻 -遽 -氅 -瞟 -瞠 -瞰 -嚄 -嚆 -噤 -暾 -蹀 -踹 -踵 -踽 -蹉 -蹁 -螨 -蟒 -螈 -螅 -螭 -螠 -螟 -噱 -噬 -噫 -噻 -噼 -罹 -圜 -䦃 -镖 -镗 -镘 -镚 -镛 -镝 -镞 -镠 -氇 -氆 -憩 -穑 -篝 -篥 -篦 -篪 -篙 -盥 -劓 -翱 -魉 -魈 -徼 -歙 -膳 -膦 -膙 -鲮 -鲱 -鲲 -鲳 -鲴 -鲵 -鲷 -鲻 -獴 -獭 -獬 -邂 -鹧 -廨 -赟 -瘰 -廪 -瘿 -瘵 -瘴 -癃 -瘳 -斓 -麇 -麈 -嬴 -壅 -羲 -糗 -瞥 -甑 -燎 -燠 -燔 -燧 -濑 -濉 -潞 -澧 -澹 -澥 -澶 -濂 -褰 -寰 -窸 -褶 -禧 -嬖 -犟 -隰 -嬗 -颡 -缱 -缲 -缳 -璨 -璩 -璐 -璪 -螫 -擤 -壕 -觳 -罄 -擢 -薹 -鞡 -鞬 -薷 -薰 -藓 -藁 -檄 -檩 -懋 -醢 -翳 -礅 -磴 -鹩 -龋 -龌 -豳 -壑 -黻 -嚏 -嚅 -蹑 -蹒 -蹊 -蟥 -螬 -螵 -疃 -螳 -蟑 -嚓 -羁 -罽 -罾 -嶷 -黜 
-黝 -髁 -髀 -镡 -镢 -镣 -镦 -镧 -镩 -镪 -镫 -罅 -黏 -簌 -篾 -篼 -簖 -簋 -鼢 -黛 -儡 -鹪 -鼾 -皤 -魍 -龠 -繇 -貘 -邈 -貔 -臌 -膻 -臆 -臃 -鲼 -鲽 -鳀 -鳃 -鳅 -鳇 -鳊 -螽 -燮 -鹫 -襄 -糜 -縻 -膺 -癍 -麋 -懑 -濡 -濮 -濞 -濠 -濯 -蹇 -謇 -邃 -襁 -檗 -擘 -孺 -隳 -嬷 -蟊 -鹬 -鍪 -鏊 -鳌 -鬈 -鬃 -瞽 -鞯 -鞨 -鞫 -鞧 -鞣 -藜 -藠 -藩 -醪 -蹙 -礓 -燹 -餮 -瞿 -曛 -颢 -曜 -躇 -蹚 -鹭 -蟛 -蟪 -蟠 -蟮 -鹮 -黠 -黟 -髅 -髂 -镬 -镭 -镯 -馥 -簟 -簪 -鼬 -雠 -艟 -鳎 -鳏 -鳐 -癞 -癔 -癜 -癖 -糨 -蹩 -鎏 -懵 -彝 -邋 -鬏 -攉 -攒 -鞲 -鞴 -藿 -蘧 -蘅 -麓 -醮 -醯 -酃 -霪 -霭 -霨 -黼 -嚯 -蹰 -蹶 -蹽 -蹼 -蹴 -蹾 -蹿 -蠖 -蠓 -蟾 -蠊 -黢 -髋 -髌 -镲 -籀 -籁 -齁 -魑 -艨 -鳓 -鳔 -鳕 -鳗 -鳙 -麒 -鏖 -羸 -㸆 -瀚 -瀣 -瀛 -襦 -谶 -襞 -骥 -缵 -瓒 -攘 -蘩 -蘖 -醴 -霰 -酆 -矍 -曦 -躅 -鼍 -巉 -黩 -黥 -黪 -镳 -镴 -黧 -纂 -璺 -鼯 -臜 -鳜 -鳝 -鳟 -獾 -孀 -骧 -瓘 -鼙 -醺 -礴 -颦 -曩 -鳢 -癫 -麝 -夔 -爝 -灏 -禳 -鐾 -羼 -蠡 -耱 -懿 -蘸 -鹳 -霾 -氍 -饕 -躐 -髑 -镵 -穰 -饔 -鬻 -鬟 -趱 -攫 -攥 -颧 -躜 -鼹 -癯 -麟 -蠲 -蠹 -躞 -衢 -鑫 -灞 -襻 -纛 -鬣 -攮 -囔 -馕 -戆 -爨 -齉 -亍 -尢 -彳 -卬 -殳 -𠙶 -毌 -邘 -戋 -圢 -氕 -伋 -仝 -冮 -氿 -汈 -氾 -忉 -宄 -讱 -扞 -圲 -圫 -芏 -芃 -朳 -朸 -𨙸 -邨 -吒 -吖 -屼 -屾 -辿 -钆 -仳 -伣 -伈 -癿 -甪 -邠 -犴 -冱 -邡 -闫 -汋 -䜣 -讻 -孖 -纩 -玒 -玓 -玘 -玚 -刬 -坜 -坉 -扽 -坋 -扺 -㧑 -毐 -芰 -芣 -苊 -苉 -芘 -芴 -芠 -芤 -杕 -杙 -杄 -杧 -杩 -尪 -尨 -轪 -坒 -芈 -旴 -旵 -呙 -㕮 -岍 -岠 -岜 -呇 -冏 -觃 -岙 -伾 -㑇 -伭 -佖 -伲 -佁 -飏 -狃 -闶 -汧 -汫 -𣲘 -𣲗 -沄 -沘 -汭 -㳇 -沇 -忮 -忳 -忺 -祃 -诇 -邲 -诎 -诐 -屃 -岊 -阽 -䢺 -阼 -妧 -妘 -𨚕 -纮 -驲 -纻 -纼 -玤 -玞 -玱 -玟 -邽 -邿 -坥 -坰 -坬 -坽 -弆 -耵 -䢼 -𦭜 -茋 -苧 -苾 -苠 -枅 -㭎 -枘 -枍 -矼 -矻 -匼 -旿 -昇 -昄 -昒 -昈 -咉 -咇 -咍 -岵 -岽 -岨 -岞 -峂 -㟃 -囷 -钐 -钔 -钖 -牥 -佴 -垈 -侁 -侹 -佸 -佺 -隹 -㑊 -侂 -佽 -侘 -郈 -舠 -郐 -郃 -攽 -肭 -肸 -肷 -狉 -狝 -饳 -忞 -於 -炌 -炆 -泙 -沺 -泂 -泜 -泃 -泇 -怊 -峃 -穸 -祋 -祊 -鸤 -弢 -弨 -陑 -陎 -卺 -乸 -妭 -姈 -迳 -叕 -驵 -䌹 -驺 -绋 -绐 -砉 -耔 -㛃 -玶 -珇 -珅 -珋 -玹 -珌 -玿 -韨 -垚 -垯 -垙 -垲 -埏 -垍 -耇 -垎 -垴 -垟 -垞 -挓 -垵 -垏 -拶 -荖 -荁 -荙 -荛 -茈 -茽 -荄 -茺 -荓 -茳 -𦰡 -茛 -荭 -㭕 -柷 -柃 -柊 -枹 -栐 -柖 -郚 -剅 -䴓 -迺 -厖 -砆 -砑 -砄 -耏 -奓 -䶮 -轵 -轷 -轹 -轺 -昺 -昽 -盷 -咡 -咺 -昳 -昣 -哒 -昤 -昫 -昡 -咥 -昪 -虷 -虸 -哃 -峘 -耑 -峛 -峗 -峧 -帡 -钘 -钜 -钪 -钬 -钭 -矧 -秬 -俫 -舁 -俜 -俙 -俍 -垕 -衎 -舣 -弇 -侴 -鸧 -䏡 -胠 -𦙶 -胈 -胩 -胣 -朏 -飐 -訄 -饻 -庤 -疢 -炣 -炟 -㶲 -洭 -洘 -洓 -洿 -㳚 -泚 -浈 -浉 -洸 -洑 -洢 -洈 -洚 -洺 -洨 -浐 -㳘 -洴 -洣 -恔 -宬 -窀 -扂 -袆 -祏 -祐 -祕 -叚 -陧 -陞 -娀 -姞 -姱 -姤 -姶 -姽 -枲 -绖 -骃 -彖 -骉 -恝 -珪 -珛 -珹 -琊 -玼 -珖 -珽 -珦 -珫 -珒 -珢 -珕 -珝 -埗 -垾 -垺 -埆 -垿 -埌 -埇 -莰 -茝 -鄀 -莶 -莝 -䓖 -莙 -栻 -桠 -桄 -梠 -栴 -梴 -栒 -酎 -酏 -砵 -砠 -砫 -砬 -硁 -恧 -翃 -郪 -𨐈 -辀 -辁 -剕 -赀 -哢 -晅 -晊 -唝 -哳 -哱 -冔 -晔 -晐 -晖 -畖 -蚄 -蚆 -帱 -崁 -峿 -崄 -帨 -崀 -赆 -钷 -眚 -甡 -笫 -倻 -倴 -脩 -倮 -倕 -倞 -倓 -倧 -衃 -虒 -舭 -舯 -舥 -瓞 -鬯 -鸰 -脎 -朓 -胲 -虓 -鱽 -狴 -峱 -狻 -眢 -勍 -痄 -疰 -痃 -竘 -羖 -羓 -桊 -敉 -烠 -烔 -烶 -烻 -涍 -浡 -浭 -浬 -涄 -涢 -涐 -浰 -浟 -浛 -浼 -浲 -涘 -悈 -悃 -悢 -宧 -窅 -窊 -窎 -扅 -扆 -袪 -袗 -袯 -祧 -隺 -堲 -疍 -𨺙 -陴 -烝 -砮 -㛚 -哿 -翀 -翂 -剟 -绤 -骍 -䂮 -琎 -珸 -珵 -琄 -琈 -琀 -珺 -掭 -堎 -堐 -埼 -掎 -埫 -堌 -晢 -掞 -埪 -壸 -㙍 -聍 -菝 -萚 -菥 -莿 -䓫 -勚 -䓬 -萆 -菂 -菍 -菼 -萣 -䓨 -菉 -䓛 -梼 -梽 -桲 -梾 -桯 -梣 -梌 -桹 -敔 -厣 -硔 -硙 -硚 -硊 -硍 -勔 -䴕 -龁 -逴 -唪 -啫 -翈 -㫰 -晙 -畤 -趼 -跂 -蛃 -蚲 -蚺 -啴 -䎃 -崧 -崟 -崞 -崒 -崌 -崡 -铏 -铕 -铖 -铘 -铚 -铞 -铥 -铴 -牻 -牿 -稆 -笱 -笯 -偰 -偡 -鸺 -偭 -偲 -偁 -㿠 -鄅 -偓 -徛 -衒 -舳 -舲 -鸼 -悆 -鄃 -瓻 -䝙 -脶 -脞 -脟 -䏲 -鱾 -猇 -猊 -猄 -觖 -𠅤 -庱 -庼 -庳 -痓 -䴔 -竫 -堃 -阌 -羝 -羕 -焆 -烺 -焌 -淏 -淟 -淜 -淴 -淯 -湴 -涴 -㥄 -惛 -惔 -悰 -惙 -寁 -逭 -袼 -裈 -祲 -谞 -艴 -弸 -弶 -隃 -婞 -娵 -婼 -媖 -婳 -婍 -婌 -婫 -婤 -婘 -婠 -绹 -骕 -絜 -珷 -琲 -琡 -琟 -琔 -琭 -堾 -堼 -揕 -㙘 -堧 -喆 -堨 -塅 -堠 -絷 -𡎚 -葜 -惎 -萳 -葙 -靬 -葴 -蒇 -蒈 -鄚 -蒉 -蓇 -萩 -蒐 -葰 -葎 -鄑 -蒎 -葖 -蒄 -萹 -棤 -棽 -棫 -椓 -椑 -鹀 -椆 -棓 -棬 -棪 -椀 -楗 -甦 -酦 -觌 -奡 -皕 -硪 -欹 -詟 -辌 -棐 -龂 -黹 -牚 -睎 -晫 -晪 -晱 -𧿹 -蛑 -畯 -斝 -喤 -崶 -嵁 -崾 -嵅 -崿 -嵚 -翙 -圌 -圐 -赑 -淼 -赒 -铹 -铽 -𨱇 -锊 -锍 -锎 -锓 -犇 -颋 -稌 -筀 -筘 -筜 -筥 -筅 -傃 -傉 -翛 -傒 -傕 -舾 -畬 -脿 -腘 -䐃 -腙 -腒 -鲃 -猰 -猯 -㺄 -馉 -鄗 -廋 -廆 -鄌 -粢 -遆 -旐 -焞 -欻 -𣸣 -溚 -溁 -湝 -渰 -湓 -㴔 -渟 -溠 -渼 -溇 -湣 -湑 -溞 -愐 -愃 -敩 -甯 -棨 -扊 -裣 -祼 -婻 -媆 -媞 -㛹 -媓 -媂 -媄 -毵 -矞 -缊 -缐 -骙 -瑃 -瑓 -瑅 -瑆 -䴖 -瑖 -瑝 -瑔 -瑀 -𤧛 -瑳 -瑂 -嶅 -瑑 -遘 -髢 -塥 -堽 -赪 -摛 -塝 -搒 -搌 -蒱 -蒨 -蓏 -蔀 -蓢 -蓂 -蒻 -蓣 -椹 -楪 -榃 -榅 -楒 -楞 -楩 -榇 -椸 -楙 -歅 -碃 -碏 -碈 -䃅 -硿 -鄠 -辒 -龆 -觜 -䣘 -暕 -鹍 -㬊 -暅 -跱 -蜐 -蜎 -嵲 -赗 -骱 -锖 -锘 -锳 -锧 -锪 -锫 -锬 
-稑 -稙 -䅟 -筻 -筼 -筶 -筦 -筤 -傺 -鹎 -僇 -艅 -艉 -谼 -貆 -腽 -腨 -腯 -鲉 -鲊 -鲌 -䲟 -鲏 -雊 -猺 -飔 -觟 -𦝼 -馌 -裛 -廒 -瘀 -瘅 -鄘 -鹒 -鄜 -麀 -鄣 -阘 -煁 -煃 -煴 -煋 -煟 -煓 -滠 -溍 -溹 -滆 -滉 -溦 -溵 -漷 -滧 -滘 -滍 -愭 -慥 -慆 -塱 -裼 -禋 -禔 -禘 -禒 -谫 -鹔 -愍 -嫄 -媱 -戤 -戣 -缞 -耤 -瑧 -瑨 -瑱 -瑷 -瑢 -斠 -摏 -墕 -墈 -墐 -墘 -摴 -銎 -𡐓 -墚 -撖 -靽 -鞁 -蔌 -蔈 -蓰 -蔹 -蔊 -嘏 -榰 -榑 -槚 -𣗋 -槜 -榍 -疐 -酺 -酾 -酲 -酴 -碶 -䃎 -碨 -𥔲 -碹 -碥 -劂 -䴗 -夥 -瞍 -鹖 -㬎 -跽 -蜾 -幖 -嶍 -圙 -𨱏 -锺 -锼 -锽 -锾 -锿 -镃 -镄 -镅 -馝 -鹙 -箨 -箖 -劄 -僬 -僦 -僔 -僎 -槃 -㙦 -鲒 -鲕 -鲖 -鲗 -鲘 -鲙 -𩽾 -夐 -獍 -飗 -凘 -廑 -廙 -瘗 -瘥 -瘕 -鲝 -鄫 -熇 -漹 -漖 -潆 -漤 -潩 -漼 -漴 -㽏 -漈 -漋 -漻 -慬 -窬 -窭 -㮾 -褕 -禛 -禚 -隩 -嫕 -嫭 -嫜 -嫪 -㻬 -麹 -璆 -漦 -叇 -墣 -墦 -墡 -劐 -薁 -蕰 -蔃 -鼒 -槱 -鹝 -磏 -磉 -殣 -慭 -霅 -暵 -暲 -暶 -踦 -踣 -䗖 -蝘 -蝲 -蝤 -噇 -噂 -噀 -罶 -嶲 -嶓 -㠇 -嶟 -嶒 -镆 -镈 -镋 -镎 -镕 -稹 -儇 -皞 -皛 -䴘 -艎 -艏 -鹟 -𩾃 -鲦 -鲪 -鲬 -橥 -觭 -鹠 -鹡 -糇 -糈 -翦 -鹢 -鹣 -熛 -潖 -潵 -㵐 -澂 -澛 -瑬 -潽 -潾 -潏 -憭 -憕 -戭 -褯 -禤 -嫽 -遹 -璥 -璲 -璒 -憙 -擐 -鄹 -薳 -鞔 -黇 -蕗 -薢 -蕹 -橞 -橑 -橦 -醑 -觱 -磡 -𥕢 -磜 -豮 -鹾 -虤 -暿 -曌 -曈 -㬚 -蹅 -踶 -䗛 -螗 -疁 -㠓 -幪 -嶦 -𨱑 -馞 -穄 -篚 -篯 -簉 -鼽 -衠 -盦 -螣 -縢 -鲭 -鲯 -鲰 -鲺 -鲹 -亸 -癀 -瘭 -羱 -糒 -燋 -熻 -燊 -燚 -燏 -濩 -濋 -澪 -澽 -澴 -澭 -澼 -憷 -憺 -懔 -黉 -嬛 -鹨 -翯 -璱 -𤩽 -璬 -璮 -髽 -擿 -薿 -薸 -檑 -櫆 -檞 -醨 -繄 -磹 -磻 -瞫 -瞵 -蹐 -蟏 -㘎 -镤 -镥 -镨 -𨱔 -矰 -穙 -穜 -穟 -簕 -簃 -簏 -儦 -魋 -斶 -艚 -谿 -䲠 -鲾 -鲿 -鳁 -鳂 -鳈 -鳉 -獯 -䗪 -馘 -襕 -襚 -螱 -甓 -嬬 -嬥 -𦈡 -瓀 -釐 -鬶 -爇 -鞳 -鞮 -藟 -藦 -藨 -鹲 -檫 -黡 -礞 -礌 -𥖨 -蹢 -蹜 -蟫 -䗴 -嚚 -髃 -镮 -镱 -酂 -馧 -簠 -簝 -簰 -鼫 -鼩 -皦 -臑 -䲢 -鳑 -鳒 -鹱 -鹯 -癗 -𦒍 -旞 -翷 -冁 -䎖 -瀔 -瀍 -瀌 -襜 -䴙 -嚭 -㰀 -鬷 -醭 -蹯 -蠋 -翾 -鳘 -儳 -儴 -鼗 -𩾌 -鳚 -鳛 -麑 -麖 -蠃 -彟 -嬿 -鬒 -蘘 -欂 -醵 -颥 -甗 -𨟠 -巇 -酅 -髎 -犨 -𨭉 -㸌 -爔 -瀱 -瀹 -瀼 -瀵 -襫 -孅 -骦 -耰 -𤫉 -瓖 -鬘 -趯 -罍 -鼱 -鳠 -鳡 -鳣 -爟 -爚 -灈 -韂 -糵 -蘼 -礵 -鹴 -躔 -皭 -龢 -鳤 -亹 -籥 -鼷 -玃 -醾 -齇 -觿 -蠼 -𬣙 -𬇕 -𬣞 -𬘓 -𫭟 -𫭢 -𫇭 -𫐄 -𫵷 -𬇙 -𬣡 -𫸩 -𫘜 -𬘘 -𫘝 -𬨂 -𬀩 -𬀪 -𬬩 -𫍣 -𬣳 -𬩽 -𬮿 -𬯀 -𫰛 -𬳵 -𬳶 -𫠊 -𬍛 -鿍 -𬜬 -𪾢 -𪨰 -𫓧 -𬬮 -𬬱 -𬬭 -𬘡 -𬳽 -𬘩 -𫄧 -𪟝 -𬍤 -𫭼 -𬜯 -𬂩 -𫠆 -𬌗 -𫑡 -𪨶 -𬬸 -𬬻 -𬬹 -𬬿 -𬭁 -𫢸 -𫗧 -𬊈 -𬒈 -𬳿 -𫄨 -𬘫 -𫮃 -鿎 -𬱖 -𬟽 -𫓯 -𫟹 -𫟼 -𬇹 -𬍡 -𬤇 -𫍯 -𬤊 -𫍲 -𬯎 -𬘬 -𬘭 -𬴂 -𫘦 -𫟅 -𬘯 -𫘧 -𪣻 -𬃊 -𬷕 -𫐐 -𬹼 -𫶇 -𫖮 -鿏 -𬭊 -𫓶 -𬭎 -𫖯 -𬱟 -𫛭 -𫷷 -𬮱 -𬊤 -𬴃 -𫘨 -𬪩 -𬒔 -𬨎 -𫐓 -𫫇 -𫓹 -𬭚 -𬭛 -𬕂 -𬶋 -𬶍 -𫔶 -𫌀 -𫖳 -𫘪 -𫘬 -𫞩 -𪤗 -𬸘 -𬒗 -𫚖 -𬭤 -𫚕 -𬶐 -𬶏 -𬸚 -𬤝 -𬙂 -𬭩 -𬸣 -𫍽 -𬴊 -𬞟 -𫟦 -𬺈 -𫠜 -𪩘 -𬭬 -𬭯 -𫗴 -𬸦 -𫄷 -𬭳 -𬭶 -𫔍 -𬭸 -𬭼 -𫔎 -𬸪 -𬶟 -𬶠 -𬶨 -𫄸 -𬟁 -𬙊 -𬶭 -𬶮 -𬙋 -𬺓 -𫚭 -廠 -蔔 -兒 -幾 -幹 -虧 -纔 -與 -萬 -韆 -億 -個 -廣 -門 -義 -衛 -飛 -習 -馬 -鄉 -豐 -開 -無 -雲 -專 -藝 -廳 -區 -歷 -曆 -車 -貝 -岡 -見 -氣 -長 -僕 -幣 -僅 -從 -侖 -倉 -風 -烏 -鳳 -爲 -鬥 -憶 -計 -訂 -認 -譏 -醜 -隊 -辦 -鄧 -勸 -雙 -書 -擊 -撲 -節 -術 -厲 -龍 -滅 -軋 -東 -盧 -業 -舊 -帥 -歸 -葉 -電 -號 -衹 -隻 -嘰 -嘆 -們 -儀 -叢 -爾 -樂 -處 -鼕 -鳥 -務 -飢 -饑 -馮 -閃 -蘭 -匯 -彙 -頭 -漢 -寧 -討 -寫 -讓 -禮 -訓 -議 -訊 -記 -齣 -遼 -邊 -發 -髮 -聖 -對 -臺 -颱 -檯 -糾 -絲 -動 -鞏 -執 -擴 -掃 -場 -揚 -亞 -樸 -機 -權 -過 -協 -壓 -厭 -頁 -誇 -奪 -達 -夾 -軌 -堯 -劃 -邁 -畢 -貞 -師 -塵 -當 -噹 -籲 -嚇 -蟲 -麯 -團 -糰 -嗎 -嶼 -歲 -迴 -豈 -則 -剛 -網 -硃 -遷 -喬 -偉 -傳 -優 -傷 -價 -倫 -華 -僞 -嚮 -後 -會 -殺 -閤 -衆 -爺 -傘 -創 -雜 -負 -壯 -衝 -妝 -莊 -慶 -劉 -齊 -産 -閉 -問 -闖 -關 -燈 -湯 -興 -講 -諱 -軍 -訝 -許 -訛 -論 -訟 -農 -諷 -設 -訪 -訣 -尋 -盡 -儘 -導 -孫 -陣 -陽 -階 -陰 -婦 -媽 -戲 -觀 -歡 -買 -紅 -馱 -纖 -縴 -馴 -約 -級 -紀 -馳 -紉 -壽 -麥 -瑪 -進 -遠 -違 -韌 -運 -撫 -壇 -罎 -壞 -摳 -擾 -貢 -垻 -壩 -摺 -掄 -搶 -墳 -護 -殻 -塊 -聲 -報 -擬 -蕪 -葦 -蒼 -嚴 -蘆 -勞 -蘇 -囌 -極 -楊 -兩 -麗 -醫 -勵 -還 -殲 -來 -連 -軒 -鹵 -滷 -堅 -時 -縣 -裏 -嘔 -園 -曠 -圍 -噸 -郵 -睏 -員 -聽 -嗆 -嗚 -彆 -嶇 -崗 -帳 -財 -針 -釘 -亂 -體 -傭 -徹 -餘 -穀 -鄰 -腸 -龜 -猶 -狽 -條 -島 -飯 -飲 -係 -繫 -凍 -狀 -畝 -庫 -療 -應 -這 -廬 -閏 -閑 -間 -悶 -竈 -燦 -瀝 -淪 -滄 -溝 -滬 -瀋 -懷 -憂 -窮 -證 -啓 -評 -補 -識 -詐 -訴 -診 -詞 -譯 -靈 -層 -遲 -張 -際 -陸 -陳 -墜 -勁 -鷄 -緯 -驅 -純 -紗 -綱 -納 -駁 -縱 -紛 -紙 -紋 -紡 -驢 -紐 -環 -責 -現 -錶 -規 -攏 -揀 -擔 -頂 -擁 -勢 -攔 -擰 -撥 -擇 -蘋 -範 -莖 -樞 -櫃 -闆 -鬆 -槍 -楓 -構 -喪 -畫 -棗 -賣 -鬱 -礬 -礦 -碼 -厠 -奮 -態 -歐 -毆 -壟 -轟 -頃 -轉 -斬 -輪 -軟 -齒 -虜 -腎 -賢 -國 -暢 -嚨 -鳴 -羅 -幟 -嶺 -凱 -敗 -賬 -販 -貶 -購 -貯 -圖 -釣 -製 -颳 -俠 -僥 -偵 -側 -憑 -僑 -貨 -質 -徑 -捨 -覓 -貪 -貧 -膚 -腫 -脹 -骯 -脅 -魚 -獰 -備 -飾 -飽 -飼 -變 -龐 -廟 -瘧 -劑 -廢 -閘 -鬧 -鄭 -捲 -單 -爐 -淺 -濘 -瀉 -潑 -澤 -憐 -學 -寶 -寵 -審 -簾 -實 -試 -詩 -誠 -襯 -視 -話 -誕 -詭 -詢 -該 -詳 
-肅 -録 -隸 -彌 -瀰 -陝 -駕 -參 -艱 -綫 -練 -組 -紳 -細 -駛 -織 -駒 -終 -駐 -絆 -駝 -紹 -繹 -經 -貫 -貳 -幫 -項 -挾 -撓 -趙 -擋 -墊 -擠 -揮 -薦 -帶 -繭 -蕩 -榮 -葷 -熒 -鬍 -蔭 -藥 -標 -棧 -棟 -欄 -檸 -樹 -鹹 -磚 -硯 -麵 -牽 -鷗 -殘 -軸 -輕 -鴉 -戰 -點 -臨 -覽 -竪 -嘗 -啞 -顯 -貴 -蝦 -蟻 -螞 -雖 -駡 -勛 -嘩 -響 -喲 -峽 -罰 -賤 -貼 -貽 -鈣 -鈍 -鈔 -鍾 -鐘 -鋼 -鈉 -鑰 -欽 -鈞 -鈎 -鈕 -氈 -氫 -選 -適 -種 -鞦 -復 -複 -倆 -貸 -順 -儉 -須 -鬚 -劍 -朧 -膽 -勝 -狹 -獅 -獨 -獄 -貿 -餌 -饒 -蝕 -餃 -餅 -巒 -彎 -將 -奬 -瘡 -瘋 -親 -閨 -聞 -閩 -閥 -閣 -養 -薑 -類 -婁 -總 -煉 -爍 -爛 -窪 -潔 -灑 -澆 -濁 -測 -瀏 -濟 -渾 -濃 -惱 -舉 -覺 -憲 -竊 -誡 -誣 -語 -襖 -誤 -誘 -誨 -説 -誦 -墾 -晝 -費 -遜 -隕 -險 -嬌 -賀 -壘 -綁 -絨 -結 -繞 -驕 -繪 -給 -絢 -駱 -絡 -絶 -絞 -駭 -統 -艷 -蠶 -頑 -盞 -撈 -載 -趕 -鹽 -損 -撿 -摯 -剝 -熱 -搗 -壺 -聶 -萊 -蓮 -獲 -穫 -惡 -噁 -瑩 -鶯 -檔 -橋 -樺 -樁 -樣 -賈 -礫 -礎 -顧 -轎 -較 -頓 -斃 -緻 -慮 -監 -緊 -黨 -曬 -曉 -嘮 -鴨 -暈 -鴦 -罷 -圓 -賊 -賄 -賂 -贜 -錢 -鉗 -鑽 -鉀 -鐵 -鈴 -鉛 -犧 -敵 -積 -稱 -筆 -債 -傾 -賃 -艦 -艙 -聳 -愛 -頒 -頌 -臟 -髒 -臍 -膠 -腦 -膿 -鴕 -鴛 -皺 -餓 -餒 -戀 -槳 -漿 -準 -癥 -齋 -離 -資 -競 -閲 -煩 -燒 -燭 -遞 -濤 -澇 -渦 -塗 -滌 -潤 -澗 -漲 -燙 -澀 -憫 -寬 -傢 -賓 -竅 -請 -諸 -諾 -讀 -誹 -襪 -課 -誰 -調 -諒 -諄 -談 -誼 -懇 -劇 -難 -預 -絹 -綉 -驗 -繼 -駿 -瑣 -擲 -據 -摻 -職 -蘿 -螢 -營 -蕭 -薩 -夢 -檢 -醖 -碩 -聾 -襲 -輔 -輛 -顱 -懸 -躍 -纍 -囉 -嘯 -嶄 -邏 -嬰 -銬 -鐺 -鋁 -銅 -銘 -鏟 -銀 -矯 -穢 -籠 -償 -軀 -釁 -銜 -盤 -鴿 -斂 -領 -臉 -獵 -餡 -館 -癢 -鏇 -閻 -闡 -蓋 -斷 -獸 -鴻 -漸 -淵 -漁 -澱 -滲 -慚 -懼 -驚 -慘 -慣 -謀 -諜 -謊 -諧 -禱 -禍 -謂 -諺 -謎 -彈 -墮 -隨 -隱 -嬸 -頗 -頸 -績 -緒 -續 -騎 -綽 -繩 -維 -綿 -綳 -綢 -綜 -綻 -緑 -綴 -瓊 -趨 -攬 -攙 -擱 -摟 -攪 -聯 -蔣 -韓 -橢 -確 -頰 -靂 -暫 -翹 -輩 -鑿 -輝 -賞 -睞 -噴 -疇 -踐 -遺 -鵑 -賦 -賭 -贖 -賜 -賠 -鑄 -鋪 -鏈 -銷 -鎖 -鋤 -鍋 -銹 -鋒 -鋅 -鋭 -鵝 -築 -篩 -儲 -懲 -禦 -釋 -臘 -魯 -憊 -饋 -饞 -裝 -蠻 -闊 -糞 -滯 -濕 -潰 -濺 -灣 -憤 -竄 -窩 -褲 -禪 -謝 -謡 -謗 -謙 -屬 -屢 -緬 -纜 -緝 -緞 -緩 -締 -縷 -騙 -編 -騷 -緣 -鵡 -攝 -擺 -襬 -攤 -鵲 -藍 -濛 -懞 -矇 -獻 -欖 -樓 -賴 -礙 -尷 -霧 -輻 -輯 -輸 -頻 -齡 -鑒 -蹺 -蝸 -錯 -錨 -錫 -鑼 -錘 -錐 -錦 -鍵 -鋸 -錳 -辭 -頽 -籌 -簽 -籤 -簡 -膩 -鵬 -騰 -鮑 -穎 -觸 -雛 -饃 -餾 -醬 -謄 -糧 -數 -滿 -濾 -濫 -灕 -濱 -灘 -譽 -窺 -寢 -謹 -謬 -闢 -縛 -縫 -纏 -繽 -贅 -墻 -衊 -藹 -檻 -釀 -願 -轄 -輾 -顆 -踴 -蠟 -蠅 -蟬 -賺 -鍬 -鍛 -鍍 -穩 -籮 -簫 -輿 -鮮 -饅 -瀟 -賽 -譚 -譜 -騾 -縮 -攆 -聰 -藴 -櫻 -飄 -黴 -瞞 -題 -囑 -鎮 -鎬 -鎊 -簍 -鯉 -鯽 -癟 -癱 -顔 -鯊 -瀾 -額 -譴 -鶴 -繚 -顛 -轍 -鸚 -贈 -鏡 -贊 -籃 -籬 -鯨 -癮 -辯 -瀕 -懶 -繮 -繳 -矚 -贍 -鰐 -辮 -贏 -驟 -囂 -鐮 -鰭 -鷹 -巔 -顫 -癬 -鱉 -鬢 -鱗 -躪 -贛 -鑲 -韋 -閂 -訃 -勱 -芻 -鄺 -訐 -訌 -訕 -訖 -馭 -璣 -壙 -捫 -薌 -厙 -釔 -傴 -倀 -傖 -獷 -獁 -鳬 -鄔 -餳 -懺 -謳 -詎 -訥 -紆 -紂 -紇 -紈 -璵 -摶 -塢 -㩳 -蕓 -藶 -莧 -萇 -蓯 -磯 -奩 -歟 -軔 -鄴 -嘸 -囈 -嚦 -暘 -唄 -幃 -峴 -嵐 -圇 -釗 -釙 -釕 -僉 -鳩 -鄒 -飩 -餼 -飪 -飫 -飭 -廡 -癤 -闈 -閎 -閔 -煬 -灃 -漚 -渢 -潙 -憮 -慪 -愾 -悵 -愴 -詁 -訶 -詛 -詆 -謅 -詔 -詒 -隴 -陘 -嫵 -嫗 -嬀 -剄 -紜 -紕 -紝 -綸 -紓 -瑋 -匭 -壚 -擓 -蘢 -蔦 -塋 -煢 -櫪 -梘 -棖 -樅 -碭 -甌 -郟 -軛 -鳶 -曇 -蟣 -黽 -嚀 -噝 -巋 -劌 -剴 -嶧 -釷 -釺 -釧 -釩 -釹 -釵 -儈 -儕 -儂 -劊 -慫 -糴 -戧 -膞 -邇 -梟 -餞 -飴 -癘 -瘍 -煒 -熰 -熗 -瀧 -瀘 -濼 -涇 -㥮 -懌 -誆 -誄 -詿 -詰 -詼 -鄆 -禕 -誅 -詵 -詬 -詮 -詣 -諍 -詫 -諢 -詡 -駑 -紺 -紲 -紱 -駟 -駙 -縐 -絀 -驛 -駘 -瓏 -頇 -埡 -撾 -撻 -賁 -壋 -撏 -莢 -貰 -蓽 -蕎 -薈 -薺 -堊 -滎 -犖 -蕁 -藎 -蓀 -蕒 -葤 -櫛 -櫳 -櫨 -櫟 -檉 -酈 -硨 -碸 -殤 -軲 -軻 -轤 -軼 -軫 -蠆 -覘 -瞘 -嘵 -嗶 -噦 -剮 -鄖 -噲 -噥 -嶢 -幀 -嶠 -貺 -鈈 -鈦 -鋇 -鈑 -鈐 -鎢 -鈁 -鈀 -篤 -儔 -儼 -儷 -腖 -臚 -脛 -鴇 -獪 -颮 -猻 -餉 -餄 -餎 -孿 -孌 -癧 -瘲 -颯 -闥 -閭 -闓 -閡 -熾 -烴 -浹 -澮 -滸 -潯 -濜 -慟 -懨 -愷 -惻 -惲 -誚 -禰 -誥 -誑 -鴆 -婭 -嬈 -懟 -絝 -驍 -驊 -絎 -絳 -駢 -頊 -璫 -琿 -塒 -塤 -堝 -贄 -蒔 -萵 -蕕 -鴣 -蒓 -橈 -楨 -榿 -檜 -邐 -礪 -礱 -軾 -輊 -輅 -鶇 -躉 -齔 -鸕 -矓 -嘜 -鴞 -蜆 -嗩 -嶗 -崍 -覬 -賅 -鈺 -鉦 -鈷 -鉢 -鈸 -鉞 -鉭 -鉬 -鈿 -鈾 -鉑 -鑠 -鉚 -鈰 -鉉 -鉈 -鉍 -鈮 -鈹 -鏺 -鐸 -氬 -筧 -頎 -徠 -膾 -鴟 -璽 -鴝 -獫 -裊 -餑 -欒 -攣 -癰 -痙 -頏 -閫 -鬮 -誾 -閬 -鄲 -燁 -燴 -燼 -淶 -漣 -潿 -慳 -諏 -諑 -禎 -諉 -諛 -諗 -諂 -誶 -媧 -嫻 -綆 -驪 -綃 -騁 -綏 -縧 -綈 -駸 -鷥 -燾 -璉 -麩 -擄 -摑 -鷙 -撣 -慤 -摜 -縈 -槤 -覡 -欞 -嗇 -匱 -硤 -磽 -鴯 -龔 -殞 -殮 -賚 -輒 -塹 -嘖 -囀 -嚙 -蹌 -蠣 -蠱 -蟶 -幘 -幗 -賕 -賑 -賒 -銠 -鉺 -鋏 -鐃 -銦 -鎧 -鍘 -銖 -銑 -鋌 -鏵 -銓 -鎩 -鉿 -銚 -鉻 -錚 -銫 -鉸 -銥 -銃 -銨 -銣 -鴰 -穠 -箋 -籩 -僨 -僂 -皚 -鴴 -艫 -龕 -玀 -獼 -餜 -餛 -鸞 -闍 -閾 -閹 -閶 -鬩 -閽 -閼 -羥 -糲 -燜 -漬 -瀆 -澠 -愜 -憚 -諶 -諫 -皸 -謔 -襠 -謁 -諤 -諭 -諼 -讒 -諳 -諦 -諞 -糶 -嬋 -綾 -騏 -綺 -緋 -緔 -騍 -緄 -騅 -綬 -綹 -綣 -綰 -驂 -緇 -靚 -輦 -黿 -頡 -撳 -蟄 -壪 -蔞 
-櫝 -欏 -賫 -鵓 -鸝 -殫 -輥 -輞 -槧 -輟 -輜 -瞼 -躒 -蛺 -蟯 -螄 -蠐 -嘍 -嶸 -嶁 -賧 -鋙 -錸 -鏗 -鋥 -鋰 -鋯 -鋨 -銼 -鐧 -銻 -鋃 -鋦 -錒 -犢 -鵠 -篳 -牘 -儻 -儐 -儺 -嬃 -頜 -鵒 -魷 -魨 -魴 -潁 -颶 -觴 -熲 -餷 -餿 -褻 -臠 -癆 -癇 -賡 -頦 -鷳 -闌 -闃 -闋 -鵜 -憒 -嚳 -謨 -褳 -襇 -讜 -謖 -謚 -謐 -騭 -巰 -翬 -騖 -緙 -緗 -緘 -緹 -緲 -緦 -緱 -縋 -緡 -饗 -耮 -驁 -韞 -攄 -擯 -轂 -驀 -鶓 -薊 -蘺 -鎣 -頤 -櫚 -櫸 -磧 -磣 -鵪 -輳 -齟 -齙 -韙 -囁 -躂 -蹕 -躚 -躋 -噯 -鍺 -錛 -錡 -鍀 -錁 -錕 -錮 -鍁 -錈 -錠 -錙 -覦 -頷 -鮁 -鮃 -鮎 -鱸 -穌 -鮒 -鮐 -鵮 -颼 -饈 -鶉 -瘮 -闔 -闐 -闕 -灧 -瀅 -潷 -灤 -澦 -懾 -鱟 -騫 -竇 -謾 -謫 -嬡 -嬪 -縉 -縝 -縟 -轡 -騮 -縞 -縭 -縊 -縑 -騸 -覯 -韜 -靉 -攖 -薔 -藺 -鶘 -檳 -櫧 -釅 -殯 -霽 -轅 -齜 -齦 -瞜 -曖 -躊 -蟈 -鶚 -嚶 -羆 -賻 -罌 -鶻 -鍥 -鍇 -鍶 -鍔 -鍤 -鏘 -鎂 -鏤 -簀 -篋 -簞 -籙 -臏 -鮭 -鮪 -鱭 -鮫 -鱘 -饉 -鑾 -瘻 -闞 -鮝 -糝 -鷀 -瀲 -濰 -譖 -褸 -譙 -讕 -譎 -鶥 -嬙 -鶩 -驃 -縹 -縵 -縲 -纓 -驄 -繆 -繅 -耬 -瓔 -擷 -擼 -攛 -聵 -覲 -韃 -鞽 -蘄 -賾 -檣 -靨 -魘 -饜 -轆 -齬 -齪 -覷 -顒 -躓 -躑 -蠑 -螻 -顎 -嚕 -顓 -鑷 -鎘 -鎸 -鎳 -鎦 -鎰 -鎵 -鑌 -簣 -鷂 -鯁 -鱺 -鰱 -鰹 -鰣 -鯀 -鯇 -觶 -饊 -饌 -齏 -讞 -襤 -譫 -屨 -纈 -繕 -繒 -驏 -擻 -顳 -顢 -藪 -櫓 -櫞 -贋 -飆 -鏨 -轔 -蟎 -鐯 -鏢 -鏜 -鏝 -鏰 -鏞 -鏑 -鏃 -鏐 -氌 -穡 -魎 -鯪 -鯡 -鯤 -鯧 -鯝 -鯢 -鯛 -鯔 -獺 -鷓 -贇 -癭 -斕 -瀨 -顙 -繾 -繰 -繯 -蘚 -鷯 -齲 -齷 -躡 -蹣 -羈 -鐔 -鐝 -鐐 -鐓 -鑭 -鑹 -鏹 -鐙 -籪 -鷦 -鱝 -鰈 -鯷 -鰓 -鰍 -鰉 -鯿 -鷲 -懣 -鷸 -鰲 -韉 -顥 -鷺 -䴉 -髏 -鑊 -鐳 -鐲 -讎 -鰨 -鰥 -鰩 -癩 -攢 -靄 -躥 -髖 -髕 -鑔 -籟 -鰳 -鰾 -鱈 -鰻 -鱅 -讖 -驥 -纘 -瓚 -鼉 -黷 -黲 -鑣 -鑞 -臢 -鱖 -鱔 -鱒 -驤 -顰 -鱧 -癲 -灝 -鸛 -鑱 -趲 -顴 -躦 -饢 -戇 -戔 -訏 -訒 -釓 -俔 -閆 -澫 -訢 -訩 -詝 -紃 -纊 -瑒 -剗 -塸 -壢 -埨 -撝 -蔿 -榪 -軑 -軏 -咼 -㠣 -覎 -㑳 -颺 -閌 -潕 -湋 -澐 -浿 -諓 -禡 -詗 -詘 -詖 -屓 -彄 -紘 -馹 -馼 -紵 -紞 -駃 -紖 -瑲 -薴 -棡 -軝 -暐 -晛 -崬 -釴 -釤 -鍆 -鍚 -鄶 -獮 -飿 -嶨 -詷 -詪 -鄩 -鳲 -隑 -隮 -娙 -逕 -駓 -駔 -駉 -絅 -騶 -䮄 -紼 -紿 -瓅 -韍 -墶 -塏 -薘 -蕘 -蔄 -葒 -鳾 -龑 -軹 -軤 -轢 -軺 -睍 -曨 -噠 -鈃 -鈇 -鉅 -鋹 -釿 -錀 -鈧 -鈥 -鈄 -倈 -艤 -鶬 -颭 -餏 -湞 -溮 -滻 -褘 -絰 -駰 -絪 -駪 -綎 -綖 -驫 -勣 -璕 -𡑍 -䓣 -薟 -藭 -椏 -梜 -頍 -硜 -輄 -輈 -輇 -貲 -嗊 -曄 -暉 -鄳 -幬 -輋 -嶮 -贐 -鉥 -鉕 -鑪 -鉮 -鉊 -鉧 -僤 -鴒 -魛 -餗 -燖 -溳 -礐 -窵 -襏 -駼 -絺 -綌 -騂 -綄 -璡 -墠 -壼 -聹 -蘀 -勩 -罃 -檮 -棶 -厴 -䃮 -磑 -礄 -鴷 -齕 -頔 -廼 -凢 -亾 -枒 -屍 -匃 -匄 -紥 -紮 -疋 -殀 -讐 -觔 -兇 -宂 -㕥 -㠯 -栞 -佈 -佔 -呌 -敂 -冄 -坵 -僊 -怱 -悤 -冊 -夘 -戼 -牠 -妳 -嬭 -摃 -釦 -攷 -託 -衺 -衕 -弔 -喫 -囙 -㠶 -颿 -秊 -倣 -髣 -佀 -朶 -氷 -決 -併 -並 -竝 -汙 -汚 -異 -姦 -廵 -挵 -衖 -搤 -阯 -撦 -埳 -阬 -誌 -㕁 -卻 -刦 -刧 -刼 -芲 -蘤 -桿 -槓 -荳 -獃 -唫 -脗 -皁 -彿 -髴 -疘 -刪 -鉋 -鑤 -況 -牀 -恡 -棄 -洶 -汎 -災 -烖 -菑 -禩 -侷 -跼 -坿 -玅 -姉 -妬 -翫 -搨 -柺 -拕 -牴 -觝 -倖 -抝 -盃 -桮 -傑 -逩 -肎 -菓 -崐 -崑 -呪 -虖 -嘑 -謼 -詠 -㟁 -嵒 -巗 -巖 -雰 -稈 -咊 -嶽 -妷 -姪 -廹 -徃 -餚 -採 -寀 -唸 -週 -昬 -兎 -兔 -亯 -亱 -䘚 -淨 -劵 -匟 -㳒 -灋 -洩 -霑 -淚 -註 -恠 -箒 -屆 -絃 -圅 -旾 -珎 -掛 -垜 -艸 -茘 -査 -栢 -柵 -栁 -桺 -柹 -韮 -揹 -昰 -閧 -鬨 -冐 -暎 -嚥 -倃 -𠴰 -偺 -喒 -齩 -欬 -榘 -㑺 -儁 -敍 -敘 -肧 -脈 -䘑 -衇 -跡 -蹟 -砲 -礮 -薙 -鬀 -恆 -怳 -卹 -䘏 -賉 -婣 -畊 -揑 -綑 -輓 -恥 -躭 -晉 -棲 -覈 -慄 -翄 -脣 -槕 -㨪 -螡 -蟁 -㤙 -陗 -峩 -峯 -乗 -椉 -咲 -筍 -俛 -頫 -勌 -䠶 -躳 -慇 -拏 -㧱 -挐 -脃 -胷 -肐 -貍 -㽞 -畱 -淒 -悽 -蓆 -効 -傚 -涼 -缾 -菸 -煙 -淛 -湧 -誖 -猂 -醼 -讌 -㝠 -寃 -孃 -桒 -毬 -瑠 -璢 -瑯 -㨗 -搥 -搯 -蔆 -惏 -楳 -槑 -捄 -廂 -慽 -慼 -瞇 -埜 -畧 -虵 -稭 -棃 -犂 -迻 -媮 -兠 -舩 -慾 -綵 -腳 -𩓐 -夠 -豬 -貓 -湊 -減 -庻 -蔴 -菴 -朢 -睠 -觕 -麤 -釬 -銲 -痳 -殽 -婬 -滛 -湻 -㴱 -樑 -顇 -㝛 -窰 -窯 -琹 -欵 -墖 -趂 -隄 -愽 -揷 -揫 -煑 -朞 -㪚 -塟 -蔥 -蔕 -稜 -棊 -碁 -椶 -偪 -㕑 -廚 -廈 -鴈 -冣 -㝡 -晳 -鼃 -餧 -餵 -嗁 -諠 -㡌 -賸 -筴 -筞 -筩 -栰 -暠 -皜 -踰 -蝟 -㪟 -燄 -遊 -媿 -嘅 -庽 -窓 -牎 -牕 -窻 -徧 -僱 -帬 -裠 -強 -彊 -疎 -壻 -瓌 -䰟 -皷 -擕 -㩗 -㩦 -攜 -懃 -鞾 -幙 -㮣 -酧 -詶 -醻 -掽 -踫 -㼝 -盌 -磟 -覩 -倸 -㬉 -煗 -煖 -晻 -闇 -炤 -跥 -䗬 -蠭 -寘 -辠 -稺 -穉 -燬 -譭 -瘉 -癒 -顋 -骽 -猨 -蝯 -稟 -痺 -癡 -亷 -㢘 -韻 -泝 -遡 -昚 -躶 -臝 -羣 -㬪 -曡 -疊 -勦 -琍 -瓈 -𤋮 -熈 -牓 -搾 -謌 -堿 -鹻 -鹼 -矁 -燻 -髈 -𤺥 -辢 -旂 -𡚁 -潄 -砦 -詧 -嫰 -櫈 -撐 -墪 -譔 -鞵 -鞌 -蕋 -橤 -蘂 -醕 -譆 -跴 -蹤 -蜨 -蠍 -稾 -殭 -惪 -厀 -襃 -癅 -䊀 -餬 -潛 -癄 -顦 -鷰 -藷 -櫥 -螎 -蹏 -蟇 -譟 -簒 -彫 -琱 -鵰 -餹 -餻 -簷 -粦 -燐 -緐 -幑 -蹧 -粇 -穅 -臋 -籐 -繙 -飜 -孼 -蠏 -燿 -蝡 -稬 -穤 -惷 -覇 -鑵 -戹 -阨 -剳 -帀 -巵 -亙 -佇 -竚 -穽 -岅 -虯 -𦍑 -羗 -啎 -姙 -㘭 -袟 -袠 -逈 -㒺 -犛 -氂 -偘 -甕 -罋 -冺 -姍 -蝨 -琺 -瑇 -尅 -梔 -斮 -斲 -斵 -暱 -毘 -蝱 -吚 -哶 -峝 -粃 -竢 -狥 -秈 -烱 -㳄 -袵 -盇 -涖 -蒞 -碪 -蠔 -唕 -倐 -儵 -雋 -皐 -臯 -衂 -䶊 -臙 -獧 -痾 -皰 -湼 -澣 -濬 -塚 -襢 -娿 -勅 -勑 -戞 -廐 -廄 -眥 -覜 -勗 -啗 -噉 
-傯 -挱 -㥫 -惥 -慂 -陻 -蕚 -萲 -蕿 -蘐 -藼 -櫂 -箠 -槨 -啑 -蹠 -蚘 -痐 -蛕 -蜖 -瘖 -遯 -醃 -飱 -冪 -簑 -枏 -柟 -檝 -楥 -矴 -椗 -嘷 -獋 -粺 -䈰 -諐 -齶 -堘 -疿 -雝 -秔 -稉 -槀 -搉 -廝 -叡 -嘠 -蜋 -筯 -篛 -麞 -糉 -緥 -璿 -髥 -臕 -餈 -剹 -橜 -罇 -蜺 -矙 -憇 -翺 -饍 -瞖 -羴 -羶 -爕 -繦 -騌 -鬉 -騣 -蔾 -䠀 -簮 -躕 -蹵 -䝔 -貛 -鼴 -麐 -塡 -あ -い -う -え -お -か -き -く -け -こ -さ -し -す -せ -そ -た -ち -つ -て -と -な -に -ぬ -ね -の -は -ひ -ふ -へ -ほ -ま -み -む -め -も -や -ゆ -よ -ら -り -る -れ -ろ -わ -を -ん -が -ぎ -ぐ -げ -ご -ざ -じ -ず -ぜ -ぞ -だ -ぢ -づ -で -ど -ば -び -ぶ -べ -ぼ -ぱ -ぴ -ぷ -ぺ -ぽ -ぁ -ぃ -ぅ -ぇ -ぉ -っ -ゃ -ゅ -ょ -ゎ -ゕ -ゖ -ア -イ -ウ -エ -オ -カ -キ -ク -ケ -コ -サ -シ -ス -セ -ソ -タ -チ -ツ -テ -ト -ナ -ニ -ヌ -ネ -ノ -ハ -ヒ -フ -ヘ -ホ -マ -ミ -ム -メ -モ -ヤ -ユ -ヨ -ラ -リ -ル -レ -ロ -ワ -ヲ -ン -ガ -ギ -グ -ゲ -ゴ -ザ -ジ -ズ -ゼ -ゾ -ダ -ヂ -ヅ -デ -ド -バ -ビ -ブ -ベ -ボ -パ -ピ -プ -ペ -ポ -ァ -ィ -ゥ -ェ -ォ -ッ -ャ -ュ -ョ -ヮ -ヵ -ヶ -ヷ -ヸ -ヹ -ヺ -・ -ー -ヽ -ヾ -ヿ -ア -イ -ウ -エ -オ -カ -キ -ク -ケ -コ -サ -シ -ス -セ -ソ -タ -チ -ツ -テ -ト -ナ -ニ -ヌ -ネ -ノ -ハ -ヒ -フ -ヘ -ホ -マ -ミ -ム -メ -モ -ヤ -ユ -ヨ -ラ -リ -ル -レ -ロ -ワ -ヲ -ン -゙ -゚ -ァ -ィ -ゥ -ェ -ォ -ッ -ャ -ュ -ョ -円 -気 -糸 -絵 -楽 -帰 -戸 -広 -黒 -図 -線 -読 -売 -歩 -毎 -亜 -悪 -圧 -扱 -囲 -為 -壱 -隠 -栄 -営 -駅 -塩 -縁 -艶 -応 -桜 -穏 -仮 -価 -箇 -ゑ -ゝ -ゞ -ヰ -ヴ -㈱ -両 -丼 -丿 -亀 -仏 -伝 -侶 -俤 -値 -倶 -倹 -偐 -偽 -働 -儛 -兌 -児 -冑 -冨 -凞 -処 -凪 -別 -剣 -剤 -剰 -劔 -労 -勧 -勲 -匁 -匂 -匲 -卍 -単 -厳 -収 -呂 -呉 -呑 -呰 -唖 -喚 -喩 -喰 -噛 -噺 -嚢 -囃 -団 -圀 -圏 -堀 -堺 -塀 -塁 -塙 -増 -墺 -壊 -壌 -壷 -変 -奨 -姫 -娯 -嫐 -嬢 -嬾 -孁 -宍 -実 -宮 -寔 -寛 -対 -専 -尭 -峠 -崋 -嶋 -巀 -巌 -巣 -巻 -帯 -幇 -庁 -廃 -廻 -弉 -弌 -弐 -弖 -弾 -従 -徳 -徴 -忯 -恵 -悩 -惣 -懐 -懽 -戦 -戯 -戻 -払 -抜 -択 -拝 -拠 -拡 -拵 -挙 -挿 -捗 -捜 -掟 -掲 -掻 -揃 -換 -揺 -摂 -撃 -撹 -斉 -斎 -旛 -旡 -晧 -晩 -暁 -暦 -曽 -杁 -杢 -杣 -杮 -枓 -枠 -枡 -柾 -栂 -栃 -桝 -桟 -桾 -梛 -梱 -梲 -梶 -椙 -検 -椥 -楕 -楡 -楢 -榊 -榎 -槇 -様 -槙 -槻 -樋 -権 -樫 -橿 -檥 -欅 -歎 -歓 -歯 -歳 -歴 -毀 -沖 -沢 -浄 -涙 -済 -渉 -渋 -渓 -渕 -満 -滝 -漑 -潅 -澁 -瀞 -瀬 -焔 -焼 -煇 -煕 -煥 -燗 -爼 -犠 -狛 -猟 -獏 -獣 -珊 -瑤 -甞 -畑 -畠 -畳 -畷 -畺 -痩 -癪 -発 -県 -眞 -砕 -碕 -礒 -禖 -禿 -稲 -穂 -穣 -竃 -竜 -竴 -笹 -筈 -筬 -筰 -箆 -箏 -箙 -篠 -篭 -簺 -籾 -粂 -粋 -粛 -粧 -糺 -紬 -絁 -経 -絖 -絣 -絽 -継 -続 -綟 -総 -縄 -縅 -縒 -縦 -繊 -繋 -繍 -繝 -繧 -纐 -纒 -罠 -罧 -罵 -羂 -羇 -羨 -聟 -聡 -聨 -聴 -脇 -脳 -膣 -膵 -臈 -臓 -臥 -舎 -舖 -舗 -舘 -芿 -苅 -茲 -荊 -荘 -莬 -莵 -菫 -萠 -蔵 -薗 -薫 -薬 -薭 -蘊 -蛍 -蝋 -蝿 -蟷 -衞 -衵 -袙 -袞 -袰 -袴 -袿 -裃 -裡 -裲 -褄 -褌 -襴 -襷 -覗 -覚 -覧 -観 -訳 -証 -諌 -諚 -諟 -諡 -諮 -譛 -譲 -讃 -豅 -豊 -豎 -賎 -賛 -贔 -躙 -躰 -転 -軽 -輌 -辥 -辺 -辻 -込 -逓 -遅 -遙 -邉 -郷 -酔 -醗 -醤 -醸 -釈 -鉄 -鉇 -鉤 -鉱 -鉾 -銈 -銕 -銭 -鋲 -鋳 -鋺 -錆 -錍 -錣 -錬 -錵 -鍑 -鍮 -鍼 -鎌 -鎗 -鎚 -鎹 -鐇 -鐚 -鐡 -鑁 -鑑 -鑚 -鑢 -閇 -関 -閦 -闘 -陥 -険 -隣 -隷 -雑 -雫 -霊 -靜 -靫 -靭 -靱 -鞄 -鞆 -頚 -頬 -頴 -頼 -顕 -顗 -餝 -饂 -駄 -駆 -駈 -騒 -験 -騨 -髄 -髙 -髪 -髷 -鯖 -鯰 -鯱 -鰒 -鰯 -鰰 -鳰 -鴎 -鴫 -鵄 -鵞 -鵺 -鶏 -鹸 -麁 -麺 -麿 -黌 -黙 -鼈 -齢 -龗 -縯 -蟅 -坖 -祂 -鼂 -鱚 -蛻 -屌 -呾 -煔 -吶 -扥 -蚖 -銂 -尃 -夋 -鵼 -徬 -寳 -彡 -舨 -湳 -麼 -鍈 -崈 -鱣 -盺 -拺 -瑥 -茷 -焻 -奀 -驎 -鱰 -砢 -痟 -廱 -僜 -瘺 -鱊 -擥 -嶰 -淓 -跅 -浵 -媗 -璦 -煠 -檊 -媃 -峅 -躄 -鉟 -塽 -蟴 -鯮 -弍 -烒 -鵵 -妑 -孋 -蚡 -恊 -輭 -廞 -產 -曅 -盜 -騤 -囪 -鱀 -茇 -葊 -逹 -狓 -崢 -趖 -凃 -羙 -鮸 -昞 -楿 -渽 -圗 -麪 -屇 -鍉 -葝 -沯 -爭 -幵 -筭 -寊 -銋 -貮 -鎭 -熺 -昜 -鍱 -墬 -愒 -磺 -嚈 -稘 -珮 -釆 -殑 -鍩 -䲁 -蕷 -鐿 -僡 -佹 -輶 -冴 -襶 -賔 -猙 -辧 -絛 -磾 -韁 -螔 -譳 -礑 -鋱 -魩 -嚗 -棆 -牆 -敟 -柶 -瓛 -魣 -巎 -轘 -襌 -枼 -鸌 -逺 -錏 -縡 -帢 -騄 -媼 -埅 -鄤 -萐 -祙 -旼 -詥 -鶲 -燉 -卲 -銱 -庲 -伱 -氽 -嵿 -挻 -煵 -窋 -鐤 -鮊 -鱬 -鰧 -嬤 -譞 -諲 -脭 -悳 -崘 -阭 -內 -袾 -冚 -壐 -咗 -礠 -孮 -痲 -埈 -肹 -鰮 -鮓 -濊 -塜 -凜 -蒢 -噰 -桼 -峍 -焴 -鶒 -鋮 -綠 -鶹 -熿 -毴 -咟 -嘥 -睺 -繡 -郎 -瘞 -鉶 -蔎 -秠 -緤 -蝀 -躝 -蟜 -繃 -囮 -墫 -乭 -胊 -濙 -瘓 -榣 -鑛 -鐫 -嶴 -甹 -坮 -銾 -蒭 -睜 -俋 -餠 -榢 -蓳 -盋 -堷 -鍏 -苝 -巛 -蚵 -暏 -熤 -嬨 -墎 -鏽 -戶 -菺 -膮 -熖 -睪 -栜 -捱 -榗 -鍷 -曧 -犽 -韑 -袓 -䖝 -焄 -喦 -髲 -疌 -㴪 -侊 -貐 -蕅 -禠 -蕑 -囯 -暊 -儞 -佋 -柎 -㐱 -鰤 -苳 -鱥 -謤 -遶 -眀 -鑀 -羋 -顏 -陜 -銩 -黶 -苼 -蒤 -棛 -儫 -咁 -抦 -衚 -棩 -焿 -脫 -麅 -玏 -埧 -淸 -黁 -淽 -彠 -鮨 -沜 -糀 -厓 -楧 -嶌 -簹 -檵 -鱇 -嶬 -廸 -卽 -樀 -贌 -酼 -籛 -沒 -晸 -諪 -蕡 -妏 -鄋 -蒍 -奧 -抇 -蓨 -薆 -鱷 -巘 -䝉 -亰 -寈 -槩 -誒 -麴 -蕟 -溎 -蘗 -榦 -斿 -暟 -炲 -拚 -娖 -繖 -橚 -寜 -爀 -饟 -悅 -鯏 -彜 -眾 -葯 -嬝 -埮 -獇 -馛 -溙 -瀦 -熼 -硓 -鈢 -樆 -輬 -鰜 -蔘 -渙 -澔 -嗮 -旉 -籜 -媊 -燘 -儚 -頹 -缽 -俽 -逨 -鱓 
-郞 -歊 -杴 -珡 -杋 -醁 -鰏 -鵾 -鐽 -鮋 -巶 -荅 -薾 -囓 -蹻 -獎 -禑 -鎓 -榲 -僴 -綞 -尓 -敭 -曔 -褔 -鬅 -亊 -鏦 -蓘 -裬 -鱲 -薡 -鰗 -箑 -鬪 -縂 -璸 -甙 -茮 -辵 -岻 -覿 -滈 -鯶 -鑂 -囶 -舺 -溋 -拋 -菾 -敾 -虨 -綝 -蝍 -醂 -禨 -賹 -廧 -絕 -槗 -徫 -鎔 -曮 -蠂 -捒 -堈 -莕 -蓪 -敎 -禃 -櫱 -綧 -瀶 -逌 -浤 -碻 -刄 -逤 -剏 -氹 -菈 -娫 -蜛 -嵗 -糎 -螶 -譓 -鏳 -嵙 -瑊 -隲 -檨 -緈 -畵 -砯 -簗 -彅 -鰺 -騋 -窶 -嚒 -嵻 -尙 -頵 -槰 -虉 -醞 -巂 -彔 -偊 -畇 -鱨 -妸 -塲 -畐 -鈫 -錟 -磪 -摠 -彥 -璙 -囝 -寗 -耎 -鮡 -蘓 -弅 -焃 -飥 -戙 -塰 -儱 -槺 -噏 -魟 -禵 -佧 -咘 -盪 -瑈 -鉲 -睭 -鏌 -鼇 -郋 -魮 -朖 -滽 -渃 -滙 -熯 -醿 -鎅 -褀 -鬬 -巄 -螥 -眜 -釚 -柉 -壎 -峇 -姸 -唭 -鮜 -鈖 -嫈 -壄 -洤 -黃 -伕 -堦 -嶔 -鮰 -鞞 -漎 -鉓 -鮗 -壴 -阝 -妀 -矽 -獢 -倗 -銪 -鴓 -橒 -凈 -哖 -屚 -偍 -瑺 -媯 -淍 -驌 -椇 -赬 -薐 -糹 -碽 -濲 -釭 -晭 -纕 -寖 -閞 -歿 -呎 -鶆 -屄 -櫿 -犎 -旲 -㙟 -龎 -翜 -螾 -說 -衜 -泆 -軎 -鵂 -荎 -嚧 -硂 -桖 -褭 -筊 -鰷 -秳 -戩 -轀 -鬹 -飬 -卋 -暸 -狦 -搢 -娋 -鏴 -溫 -毉 -淰 -謩 -餺 -鵙 -鳽 -鮀 -狶 -氻 -轝 -妺 -袛 -蓭 -梂 -娛 -牼 -稅 -兿 -玾 -煚 -僩 -鶿 -鬄 -崠 -鉆 -鯓 -蚢 -庀 -鵟 -坣 -殼 -悞 -熅 -敻 -鍠 -曶 -愼 -搳 -姃 -砳 -槼 -臞 -韾 -靑 -鸊 -薲 -虛 -蠄 -啟 -鶺 -苺 -滾 -褞 -仺 -胇 -憻 -郳 -烉 -驩 -冇 -枖 -夌 -搵 -匸 -盨 -櫾 -霤 -麊 -貒 -噓 -嗢 -笩 -晈 -冂 -銳 -毿 -慜 -囧 -閜 -娸 -庢 -壆 -馯 -桱 -兗 -葃 -侅 -煐 -鐦 -藸 -鷎 -嵰 -逎 -弒 -匋 -鐭 -廔 -砩 -孆 -灴 -伷 -兪 -鴗 -澯 -幚 -旙 -勻 -礽 -婑 -鱮 -娍 -銶 -吳 -鍟 -仼 -鳧 -彞 -娽 -昛 -鰼 -剎 -佉 -鉏 -偸 -鰆 -讙 -橪 -啱 -岀 -孻 -釪 -乹 -鈳 -漇 -檦 -埻 -祿 -爌 -禇 -鱵 -㸃 -梉 -燝 -霙 -炁 -飮 -蠙 -勷 -鵎 -儥 -鐠 -唻 -廰 -嚿 -嵕 -墱 -紑 -搖 -瘜 -皝 -鸑 -瀁 -粵 -撚 -巑 -梀 -啯 -眛 -諴 -夊 -僙 -鍝 -裖 -鮣 -凬 -飡 -灊 -橓 -嫳 -筳 -咑 -粍 -瓑 -璌 -伃 -閰 -傜 -黐 -謢 -驒 -橫 -蛯 -寕 -蠵 -瞓 -旳 -翏 -硏 -寯 -韡 -楤 -鰃 -朿 -侞 -鵯 -愨 -祹 -厔 -丌 -盩 -謏 -魕 -啣 -閱 -曺 -枛 -罉 -卐 -樻 -鷉 -鯒 -鋡 -磱 -枱 -攴 -蠷 -穈 -嚟 -檽 -趐 -奐 -鋐 -檇 -薀 -峼 -咭 -訔 -韠 -鑴 -鸐 -唃 -捦 -鸜 -誴 -罳 -璄 -暃 -夀 -賨 -鞥 -鈊 -灡 -鮍 -懮 -籣 -昐 -陁 -襾 -鮠 -鈏 -囍 -婯 -艔 -貭 -䰾 -姁 -禼 -堖 -鋶 -仛 -鏷 -謜 -鑅 -忬 -蘶 -謠 -觙 -奫 -狟 -泩 -桙 -飈 -垰 -啍 -嚞 -鯕 -蒧 -榞 -徸 -璹 -揔 -欉 -魞 -菶 -玧 -鳯 -廍 -侚 -岰 -岧 -鋕 -凵 -彣 -崱 -媜 -倢 -鵐 -砋 -鷚 -鱠 -鮻 -繻 -摵 -贓 -磵 -錻 -痠 -粩 -胅 -奣 -塨 -瀠 -鸘 -啚 -娳 -霶 -壔 -峚 -甂 -廁 -覌 -鰂 -猳 -鱻 -盫 -裿 -杬 -歛 -澋 -蘞 -嵜 -尐 -旽 -鉌 -鎛 -豿 -凖 -榤 -禓 -龝 -悧 -鷟 -鮟 -吋 -喢 -岪 -吥 -漵 -頠 -豔 -巿 -鑨 -醣 -熳 -懍 -湥 -檡 -韺 -戱 -緖 -鐈 -凉 -緃 -鮹 -媐 -爯 -巆 -褍 -鐬 -昍 -扙 -鍳 -芛 -蟳 -嬅 -糬 -吔 -塭 -譿 -冧 -鏓 -嶪 -嗹 -椵 -姀 -閿 -褧 -錞 -玆 -笘 -篔 -萡 -鶡 -螐 -鮄 -鰟 -脷 -啲 -杤 -蓚 -尗 -娎 -殟 -淥 -蝚 -蓧 -彐 -嚤 -銍 -囒 -坶 -淩 -鶼 -鱂 -喼 -燫 -肏 -姵 -廌 -禟 -籝 -迵 -嵨 -堮 -蟌 -憍 -廕 -蜑 -緁 -唘 -竩 -崙 -璚 -粄 -栨 -罈 -梫 -貤 -藔 -蜯 -訁 -斖 -煶 -馦 -妠 -閟 -疕 -夆 -鎪 -膥 -澻 -嘢 -嚐 -靁 -鎻 -鰛 -穵 -烋 -縕 -褎 -疒 -壠 -溼 -圂 -咅 -鯭 -鯙 -磘 -玨 -珤 -朊 -蚼 -濶 -薞 -嚩 -丟 -嫺 -鯻 -椲 -鰕 -刂 -蠘 -踎 -瀴 -琁 -鰶 -瑴 -肜 -㐂 -欥 -媺 -竻 -讚 -𣇉 -裵 -緜 -廩 -齧 -叄 -俌 -厰 -滀 -錄 -鷫 -鯗 -攞 -姌 -蔝 -幷 -縤 -屻 -鯃 -雞 -纁 -嫲 -嵮 -屭 -嶃 -跩 -鋗 -蕢 -篊 -俬 -淎 -暻 -鏻 -憓 -玗 -溈 -笭 -糢 -勳 -閒 -沍 -咾 -鉷 -蘵 -俁 -崵 -毸 -苪 -掙 -鴡 -萭 -俴 -屜 -蒾 -艹 -剷 -慍 -朮 -枴 -氳 -猓 -甽 -箝 -譁 -贗 -迆 -鈽 -鍊 -鍰 -鏍 -靦 -餽 -丮 -丱 -仜 -仩 -伬 -伔 -仱 -伀 -伻 -佢 -佒 -侀 -侇 -佷 -佌 -佪 -侐 -侜 -俓 -侲 -俉 -侻 -侳 -俇 -倅 -倇 -倰 -倛 -倳 -倷 -俷 -倠 -偯 -偞 -偠 -偋 -偝 -偛 -偢 -偅 -偟 -偩 -偫 -傛 -傔 -傞 -傋 -傌 -傎 -傝 -偨 -傂 -傽 -傿 -僆 -傮 -僄 -僈 -傰 -僁 -傱 -僋 -僗 -僛 -僪 -僝 -僓 -僿 -儃 -儰 -僸 -僶 -僾 -儌 -僽 -儜 -儓 -儗 -儑 -儢 -儤 -儠 -儸 -儹 -儽 -冓 -冘 -冞 -凊 -凅 -凔 -刌 -刉 -刓 -刜 -刞 -刵 -刲 -剆 -刱 -剉 -剚 -剒 -剫 -剭 -剬 -剺 -剸 -剻 -剼 -劀 -劋 -劖 -劘 -劗 -劙 -劦 -勴 -匊 -匢 -匰 -匴 -匷 -匽 -卌 -卼 -厎 -厒 -厗 -厞 -厜 -厤 -厬 -厹 -吰 -吷 -吪 -呿 -咈 -呫 -呺 -呥 -呬 -呴 -茍 -咷 -咮 -咶 -哅 -咠 -咢 -唦 -唗 -唒 -哤 -唚 -唈 -哫 -唅 -唴 -啢 -唶 -啒 -啅 -唌 -唲 -喨 -喥 -喭 -噅 -喓 -喣 -啽 -喌 -嗃 -嗛 -嗋 -嗀 -喿 -喍 -嗏 -嗕 -嗈 -嘕 -嘒 -嗼 -嘐 -嘓 -嘂 -嗺 -嘝 -嘄 -嗿 -噈 -噊 -噆 -噚 -嘳 -嘽 -嘾 -噮 -噳 -噣 -噭 -噞 -嚌 -嚍 -嚃 -嚘 -嚜 -嚫 -嚪 -嚬 -嚲 -嚵 -嚽 -嚾 -囆 -囅 -囋 -囗 -圁 -圞 -圠 -坁 -坅 -坲 -坱 -垀 -坴 -垗 -垝 -垔 -垘 -垽 -垼 -埢 -埶 -堩 -堣 -塈 -堥 -塓 -塉 -塯 -塕 -塼 -墆 -塿 -塴 -墋 -塺 -墝 -墯 -壈 -墽 -壖 -壝 -壛 -壾 -壿 -夃 -夎 -夒 -夗 -奅 -奊 -奰 -奲 -奼 -妦 -妎 -妢 -妐 -妵 -姏 -姎 -㚷 -姡 -姺 -姼 -娭 -婐 -婟 -婥 -婓 -婗 -媔 -媟 -媢 -婸 -媦 -媥 -媬 -媕 -娷 -嫇 -嫋 -媰 -媻 -嫮 -嫥 -嫢 -嫛 -嫿 -嫴 -嫷 -嫶 -嬎 -嬓 -嬐 -嬲 -嬽 -孈 -屘 -孲 -孷 -宎 -宨 -寪 -寍 -寋 -寑 -寙 -寠 -寱 -尌 -尒 -尟 -尰 -尳 -屖 -屔 -屝 -屧 -屩 -屮 -屴 -岏 -岋 -岉 -岒 -岮 -岤 -岯 -岟 -岝 -峐 -峌 -峞 -峉 -峊 -峬 -峮 -峷 -崝 -崨 -崥 -崏 -崰 -崣 -崷 -嵃 -嵑 -崳 -崺 -嵂 -嵱 -嵣 
-嵥 -嵞 -嶀 -嵽 -嶆 -嵺 -嵷 -嶊 -嶉 -嶈 -嵾 -嶕 -嶜 -嶡 -嶚 -嶞 -嶱 -嶩 -嶵 -嶭 -巃 -巏 -巕 -巟 -巹 -帊 -帗 -帟 -帣 -帠 -帤 -帩 -帾 -帴 -幏 -幎 -幓 -幩 -幝 -幠 -幧 -幨 -幦 -幭 -幰 -庂 -庉 -庌 -庈 -庰 -庛 -庣 -庨 -庮 -庪 -庬 -庴 -廅 -廇 -廘 -廗 -廎 -廜 -緳 -廦 -廥 -廮 -廯 -蠯 -廾 -弚 -弝 -弣 -弤 -弮 -弳 -彃 -彉 -彋 -彏 -彯 -彴 -彸 -彾 -徦 -徥 -徯 -徲 -徾 -徿 -忀 -忁 -忔 -忕 -忨 -忣 -忷 -忥 -怭 -怲 -怋 -怴 -怗 -怚 -怞 -怬 -怢 -怐 -怮 -怓 -怷 -怹 -恲 -恞 -恅 -恇 -恉 -恛 -恌 -恀 -恟 -悀 -悁 -悕 -悗 -悇 -悊 -悐 -悾 -悺 -惓 -惤 -惈 -悷 -惉 -悹 -惌 -惢 -惄 -愊 -愖 -愅 -惵 -愓 -惸 -惼 -惾 -慉 -慅 -愶 -愲 -愮 -愯 -愬 -慁 -慞 -慱 -慒 -慓 -慲 -憀 -慴 -慔 -慺 -慛 -憃 -慹 -憱 -憰 -憢 -憉 -憛 -憯 -憟 -憪 -憡 -憝 -憖 -懅 -憴 -懆 -懁 -憿 -憸 -憵 -憼 -懧 -懠 -懥 -懤 -懘 -懭 -懱 -懪 -懰 -懫 -懻 -戁 -戃 -戄 -戉 -戠 -酨 -戺 -扐 -扜 -扤 -扡 -扢 -抆 -抌 -抎 -抏 -扻 -抭 -抴 -拑 -抾 -抪 -抶 -抮 -挍 -挋 -挃 -拫 -拹 -挏 -挌 -拸 -挀 -拲 -捖 -挬 -挶 -揤 -捊 -挼 -挩 -捁 -挴 -捘 -捔 -捥 -掝 -掗 -掫 -掯 -捵 -掜 -捼 -掤 -掔 -掱 -揎 -揥 -揨 -揯 -揊 -揲 -揵 -摡 -揟 -揝 -揜 -揘 -揅 -揱 -搆 -搟 -搕 -搘 -搹 -搷 -搣 -搰 -搊 -搚 -摀 -搧 -搫 -摍 -摝 -摲 -摦 -摎 -摋 -摓 -摐 -摿 -摮 -摰 -撢 -撠 -撗 -撜 -撋 -撊 -撌 -撟 -擗 -擖 -擏 -擉 -撽 -擩 -擣 -擫 -擭 -擨 -擽 -擸 -攇 -攐 -攍 -攌 -攗 -攕 -攓 -攡 -攠 -攦 -攩 -攭 -攲 -攳 -敁 -敊 -敆 -敓 -敧 -敪 -敤 -敜 -敯 -敳 -敶 -敺 -敹 -敿 -斁 -斀 -斄 -斒 -斔 -斞 -斨 -斪 -斻 -旍 -旓 -旚 -旝 -旟 -昲 -昦 -昢 -晇 -晥 -晜 -晼 -晬 -暀 -暆 -暍 -暋 -暡 -暰 -暩 -曀 -曊 -曋 -曏 -曒 -曚 -曣 -曭 -朁 -朅 -朄 -朒 -朘 -朣 -朾 -朹 -朻 -朼 -杅 -杇 -杝 -杗 -枎 -杶 -枆 -枌 -柲 -枺 -枻 -柸 -柀 -柅 -柫 -柤 -柍 -柮 -柣 -柂 -柧 -栚 -桋 -桏 -栱 -栵 -栫 -栭 -栯 -栘 -栔 -梡 -梇 -梐 -桭 -梮 -楖 -梬 -梩 -桵 -梒 -椌 -椄 -棜 -棷 -棳 -棌 -椈 -楰 -棯 -椔 -棸 -楟 -楎 -楱 -楅 -楺 -楈 -楛 -楉 -楬 -椳 -楀 -楄 -楶 -楘 -榶 -槉 -榠 -榬 -榼 -榙 -榩 -榾 -榯 -槄 -榽 -榹 -槥 -槸 -樕 -樠 -槬 -槢 -樛 -樝 -槾 -樧 -槮 -樔 -槷 -橀 -樴 -橉 -橧 -樲 -橨 -橝 -橭 -橶 -樿 -橁 -檍 -檖 -檁 -檟 -橾 -檛 -檓 -檕 -檃 -櫅 -檹 -櫡 -櫠 -櫌 -櫑 -櫙 -櫋 -櫜 -櫐 -櫫 -櫬 -櫰 -櫹 -櫺 -櫼 -欃 -欋 -欈 -欐 -欑 -欘 -欨 -欴 -欯 -欭 -欱 -欶 -欳 -欷 -欿 -歂 -歈 -歍 -歋 -歕 -歔 -歜 -歠 -歭 -歾 -肂 -殈 -殏 -殔 -殗 -殙 -殠 -殥 -殢 -殦 -殧 -殰 -殶 -毃 -毄 -毈 -毇 -毊 -毚 -毞 -毦 -毤 -毨 -毣 -毰 -毲 -毻 -毼 -毾 -氁 -氀 -氄 -氠 -氶 -汃 -汒 -汏 -汍 -汸 -沋 -汱 -汯 -沕 -汦 -汳 -泬 -沶 -沬 -泧 -沷 -泭 -泲 -泒 -沴 -洟 -洊 -洀 -浺 -浶 -洍 -涒 -浘 -浢 -涊 -涆 -浧 -涗 -涳 -涬 -淢 -涷 -淔 -渀 -淈 -涾 -淊 -涽 -淭 -湆 -湇 -湅 -湢 -渿 -湁 -渜 -渳 -湀 -渻 -渮 -湨 -湡 -渱 -渨 -湠 -湱 -湩 -渹 -溛 -滖 -溓 -溔 -滒 -溰 -溾 -滜 -滵 -滱 -漃 -漥 -漮 -潎 -漙 -漧 -漘 -漒 -滭 -漊 -潳 -滮 -潀 -漰 -潃 -漅 -濆 -澒 -澅 -潚 -潠 -澖 -潶 -潬 -潒 -潐 -潗 -澓 -潝 -濇 -濎 -濈 -濄 -澞 -澨 -瀄 -濌 -澩 -濴 -濔 -濣 -濭 -濧 -濦 -瀇 -瀎 -濿 -瀀 -濻 -瀙 -瀖 -瀫 -瀡 -瀢 -瀩 -瀯 -瀷 -灂 -瀸 -瀿 -瀺 -灄 -灉 -灖 -灗 -灛 -灟 -灨 -灩 -灪 -炾 -炰 -烓 -烑 -缹 -焍 -烰 -焠 -焮 -焣 -煆 -煣 -煝 -熐 -熉 -熀 -熂 -熚 -燅 -燂 -熸 -燀 -燡 -爁 -爊 -爂 -爓 -爞 -爢 -爣 -牄 -牉 -牋 -牏 -牣 -牬 -牰 -牸 -牷 -犈 -犉 -犆 -犅 -犌 -犑 -犐 -犗 -犕 -犓 -犘 -犚 -犝 -犞 -犥 -犦 -犤 -犣 -犩 -犪 -犮 -犵 -犿 -狆 -狖 -狋 -狘 -狜 -狔 -狚 -狌 -狑 -狊 -狤 -狫 -狪 -狣 -猀 -狾 -猑 -猘 -猈 -狿 -猏 -猋 -猒 -猧 -猲 -猭 -猦 -猣 -猵 -猼 -獂 -獀 -獊 -獑 -獌 -獘 -獞 -獟 -獝 -獛 -獡 -獩 -獦 -獥 -獳 -獶 -獽 -獿 -玂 -玁 -玈 -玊 -玔 -珓 -珶 -琖 -瑵 -璊 -瑽 -璅 -瑿 -璗 -瓁 -瓋 -瓝 -瓟 -瓡 -瓥 -瓨 -瓬 -瓵 -瓾 -瓽 -甀 -甃 -甈 -甋 -甐 -甒 -甔 -甖 -甝 -甮 -甿 -畟 -畣 -畽 -疀 -疧 -痁 -疻 -痀 -痎 -痏 -痋 -痌 -痑 -痚 -痡 -痝 -痗 -痯 -瘏 -痷 -痸 -痻 -瘈 -瘑 -瘝 -瘣 -瘯 -瘱 -瘽 -癈 -癉 -癙 -癐 -癓 -癠 -癵 -癹 -皊 -皏 -皫 -皯 -皵 -皻 -皽 -皾 -盄 -盓 -盝 -盬 -盭 -盳 -眃 -眅 -盻 -眝 -眐 -眓 -眒 -眣 -眑 -眕 -眹 -眱 -眲 -眴 -眳 -眽 -睆 -睅 -睊 -睋 -睌 -睕 -睟 -睒 -睖 -睩 -睧 -睔 -瞁 -睼 -瞂 -睮 -睯 -瞏 -瞉 -瞚 -瞝 -瞡 -瞛 -瞲 -瞷 -瞶 -瞴 -矂 -矉 -矊 -矌 -矎 -矏 -矐 -矔 -矕 -矘 -矠 -矱 -矲 -矹 -矺 -砅 -砐 -砏 -砎 -砨 -硈 -硉 -硠 -硥 -硱 -硰 -硩 -碔 -碄 -碅 -碆 -硾 -碫 -碞 -磍 -磌 -磎 -磈 -磃 -磝 -磩 -磥 -磞 -磛 -磳 -磼 -磿 -礔 -礉 -礝 -礛 -礜 -礥 -礣 -礧 -礨 -礭 -礿 -祌 -祅 -祔 -祒 -祑 -祤 -祩 -祪 -祣 -祫 -祡 -祴 -祳 -禂 -禗 -禜 -禫 -禭 -禬 -禴 -禷 -禸 -歶 -秅 -秏 -秖 -秎 -秮 -秪 -秺 -秶 -稊 -稒 -稫 -穊 -稰 -稯 -穋 -穛 -穖 -穧 -穨 -穮 -穬 -穭 -穱 -穾 -窆 -窉 -窌 -窏 -窔 -窐 -窙 -窢 -窞 -窫 -窲 -窴 -窱 -窾 -竀 -竁 -竷 -笐 -笓 -笅 -笵 -笻 -笴 -笰 -笢 -笝 -笲 -筄 -筡 -箈 -箊 -箌 -箛 -箎 -箘 -箄 -箷 -箾 -篎 -箯 -箹 -篞 -篣 -篧 -篕 -篨 -篹 -簅 -篲 -篿 -篻 -簎 -篴 -簂 -簁 -篸 -篽 -簜 -簩 -簙 -簭 -簦 -簨 -簢 -簥 -簳 -簼 -簬 -簻 -籉 -籈 -籊 -籔 -籗 -籧 -籦 -籯 -籺 -籸 -籹 -粊 -粔 -粻 -糔 -糪 -糱 -糷 -紎 -紟 -紒 -紽 -紸 -紶 -紩 -絇 -紾 -絘 -絯 -絓 -絧 -絏 -絭 -絫 -綀 -綍 -絿 -綅 -絻 -絼 -綔 -綷 -緂 -綪 -緀 -緅 -緎 -緆 -緌 -綯 -綼 -緷 -緛 -緪 -緧 -縃 
-緺 -緶 -緰 -縗 -縌 -縓 -縎 -縜 -縚 -縏 -縼 -繂 -縳 -顈 -繈 -縸 -縪 -繉 -繀 -縩 -緵 -縰 -縿 -縶 -繜 -繐 -繣 -繘 -繢 -繟 -繑 -繠 -繶 -繵 -繸 -繷 -繺 -繲 -繴 -纀 -纇 -纋 -纆 -纑 -纗 -纚 -缿 -罊 -罏 -罜 -罞 -罝 -罛 -罣 -罥 -罦 -罭 -罫 -罬 -罻 -罼 -罺 -罿 -羃 -羉 -羍 -羒 -羜 -羛 -羢 -羠 -羦 -羬 -羭 -羵 -羳 -羷 -羺 -羾 -翋 -翍 -翐 -翑 -翇 -翢 -翣 -翭 -翪 -翨 -翴 -翲 -翽 -翿 -耟 -耞 -耡 -耴 -耾 -耹 -聇 -聈 -聑 -聏 -聝 -肕 -肙 -肒 -肣 -肵 -胘 -胑 -胐 -胕 -胉 -胏 -胹 -胵 -脁 -胻 -脀 -胾 -胔 -脰 -脥 -脤 -脙 -脡 -脕 -脧 -腃 -腏 -腄 -腇 -脽 -腍 -腤 -腷 -腜 -腛 -腢 -腲 -朡 -腞 -腶 -膉 -膆 -膃 -膇 -膍 -膌 -膋 -膟 -膕 -膢 -膱 -膹 -膫 -膰 -膬 -膴 -膲 -臇 -膷 -臄 -臅 -臒 -臐 -臗 -臛 -臡 -臦 -臩 -臮 -臲 -臷 -臸 -臿 -舋 -舑 -舕 -舝 -舡 -舼 -舽 -艀 -艂 -艓 -艒 -艐 -艑 -艕 -艛 -艵 -艼 -芀 -芐 -芅 -芓 -芔 -苀 -芚 -芵 -芧 -芞 -芺 -苙 -苨 -苖 -苬 -苲 -苵 -苶 -茙 -茥 -茿 -茦 -茢 -荂 -茪 -荍 -茖 -茤 -茠 -茩 -茻 -莐 -莣 -莍 -荺 -莤 -荴 -莏 -莁 -荵 -莔 -莃 -莌 -莋 -荾 -莥 -菨 -萒 -菧 -菤 -菆 -菣 -菿 -菋 -菎 -菵 -萉 -菞 -菳 -菕 -蓱 -萿 -葹 -葥 -葀 -葧 -萰 -葍 -葽 -蔇 -葞 -萷 -萺 -萴 -葅 -菙 -葋 -萯 -葂 -葟 -葌 -蓎 -蒬 -蒮 -蒫 -蒪 -蒚 -蒝 -蓌 -蒛 -蒩 -蒘 -蒶 -蒠 -蔤 -蔏 -蔩 -蔉 -蔍 -蔧 -蔜 -蓻 -蓺 -蓴 -蔪 -蓲 -蓷 -蓫 -蔒 -蓩 -蔖 -蓾 -蔨 -蔮 -蔂 -蓶 -蔱 -蓹 -蔠 -蔰 -蕫 -蕍 -蕀 -蕆 -蕄 -蕇 -蕣 -蕛 -蕱 -蕵 -蕮 -蕧 -蕠 -蕦 -蕝 -薃 -薧 -薕 -薠 -薋 -薣 -薚 -蕼 -薉 -蕸 -薎 -薖 -薍 -薝 -薂 -藆 -藀 -藃 -藂 -薵 -薽 -藇 -藄 -藋 -藈 -藅 -薱 -薶 -藒 -藫 -藱 -藙 -藡 -藚 -藗 -藲 -藬 -藘 -藣 -藑 -藰 -蘁 -藾 -蘛 -蘉 -蘌 -蘪 -蘦 -蘟 -蘣 -蘜 -蘙 -蘮 -蘡 -蘠 -蘥 -蘴 -蘳 -蘬 -虀 -蘹 -蘱 -蘻 -蘾 -虃 -虆 -虇 -虈 -虌 -虋 -虙 -虡 -虣 -虩 -虪 -虰 -虭 -虴 -蚑 -蚞 -蚇 -蚗 -蚚 -蚅 -蚥 -蚙 -蚿 -蚷 -蛂 -蛁 -蛅 -蛈 -蚹 -蚳 -蚸 -蛌 -蚻 -蛢 -蛦 -蛓 -蛣 -蛚 -蛪 -蛝 -蛫 -蛜 -蛬 -蛗 -蜄 -蛷 -蜌 -蛖 -蛵 -蜁 -蛶 -蜳 -蝫 -蜙 -蝃 -蜬 -蝁 -蝆 -蜠 -蜲 -蜪 -蜭 -蜼 -蜵 -蝂 -蜦 -蜧 -蜸 -蜤 -蜰 -蝖 -蝷 -蟡 -蝳 -蝔 -蝛 -蝒 -蝑 -蝞 -蝭 -蝪 -蝐 -蝝 -蝬 -蝺 -蝜 -螛 -螏 -螓 -螒 -螁 -螖 -螘 -蝹 -螇 -螑 -螝 -螜 -螚 -螪 -螰 -螹 -螼 -螮 -蟉 -蟃 -蟂 -螷 -螴 -螿 -螸 -蟞 -蟧 -蟦 -蟢 -蟟 -蟤 -蟔 -蟓 -蟭 -蟘 -螤 -蟗 -蟙 -蠁 -蟨 -蠀 -蟺 -蠉 -蠌 -蟼 -蠈 -蟿 -蠗 -蠩 -蠝 -蠛 -蠠 -蠤 -蠜 -蠫 -蠬 -蠨 -蠦 -蠪 -蠥 -蠰 -蠮 -蠳 -蠸 -蠾 -蠽 -蠿 -衁 -衈 -衋 -衧 -衪 -衭 -衶 -袀 -衱 -衯 -袃 -袉 -袕 -袨 -袚 -袑 -袡 -袘 -袧 -袬 -袌 -袺 -裗 -袹 -袸 -裀 -袶 -袽 -袲 -裋 -裍 -裞 -裚 -裷 -裧 -裺 -裮 -裶 -裯 -裻 -褁 -褅 -褋 -褗 -褆 -褖 -褑 -褦 -褮 -褱 -褢 -褩 -褵 -褼 -褾 -襒 -褷 -襂 -褽 -襓 -襋 -襆 -襐 -襛 -襗 -襡 -襘 -襝 -襣 -襭 -襩 -襮 -襳 -襹 -襺 -覂 -覅 -覕 -覛 -覝 -覢 -覤 -覣 -覭 -覮 -覶 -觓 -觤 -觡 -觠 -觢 -觩 -觰 -觬 -觲 -觷 -觺 -觻 -觼 -觾 -訑 -訰 -訧 -訬 -訞 -詍 -訹 -詙 -詀 -詄 -詅 -訿 -誂 -詻 -誃 -誫 -誙 -誋 -諆 -誸 -諔 -諕 -誻 -諀 -諅 -諵 -諝 -諰 -諈 -謞 -謘 -謑 -謋 -謒 -謕 -謍 -謈 -謪 -謧 -謣 -謰 -謵 -譇 -謯 -謱 -謥 -謷 -謦 -譐 -譈 -譊 -譀 -譋 -譕 -譑 -譠 -譪 -譝 -譨 -譣 -譥 -譹 -譸 -譅 -譺 -譻 -譾 -讄 -讂 -讆 -讋 -讔 -讘 -讟 -谹 -谻 -谽 -谾 -豃 -豋 -豍 -豏 -豗 -豜 -豝 -豟 -豥 -豤 -豦 -豭 -豰 -豲 -豱 -豯 -豵 -豷 -豶 -豻 -豽 -貁 -貀 -貄 -貏 -貑 -貕 -貙 -貗 -貜 -貣 -貾 -賌 -賥 -賟 -賙 -賵 -賮 -贆 -贕 -贙 -赨 -赩 -赮 -赸 -趀 -趌 -趎 -趏 -趍 -趓 -趠 -趜 -趡 -趥 -趧 -趬 -趪 -趭 -趫 -趮 -趷 -趹 -跘 -跓 -跍 -跇 -跜 -跕 -跙 -跈 -跰 -跠 -跮 -跦 -跢 -跧 -跲 -跫 -踂 -跿 -踍 -踃 -踇 -踆 -跾 -踠 -踥 -踤 -踡 -踕 -踛 -踖 -踑 -踙 -踧 -踘 -踓 -踳 -踾 -踸 -踼 -蹎 -蹍 -蹓 -蹗 -蹖 -蹞 -蹥 -蹛 -蹡 -蹝 -蹔 -蹸 -蹳 -蹪 -躆 -躈 -躖 -躗 -躟 -躠 -躤 -躣 -躩 -躨 -躽 -軓 -軘 -軞 -軯 -軷 -軦 -軮 -軥 -軵 -軧 -軨 -軶 -軱 -軬 -輆 -軿 -輁 -輀 -輂 -輐 -輑 -輤 -輘 -輚 -輠 -輣 -輖 -輗 -輮 -輵 -輲 -輹 -輷 -輴 -轃 -轇 -轈 -轒 -轑 -轏 -轐 -轓 -轙 -轖 -轗 -轕 -轚 -轞 -轛 -轠 -辴 -迉 -迒 -迋 -迍 -迖 -迣 -迡 -迾 -迿 -逜 -逿 -遝 -遳 -遰 -遻 -邆 -邅 -遾 -邍 -邔 -邟 -邥 -邞 -邧 -郱 -郕 -郖 -郠 -郙 -郣 -郥 -郘 -郰 -郲 -郔 -鄬 -郼 -鄈 -郹 -郻 -鄁 -鄇 -郺 -鄐 -鄍 -鄏 -鄎 -鄟 -鄝 -鄡 -鄛 -鄨 -鄪 -鄦 -鄮 -鄵 -鄸 -鄻 -鄾 -酀 -酁 -酄 -酇 -酖 -酘 -酓 -酟 -酳 -醆 -醊 -醓 -醙 -醟 -醥 -醧 -醰 -醱 -醷 -醲 -醳 -醹 -醽 -釂 -釃 -釢 -釱 -釳 -釸 -鈚 -鈌 -鈒 -釽 -鈆 -鉒 -鉠 -鉯 -鈶 -鉼 -銤 -銛 -銔 -鉹 -銗 -鋄 -鋀 -鋟 -鋘 -鋩 -鋝 -鋂 -鋊 -錧 -錼 -錭 -錎 -鋋 -鎡 -鎃 -鎯 -鍖 -鍜 -鍐 -鍭 -鍌 -鎒 -鎷 -鎝 -鎉 -鎎 -鎞 -鏏 -鏂 -鏚 -鏬 -鏙 -鐋 -鐏 -鏾 -鐕 -鐨 -鐍 -鐀 -鐎 -鐖 -鐻 -鐶 -鑐 -鑋 -鑕 -鑮 -鑯 -钂 -钀 -钁 -钃 -镺 -镻 -镼 -镽 -閈 -閍 -閺 -閵 -闀 -闉 -闅 -閷 -闒 -闑 -闚 -闛 -闠 -闟 -闤 -阞 -阢 -阤 -阠 -阰 -阹 -阸 -阺 -陏 -陓 -陊 -陼 -陭 -陫 -隇 -陾 -隉 -隒 -隓 -隞 -隤 -隿 -雂 -雈 -雓 -雔 -雗 -雚 -雟 -雘 -雺 -雽 -雿 -霂 -霋 -霒 -霐 -霠 -霣 -霢 -霩 -霫 -霬 -霮 -霵 -霿 -靆 -靃 -靪 -靮 -靷 -靲 -靾 -鞃 -鞀 -鞂 -靻 -鞊 -鞎 -鞈 -鞙 -鞗 -鞚 -鞜 -鞤 -鞪 -鞷 -鞶 -鞹 -鞻 -鞿 -韄 -韅 -韇 -韎 -韐 -韏 -韕 -韔 -韗 -韝 -韟 -韣 -韥 -韰 -韱 -韹 -韽 -頄 -頖 -頞 -頝 -頩 -頨 -頯 -頲 -顁 -顄 -顊 -顉 -顅 -顐 -顑 -顜 -顝 -顠 -顣 -顟 -顤 
-顪 -顩 -顲 -颬 -颲 -颸 -颽 -颻 -颾 -飁 -飂 -飉 -飋 -飌 -飣 -飶 -餂 -餀 -飺 -餔 -餖 -餕 -餤 -餟 -餥 -餫 -餪 -餲 -餯 -餭 -餱 -餰 -饁 -饇 -饐 -饎 -饙 -饘 -饛 -饡 -馣 -馲 -馰 -馵 -馻 -馺 -駂 -馽 -駜 -駍 -駏 -駎 -駖 -駮 -駬 -駥 -駤 -駣 -駩 -駺 -駴 -駷 -駹 -駶 -駻 -駽 -駾 -騃 -騉 -騑 -騊 -騇 -騚 -騕 -騥 -騝 -騛 -騢 -騠 -騧 -騞 -騜 -騵 -騲 -騴 -騱 -騬 -騪 -騩 -騹 -騽 -驆 -騺 -驓 -驔 -驈 -驉 -驖 -驞 -驠 -驦 -驨 -骭 -骫 -骹 -骿 -骴 -骾 -髇 -髊 -髆 -髍 -髐 -髟 -髧 -髬 -髳 -髶 -髺 -髾 -鬁 -髼 -鬋 -鬊 -鬎 -鬌 -鬐 -鬕 -鬗 -鬖 -鬙 -鬞 -鬠 -鬤 -鬫 -鬳 -鬵 -鬺 -鬾 -鬿 -魊 -魌 -魖 -魠 -魡 -魧 -魱 -魦 -魶 -魵 -鮅 -鮇 -魼 -魾 -魻 -鮂 -鮚 -鮞 -鮛 -鮦 -鮥 -鮤 -鮆 -鯆 -鮿 -鮵 -鯈 -鯫 -鯠 -鯞 -鯦 -鯬 -鰌 -鰋 -鰅 -鯸 -鰫 -鰝 -鰬 -鱆 -鰿 -鱄 -鱁 -鰴 -鱐 -鱍 -鱋 -鱕 -鱦 -鱢 -鱞 -鱴 -鱳 -鱹 -鳦 -鳪 -鳭 -鳱 -鳵 -鳼 -鳺 -鳿 -鳷 -鴀 -鳹 -鳻 -鴅 -鴃 -鴥 -鴠 -鴔 -鴩 -鴘 -鴢 -鴐 -鴳 -鵁 -鵧 -鴶 -鴮 -鴱 -鴸 -鵅 -鵃 -鴾 -鵀 -鴽 -鵏 -鵊 -鵛 -鵋 -鵖 -鵌 -鵗 -鵔 -鵷 -鶁 -鶊 -鶄 -鶈 -鵱 -鶀 -鵸 -鶋 -鶌 -鵽 -鵫 -鵴 -鵩 -鶅 -鵳 -鵻 -鶂 -鵹 -鶟 -鶙 -鶤 -鶝 -鶐 -鶛 -鶠 -鶔 -鶜 -鶪 -鶗 -鶢 -鶨 -鶞 -鶣 -鶖 -鶷 -鶶 -鷁 -鷇 -鷊 -鷏 -鶾 -鷅 -鷃 -鶵 -鷈 -鶱 -鶭 -鷛 -鷒 -鷞 -鷋 -鷐 -鷜 -鷑 -鷩 -鷘 -鷖 -鷵 -鷕 -鷻 -鷷 -鷣 -鷤 -鷶 -鷡 -鷮 -鷢 -鸂 -鷾 -鸇 -鸃 -鸆 -鸅 -鸀 -鸁 -鸉 -鷿 -鷽 -鸄 -鸋 -鸍 -鸏 -鸒 -鸔 -鸓 -鸗 -鸙 -鹺 -麃 -麆 -麉 -麎 -麌 -麔 -麙 -麛 -麚 -麜 -麠 -麡 -麧 -麮 -麰 -麶 -麷 -黀 -黂 -黈 -黓 -黕 -黖 -黚 -黤 -黫 -黮 -黭 -黰 -黳 -黵 -黺 -鼁 -鼀 -鼆 -鼊 -鼏 -鼖 -鼛 -鼘 -鼜 -鼤 -鼣 -鼥 -鼪 -鼨 -鼭 -鼰 -鼮 -鼵 -鼳 -鼲 -鼸 -鼶 -齀 -齂 -齃 -齌 -齍 -齎 -齖 -齗 -齘 -齛 -齠 -齞 -齝 -齥 -齤 -齫 -齱 -齰 -齮 -齯 -齴 -齵 -齸 -齻 -齺 -齹 -齾 -龒 -龤 -堔 -礂 -蒏 -蒆 -兙 -兛 -兞 -兝 -兡 -兣 -嗧 -瓩 -忼 -擡 -氊 -穇 -擧 -譌 -! -" -# -$ -% -& -' -( -) -* -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -< -= -> -? -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -Q -R -S -T -U -V -W -X -Y -Z -[ -] -_ -` -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -© -° -² -´ -½ -Á -Ä -Å -Ç -È -É -Í -Ó -Ö -× -Ü -ß -à -á -â -ã -ä -å -æ -ç -è -é -ê -ë -í -ð -ñ -ò -ó -ô -õ -ö -ø -ú -û -ü -ý -ā -ă -ą -ć -Č -č -đ -ē -ė -ę -ğ -ī -ı -Ł -ł -ń -ň -ō -ř -Ş -ş -Š -š -ţ -ū -ż -Ž -ž -Ș -ș -ț -Δ -α -λ -μ -φ -Г -О -а -в -л -о -р -с -т -я -ồ -— -― -’ -“ -” -… -℃ -→ -∇ -− -■ -☆ -、 -。 -々 -〆 -〈 -〉 -「 -」 -『 -』 -〔 -〕 -〜 -! -# -% -& -( -) -+ -, -- -. -/ -0 -1 -2 -3 -4 -5 -6 -7 -8 -9 -: -; -= -? 
-@ -A -B -C -D -E -F -G -H -I -J -K -L -M -N -O -P -R -S -T -U -V -W -X -Z -a -b -c -d -e -f -g -h -i -j -k -l -m -n -o -p -q -r -s -t -u -v -w -x -y -z -~ -・ -ǎ -ǒ -ě -ǐ -ì -ǔ -ù -ǖ -ǘ -ǚ -ǜ -【 -】 -《 -》 -‥ -{ -} -\ -| -@ -^ -~ -÷ -∕ -∙ -⋅ -· -⊕ -⊖ -⊗ -⊘ -⊙ -± -∓ -∩ -∪ -□ -⊎ -⊓ -⊔ -≠ -≈ -≡ -≤ -≥ -≪ -≫ -≲ -≳ -≶ -≷ -≺ -≻ -≼ -≽ -∈ -∉ -⊂ -⊃ -⊆ -⊇ -⊄ -⊅ -∅ -∖ -∁ -∆ -∧ -∨ -¬ -⊻ -⊼ -⊽ -← -↔ -⇒ -⇐ -⇔ -∀ -∃ -∄ -∴ -∵ -∝ -∞ -⊥ -∟ -∠ -∡ -∢ -′ -″ -∥ -⊾ -⊿ -∂ -∫ -∬ -∭ -∮ -∯ -∰ -∑ -∏ -√ -∛ -∜ -∱ -∲ -∳ -∶ -∷ -∼ -® -≄ -≅ -≃ -≦ -≧ -⊈ -⊉ -⊢ -⊤ -⊨ -⊧ -℉ -Ω -℧ -Å -⌀ -ℏ -⅀ -⍺ -⍵ -¢ -€ -£ -¥ -¥ -₿ -↑ -↓ -↕ -↖ -↗ -↘ -↙ -↺ -↻ -↼ -↽ -↾ -↿ -⇀ -⇁ -⇂ -⇃ -⇋ -⇌ -ª -º -⁰ -¹ -³ -⁴ -⁵ -⁶ -⁷ -⁸ -⁹ -⁺ -⁻ -⁼ -⁽ -⁾ -ⁿ -₀ -₁ -₂ -₃ -₄ -₅ -₆ -₇ -₈ -₉ -₊ -₋ -₌ -₍ -₎ -Ⅰ -Ⅱ -Ⅲ -Ⅳ -Ⅴ -Ⅵ -Ⅶ -Ⅷ -Ⅸ -Ⅹ -Ⅺ -Ⅻ -ⅰ -ⅱ -ⅲ -ⅳ -ⅴ -ⅵ -ⅶ -ⅷ -ⅸ -ⅹ -ⅺ -ⅻ -☰ -☱ -☲ -☳ -☴ -☵ -☶ -☷ -♀ -♂ -♳ -♴ -♵ -♶ -♷ -♸ -♹ -♺ -♩ -♪ -♫ -♬ -⚪ -⚫ -⚬ -✶ -✷ -✸ -➀ -➁ -➂ -➃ -➄ -➅ -➆ -➇ -➈ -➉ -➊ -➋ -➌ -➍ -➎ -➏ -➐ -➑ -➒ -➓ -⏀ -⏁ -⏂ -⏃ -⏄ -⏅ -⏆ -⏇ -⏈ -⏉ -⏊ -⏋ -⏌ -⏚ -⏴ -⏵ -⏶ -⏷ -⏸ -⏹ -⏺ -⏻ -⏼ -Α -Β -Γ -Ε -Ζ -Η -Θ -Ι -Κ -Λ -Μ -Ν -Ξ -Ο -Π -Ρ -Σ -Τ -Υ -Φ -Χ -Ψ -β -γ -δ -ε -ζ -η -θ -ι -κ -ν -ξ -ο -π -ρ -σ -τ -υ -χ -ψ -ω -ϐ -ϑ -ϒ -ϕ -█ -ϖ -ϰ -ϱ -ϴ -ϵ -ϝ -Ϟ -ϟ -Ϡ -ϡ -Ϣ -ϣ -Ϥ -ϥ -Ϧ -ϧ -Ϩ -ϩ -Ϫ -ϫ -Ϭ -ϭ -Ϯ -ϯ -∸ -∹ -∺ -∻ -∽ -∾ -∿ -≀ -≁ -≂ -≆ -≇ -≉ -≊ -≋ -≌ -≍ -≎ -≏ -≐ -≑ -≒ -≓ -≔ -≕ -≖ -≗ -≘ -≙ -≚ -≛ -≜ -≝ -≞ -≟ -≢ -≣ -≨ -≩ -≬ -≭ -≮ -≯ -≰ -≱ -≴ -≵ -≸ -≹ -≾ -≿ -⊀ -⊁ -⊊ -⊋ -⊌ -⊍ -⊏ -⊐ -⊑ -⊒ -⊚ -⊛ -⊜ -⊝ -⊞ -⊟ -⊠ -⊡ -⊣ -⊦ -⊩ -⊪ -⊫ -⊬ -⊭ -⊮ -⊯ -⊰ -⊱ -⊲ -⊳ -⊴ -⊵ -⊶ -⊷ -⊸ -⊹ -⊺ -ℎ -℘ -ℜ -ℑ -ℵ -ℶ -ℷ -ℸ -⌬ -⌭ -⌮ -⌯ -⎔ -¤ -₠ -₡ -₢ -₣ -₤ -₥ -₦ -₧ -₨ -₩ -₪ -₫ -₭ -₮ -₯ -₰ -₱ -₲ -₳ -₴ -₵ -₶ -₷ -₸ -₹ -₺ -₻ -₼ -₽ -₾ -↚ -↛ -↜ -↝ -↞ -↟ -↠ -↡ -↢ -↣ -↤ -↥ -↦ -↧ -↨ -↩ -↪ -↫ -↬ -↭ -↮ -↯ -↰ -↱ -↲ -↳ -↴ -↵ -↶ -↷ -↸ -↹ -⇄ -⇅ -⇆ -⇇ -⇈ -⇉ -⇊ -⇍ -⇎ -⇏ -⇑ -⇓ -⇕ -⇖ -⇗ -⇘ -⇙ -⇚ -⇛ -⇜ -⇝ -⇞ -⇟ -⇠ -⇡ -⇢ -⇣ -⇤ -⇥ -⇦ -⇧ -⇨ -⇩ -⇪ -⇫ -⇬ -⇭ -⇮ -⇯ -⇰ -⇱ -⇲ -⇳ -⇴ -⇵ -⇶ -⇷ -⇸ -⇹ -⇺ -⇻ -⇼ -⇽ -⇾ -⇿ -ↀ -ↁ -ↂ -☀ -☁ -☂ -☃ -☄ -★ -☇ -☈ -☉ -☊ -☋ -☌ -☍ -☎ -☏ -☐ -☑ -☒ -☓ -☔ -☕ -☖ -☗ -☘ -☙ -☚ -☛ -☜ -☝ -☞ -☟ -☠ -☡ -☢ -☣ -☤ -☥ -☦ -☧ -☨ -☩ -☪ -☫ -☬ -☭ -☮ -☯ -☸ -☹ -☺ -☻ -☼ -☽ -☾ -☿ -♁ -♃ -♄ -♅ -♆ -♇ -♔ -♕ -♖ -♗ -♘ -♙ -♚ -♛ -♜ -♝ -♞ -♟ -♠ -♡ -♢ -♣ -♤ -♥ -♦ -♧ -♨ -♭ -♮ -♯ -♰ -♱ -♲ -♻ -♼ -♽ -♾ -⚀ -⚁ -⚂ -⚃ -⚄ -⚅ -⚆ -⚇ -⚈ -⚉ -⚊ -⚋ -⚌ -⚍ -⚎ -⚏ -⚐ -⚑ -⚒ -⚓ -⚔ -⚕ -⚖ -⚗ -⚘ -⚙ -⚚ -⚛ -⚜ -⚝ -⚞ -⚟ -⚠ -⚡ -⚢ -⚣ -⚤ -⚥ -⚦ -⚧ -⚨ -⚩ -⚭ -⚮ -⚯ -⚰ -⚱ -⚲ -⚳ -⚴ -⚵ -⚶ -⚷ -⚸ -⚹ -⚺ -⚻ -⚼ -⚿ -⛀ -⛁ -⛂ -⛃ -⛆ -⛇ -⛈ -⛉ -⛊ -⛋ -⛌ -⛍ -⛏ -⛐ -⛑ -⛒ -⛓ -⛕ -⛖ -⛗ -⛘ -⛙ -⛚ -⛛ -⛜ -⛝ -⛞ -⛠ -⛡ -⛢ -⛣ -⛤ -⛥ -⛦ -⛧ -⛨ -⛩ -⛪ -⛫ -⛬ -⛭ -⛮ -⛯ -⛶ -⛾ -⛿ -✆ -✇ -✈ -✉ -✌ -✍ -✎ -✏ -✐ -✑ -✒ -✓ -✔ -✕ -✙ -✚ -✛ -✜ -✝ -✞ -✟ -✠ -✡ -✢ -✣ -✤ -✥ -✦ -✧ -✩ -✪ -✫ -✬ -✭ -✮ -✯ -✰ -✱ -✲ -✳ -✴ -✵ -✹ -✺ -✻ -✼ -✽ -✾ -✿ -❀ -❁ -❂ -❃ -❄ -❅ -❆ -❇ -❈ -❉ -❊ -❋ -❍ -❏ -❐ -❑ -❒ -❖ -❘ -❙ -❚ -❛ -❜ -❝ -❞ -❡ -❢ -❣ -❤ -❥ -❦ -❧ -❨ -❩ -❪ -❫ -❬ -❭ -❮ -❯ -❰ -❱ -❲ -❳ -❴ -❵ -❶ -❷ -❸ -❹ -❺ -❻ -❼ -❽ -❾ -❿ -① -② -③ -④ -⑤ -⑥ -⑦ -⑧ -⑨ -⑩ -➔ -➕ -➖ -➗ -➘ -➙ -➚ -➛ -➜ -➝ -➞ -➟ -➠ -➡ -➢ -➣ -➤ -➥ -➦ -➧ -➨ -➩ -➪ -➫ -➬ -➭ -➮ -➯ -➰ -➱ -➲ -➳ -➴ -➵ -➶ -➷ -➸ -➹ -➺ -➻ -➼ -➽ -➾ -➿ -⌘ -⌥ -⌃ -⎋ -⌫ -⌦ -⏏ -⌤ -⌧ -⌨ -⎆ -⎇ -⎈ -⎉ -⎊ -⎌ -⎍ -⎎ -⎏ -⎐ -⎑ -⎒ -⎓ -⎕ -⎖ -⎗ -⎘ -⎙ -⎚ -⎛ -⎜ -⎝ -⎞ -⎟ -⎠ -⎡ -⎢ -⎣ -⎤ -⎥ -⎦ -⎧ -⎨ -⎩ -⎪ -⎫ -⎬ -⎭ -⎮ -⎯ -⎰ -⎱ -⎲ -⎳ -⎴ -⎵ -⎶ -⎷ -⎸ -⎹ -⎺ -⎻ -⎼ -⎽ -⎾ -⎿ -⏍ -⏎ -⏐ -⏑ -⏒ -⏓ -⏔ -⏕ -⏖ -⏗ -⏘ -⏙ -⏛ -⏜ -⏝ -⏞ -⏟ -⏠ -⏡ -⏢ -⏣ -⏤ -⏥ -⏦ -⏧ -⏨ -⏭ -⏮ -⏯ -⏱ -⏲ -▲ -▽ -◐ -⏽ -⏾ -⏿ -ɐ -ɑ -ɒ -ɓ -ɔ -ɕ -ɖ -ɗ -ɘ -ə -ɚ -ɛ -ɜ -ɝ -ɞ -ɟ -ɠ -ɡ -ɢ -ɣ -ɤ -ɥ -ɦ -ɧ -ɨ -ɩ -ɪ -ɫ -ɬ -ɭ -ɮ -ɯ -ɰ -ɱ -ɲ -ɳ -ɴ -ɵ -ɶ -ɷ -ɸ -ɹ -ɺ -ɻ -ɼ -ɽ -ɾ -ɿ -ʀ -ʁ -ʂ -ʃ -ʄ -ʅ -ʆ -ʇ -ʈ -ʉ -ʊ -ʋ -ʌ -ʍ -ʎ -ʏ -ʐ -ʑ -ʒ -ʓ -ʔ -ʕ -ʖ -ʗ -ʘ -ʙ -ʚ -ʛ -ʜ -ʝ -ʞ -ʟ -ʠ -ʡ -ʢ -ʣ -ʤ -ʥ -ʦ -ʧ -ʨ -ʩ -ʪ -ʫ 
-ʬ -ʭ -ʮ -ʯ -━ -Ǝ -à -● -▶ -| -𝑢 -〖 -〗 -︽ -– -﹥ -𝜓 -• -∋ -ƒ -० -✘ -Е -◉ -〒 -𝒱 -𝜆 -⟹ -﹪ -◊ -╆ -오 -˂ -〉 -𝝎 -▪ -△ -▁ -◼ -〇 -▷ -▬ -𝒮 -† -ₒ -⼁ -〵 -⭐ -╳ -⟶ -으 -⬆ -Ạ -◀ - -▫ -丄 -︾ -◥ -‖ -𝜌 -ⅼ -▼ -⁎ -﹏ -😁 -😂 -😃 -😄 -😅 -😆 -😉 -😊 -😋 -😌 -😍 -😏 -😒 -😓 -😔 -😖 -😘 -😚 -😜 -😝 -😞 -😠 -😡 -😢 -😣 -😤 -😥 -😨 -😩 -😪 -😫 -😭 -😰 -😱 -😲 -😳 -😵 -😷 -😸 -😹 -😺 -😻 -😼 -😽 -😾 -😿 -🙀 -🙅 -🙆 -🙇 -🙈 -🙉 -🙊 -🙋 -🙌 -🙍 -🙎 -🙏 -✂ -✅ -✊ -✋ -✖ -✨ -❌ -❎ -❓ -❔ -❕ -❗ -🚀 -🚃 -🚄 -🚅 -🚇 -🚉 -🚌 -🚏 -🚑 -🚒 -🚓 -🚕 -🚗 -🚙 -🚚 -🚢 -🚤 -🚥 -🚧 -🚨 -🚩 -🚪 -🚫 -🚬 -🚭 -🚲 -🚶 -🚹 -🚺 -🚻 -🚼 -🚽 -🚾 -🛀 -Ⓜ -🅰 -🅱 -🅾 -🅿 -🆎 -🆑 -🆒 -🆓 -🆔 -🆕 -🆖 -🆗 -🆘 -🆙 -🆚 -🇩🇪 -🇬🇧 -🇨🇳 -🇯🇵 -🇫🇷 -🇰🇷 -🇪🇸 -🇮🇹 -🇷🇺 -🇺🇸 -🈁 -ℹ -⌚ -⌛ -⏩ -⏪ -⏫ -⏬ -⏰ -⏳ -◻ -◽ -◾ -♈ -♉ -♊ -♋ -♌ -♍ -♎ -♏ -♐ -♑ -♒ -♓ -♿ -⚽ -⚾ -⛄ -⛅ -⛎ -⛔ -⛲ -⛳ -⛵ -⛺ -⛽ -⤴ -⤵ -⬅ -⬇ -⬛ -⬜ -⭕ -〰 -〽 -㊗ -㊙ -🀄 -🃏 -🌀 -🌁 -🌂 -🌃 -🌄 -🌅 -🌆 -🌇 -🌈 -🌉 -🌊 -🌋 -🌌 -🌏 -🌑 -🌓 -🌔 -🌕 -🌙 -🌛 -🌟 -🌠 -🌰 -🌱 -🌴 -🌵 -🌷 -🌸 -🌹 -🌺 -🌻 -🌼 -🌽 -🌾 -🌿 -🍀 -🍁 -🍂 -🍃 -🍄 -🍅 -🍆 -🍇 -🍈 -🍉 -🍊 -🍌 -🍍 -🍎 -🍏 -🍑 -🍒 -🍓 -🍔 -🍕 -🍖 -🍗 -🍘 -🍙 -🍚 -🍛 -🍜 -🍝 -🍞 -🍟 -🍠 -🍡 -🍢 -🍣 -🍤 -🍥 -🍦 -🍧 -🍨 -🍩 -🍪 -🍫 -🍬 -🍭 -🍮 -🍯 -🍰 -🍱 -🍲 -🍳 -🍴 -🍵 -🍶 -🍷 -🍸 -🍹 -🍺 -🍻 -🎀 -🎁 -🎂 -🎃 -🎄 -🎅 -🎆 -🎇 -🎈 -🎉 -🎊 -🎋 -🎌 -🎍 -🎎 -🎏 -🎐 -🎑 -🎒 -🎓 -🎠 -🎡 -🎢 -🎣 -🎤 -🎥 -🎦 -🎧 -🎨 -🎩 -🎪 -🎫 -🎬 -🎭 -🎮 -🎯 -🎰 -🎱 -🎲 -🎳 -🎴 -🎵 -🎶 -🎷 -🎸 -🎹 -🎺 -🎻 -🎼 -🎽 -🎾 -🎿 -🏀 -🏁 -🏂 -🏃 -🏄 -🏆 -🏈 -🏊 -🏠 -🏡 -🏢 -🏣 -🏥 -🏦 -🏧 -🏨 -🏩 -🏪 -🏫 -🏬 -🏭 -🏮 -🏯 -🏰 -🐌 -🐍 -🐎 -🐑 -🐒 -🐔 -🐗 -🐘 -🐙 -🐚 -🐛 -🐜 -🐝 -🐞 -🐟 -🐠 -🐡 -🐢 -🐣 -🐤 -🐥 -🐦 -🐧 -🐨 -🐩 -🐫 -🐬 -🐭 -🐮 -🐯 -🐰 -🐱 -🐲 -🐳 -🐴 -🐵 -🐶 -🐷 -🐸 -🐹 -🐺 -🐻 -🐼 -🐽 -🐾 -👀 -👂 -👃 -👄 -👅 -👆 -👇 -👈 -👉 -👊 -👋 -👌 -👍 -👎 -👏 -👐 -👑 -👒 -👓 -👔 -👕 -👖 -👗 -👘 -👙 -👚 -👛 -👜 -👝 -👞 -👟 -👠 -👡 -👢 -👣 -👤 -👦 -👧 -👨 -👩 -👪 -👫 -👮 -👯 -👰 -👱 -👲 -👳 -👴 -👵 -👶 -👷 -👸 -👹 -👺 -👻 -👼 -👽 -👾 -👿 -💀 -💁 -💂 -💃 -💄 -💅 -💆 -💇 -💈 -💉 -💊 -💋 -💌 -💍 -💎 -💏 -💐 -💑 -💒 -💓 -💔 -💕 -💖 -💗 -💘 -💙 -💚 -💛 -💜 -💝 -💞 -💟 -💠 -💡 -💢 -💣 -💤 -💥 -💦 -💧 -💨 -💩 -💪 -💫 -💬 -💮 -💯 -💰 -💲 -💳 -💴 -💵 -💸 -💹 -💺 -💻 -💼 -💽 -💾 -💿 -📀 -📁 -📂 -📃 -📄 -📅 -📆 -📇 -📈 -📉 -📊 -📋 -📌 -📍 -📎 -📏 -📐 -📑 -📒 -📓 -📔 -📕 -📖 -📗 -📘 -📙 -📚 -📛 -📜 -📝 -📞 -📟 -📠 -📡 -📢 -📣 -📤 -📥 -📦 -📧 -📨 -📩 -📪 -📫 -📮 -📰 -📱 -📲 -📳 -📴 -📶 -📷 -📹 -📺 -📻 -📼 -🔃 -🔊 -🔋 -🔌 -🔍 -🔎 -🔏 -🔐 -🔑 -🔒 -🔓 -🔔 -🔖 -🔗 -🔘 -🔙 -🔚 -🔛 -🔜 -🔝 -🔞 -🔟 -🔠 -🔡 -🔢 -🔣 -🔤 -🔥 -🔦 -🔧 -🔨 -🔩 -🔪 -🔫 -🔮 -🔯 -🔰 -🔱 -🔲 -🔳 -🔴 -🔵 -🔶 -🔷 -🔸 -🔹 -🔺 -🔻 -🔼 -🔽 -🕐 -🕑 -🕒 -🕓 -🕔 -🕕 -🕖 -🕗 -🕘 -🕙 -🕚 -🕛 -🗻 -🗼 -🗽 -🗾 -🗿 -😀 -😇 -😈 -😎 -😐 -😑 -😕 -😗 -😙 -😛 -😟 -😦 -😧 -😬 -😮 -😯 -😴 -😶 -🚁 -🚂 -🚆 -🚈 -🚊 -🚍 -🚎 -🚐 -🚔 -🚖 -🚘 -🚛 -🚜 -🚝 -🚞 -🚟 -🚠 -🚡 -🚣 -🚦 -🚮 -🚯 -🚰 -🚱 -🚳 -🚴 -🚵 -🚷 -🚸 -🚿 -🛁 -🛂 -🛃 -🛄 -🛅 -🌍 -🌎 -🌐 -🌒 -🌖 -🌗 -🌘 -🌚 -🌜 -🌝 -🌞 -🌲 -🌳 -🍋 -🍐 -🍼 -🏇 -🏉 -🏤 -🐀 -🐁 -🐂 -🐃 -🐄 -🐅 -🐆 -🐇 -🐈 -🐉 -🐊 -🐋 -🐏 -🐐 -🐓 -🐕 -🐖 -🐪 -👥 -👬 -👭 -💭 -💶 -💷 -📬 -📭 -📯 -📵 -🔀 -🔁 -🔂 -🔄 -🔅 -🔆 -🔇 -🔉 -🔕 -🔬 -🔭 -🕜 -🕝 -🕞 -🕟 -🕠 -🕡 -🕢 -🕣 -🕤 -🕥 -🕦 -🕧 diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt deleted file mode 100644 index 19d81892c205627f296adbf8b20ea41aba2de5d0..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/ta_dict.txt +++ /dev/null @@ -1,128 +0,0 @@ -t -a -_ -i -m -g -/ -3 -I -L -S -V -R -C -2 -0 -1 -v -l -9 -7 -8 -. -j -p -ப -ூ -த -ம -ி -வ -ர -் -ந -ோ -ன -6 -ஆ -ற -ல -5 -ள -ா -ொ -ழ -ு -4 -ெ -ண -க -ட -ை -ே -ச -ய -ஒ -இ -அ -ங -உ -ீ -ஞ -எ -ஓ -ஃ -ஜ -ஷ -ஸ -ஏ -ஊ -ஹ -ஈ -ஐ -ௌ -ஔ -s -c -e -n -w -F -T -O -P -K -A -N -G -Y -E -M -H -U -B -o -b -D -d -r -W -u -y -f -X -k -q -h -J -z -Z -Q -x -- -' -$ -, -% -@ -é -! -# -+ -É -& -: -( -? 
- diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt deleted file mode 100644 index 83d74cc7e5f899ca43b23fa690d84d70bee535e3..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/dict/te_dict.txt +++ /dev/null @@ -1,151 +0,0 @@ -t -e -_ -i -m -g -/ -5 -I -L -S -V -R -C -2 -0 -1 -v -a -l -3 -4 -8 -9 -. -j -p -త -ె -ర -క -్ -ి -ం -చ -ే -ద -ు -7 -6 -ఉ -ా -మ -ట -ో -వ -ప -ల -శ -ఆ -య -ై -భ -' -ీ -గ -ూ -డ -ధ -హ -న -జ -స -[ -‌ -ష -అ -ణ -ఫ -బ -ఎ -; -ళ -థ -ొ -ఠ -ృ -ఒ -ఇ -ః -ఊ -ఖ -- -ఐ -ఘ -ౌ -ఏ -ఈ -ఛ -, -ఓ -ఞ -| -? -: -ఢ -" -( -” -! -+ -) -* -= -& -“ -€ -] -£ -$ -s -c -n -w -k -J -G -u -d -r -E -o -h -y -b -f -B -M -O -T -N -D -P -A -F -x -W -Y -U -H -K -X -z -Z -Q -q -É -% -# -@ -é diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml deleted file mode 100644 index 876f3ee993f73e7d0e9af57336242c7403415f92..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/pytorchocr/utils/resources/models_config.yml +++ /dev/null @@ -1,65 +0,0 @@ -lang: - ch_lite: - det: ch_PP-OCRv3_det_infer.pth - rec: ch_PP-OCRv5_rec_infer.pth - dict: ppocrv5_dict.txt - ch_lite_v4: - det: ch_PP-OCRv3_det_infer.pth - rec: ch_PP-OCRv4_rec_infer.pth - dict: ppocr_keys_v1.txt - ch_server: - det: ch_PP-OCRv3_det_infer.pth - rec: ch_PP-OCRv5_rec_server_infer.pth - dict: ppocrv5_dict.txt - ch_server_v4: - det: ch_PP-OCRv3_det_infer.pth - rec: ch_PP-OCRv4_rec_server_infer.pth - dict: ppocr_keys_v1.txt - ch: - det: ch_PP-OCRv3_det_infer.pth - rec: ch_PP-OCRv4_rec_server_doc_infer.pth - dict: ppocrv4_doc_dict.txt - en: - det: en_PP-OCRv3_det_infer.pth - rec: en_PP-OCRv4_rec_infer.pth - dict: en_dict.txt - korean: - det: Multilingual_PP-OCRv3_det_infer.pth - rec: korean_PP-OCRv3_rec_infer.pth - dict: korean_dict.txt - japan: - det: Multilingual_PP-OCRv3_det_infer.pth - rec: japan_PP-OCRv3_rec_infer.pth - dict: japan_dict.txt - chinese_cht: - det: Multilingual_PP-OCRv3_det_infer.pth - rec: chinese_cht_PP-OCRv3_rec_infer.pth - dict: chinese_cht_dict.txt - ta: - det: Multilingual_PP-OCRv3_det_infer.pth - rec: ta_PP-OCRv3_rec_infer.pth - dict: ta_dict.txt - te: - det: Multilingual_PP-OCRv3_det_infer.pth - rec: te_PP-OCRv3_rec_infer.pth - dict: te_dict.txt - ka: - det: Multilingual_PP-OCRv3_det_infer.pth - rec: ka_PP-OCRv3_rec_infer.pth - dict: ka_dict.txt - latin: - det: en_PP-OCRv3_det_infer.pth - rec: latin_PP-OCRv3_rec_infer.pth - dict: latin_dict.txt - arabic: - det: Multilingual_PP-OCRv3_det_infer.pth - rec: arabic_PP-OCRv3_rec_infer.pth - dict: arabic_dict.txt - cyrillic: - det: Multilingual_PP-OCRv3_det_infer.pth - rec: cyrillic_PP-OCRv3_rec_infer.pth - dict: cyrillic_dict.txt - devanagari: - det: Multilingual_PP-OCRv3_det_infer.pth - rec: devanagari_PP-OCRv3_rec_infer.pth - dict: devanagari_dict.txt \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py deleted file mode 100644 index f64ba567a631a847c6c2ea3d345f86865056cb53..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/__init__.py +++ /dev/null @@ -1 +0,0 @@ -# Copyright (c) Opendatalab. 
All rights reserved. \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_cls.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_cls.py deleted file mode 100755 index 5dea3390a6d8bbeb41d8b765eeab38d3fae4ef65..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_cls.py +++ /dev/null @@ -1,106 +0,0 @@ -import cv2 -import copy -import numpy as np -import math -import time -import torch -from ...pytorchocr.base_ocr_v20 import BaseOCRV20 -from . import pytorchocr_utility as utility -from ...pytorchocr.postprocess import build_post_process - - -class TextClassifier(BaseOCRV20): - def __init__(self, args, **kwargs): - self.device = args.device - self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")] - self.cls_batch_num = args.cls_batch_num - self.cls_thresh = args.cls_thresh - postprocess_params = { - 'name': 'ClsPostProcess', - "label_list": args.label_list, - } - self.postprocess_op = build_post_process(postprocess_params) - - self.weights_path = args.cls_model_path - self.yaml_path = args.cls_yaml_path - network_config = utility.get_arch_config(self.weights_path) - super(TextClassifier, self).__init__(network_config, **kwargs) - - self.cls_image_shape = [int(v) for v in args.cls_image_shape.split(",")] - - self.limited_max_width = args.limited_max_width - self.limited_min_width = args.limited_min_width - - self.load_pytorch_weights(self.weights_path) - self.net.eval() - self.net.to(self.device) - - def resize_norm_img(self, img): - imgC, imgH, imgW = self.cls_image_shape - h = img.shape[0] - w = img.shape[1] - ratio = w / float(h) - imgW = max(min(imgW, self.limited_max_width), self.limited_min_width) - ratio_imgH = math.ceil(imgH * ratio) - ratio_imgH = max(ratio_imgH, self.limited_min_width) - if ratio_imgH > imgW: - resized_w = imgW - else: - resized_w = int(math.ceil(imgH * ratio)) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - if self.cls_image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - return padding_im - - def __call__(self, img_list): - img_list = copy.deepcopy(img_list) - img_num = len(img_list) - # Calculate the aspect ratio of all text bars - width_list = [] - for img in img_list: - width_list.append(img.shape[1] / float(img.shape[0])) - # Sorting can speed up the cls process - indices = np.argsort(np.array(width_list)) - - cls_res = [['', 0.0]] * img_num - batch_num = self.cls_batch_num - elapse = 0 - for beg_img_no in range(0, img_num, batch_num): - end_img_no = min(img_num, beg_img_no + batch_num) - norm_img_batch = [] - max_wh_ratio = 0 - for ino in range(beg_img_no, end_img_no): - h, w = img_list[indices[ino]].shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for ino in range(beg_img_no, end_img_no): - norm_img = self.resize_norm_img(img_list[indices[ino]]) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - norm_img_batch = np.concatenate(norm_img_batch) - norm_img_batch = norm_img_batch.copy() - starttime = time.time() - - with torch.no_grad(): - inp = torch.from_numpy(norm_img_batch) - inp = inp.to(self.device) - prob_out = 
self.net(inp) - prob_out = prob_out.cpu().numpy() - - cls_result = self.postprocess_op(prob_out) - elapse += time.time() - starttime - for rno in range(len(cls_result)): - label, score = cls_result[rno] - cls_res[indices[beg_img_no + rno]] = [label, score] - if '180' in label and score > self.cls_thresh: - img_list[indices[beg_img_no + rno]] = cv2.rotate( - img_list[indices[beg_img_no + rno]], 1) - return img_list, cls_res, elapse diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_det.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_det.py deleted file mode 100755 index c6f1f9c7b95e61b8ee3789246238f999b3378bd5..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_det.py +++ /dev/null @@ -1,217 +0,0 @@ -import sys - -import numpy as np -import time -import torch -from ...pytorchocr.base_ocr_v20 import BaseOCRV20 -from . import pytorchocr_utility as utility -from ...pytorchocr.data import create_operators, transform -from ...pytorchocr.postprocess import build_post_process - - -class TextDetector(BaseOCRV20): - def __init__(self, args, **kwargs): - self.args = args - self.det_algorithm = args.det_algorithm - self.device = args.device - pre_process_list = [{ - 'DetResizeForTest': { - 'limit_side_len': args.det_limit_side_len, - 'limit_type': args.det_limit_type, - } - }, { - 'NormalizeImage': { - 'std': [0.229, 0.224, 0.225], - 'mean': [0.485, 0.456, 0.406], - 'scale': '1./255.', - 'order': 'hwc' - } - }, { - 'ToCHWImage': None - }, { - 'KeepKeys': { - 'keep_keys': ['image', 'shape'] - } - }] - postprocess_params = {} - if self.det_algorithm == "DB": - postprocess_params['name'] = 'DBPostProcess' - postprocess_params["thresh"] = args.det_db_thresh - postprocess_params["box_thresh"] = args.det_db_box_thresh - postprocess_params["max_candidates"] = 1000 - postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio - postprocess_params["use_dilation"] = args.use_dilation - postprocess_params["score_mode"] = args.det_db_score_mode - elif self.det_algorithm == "DB++": - postprocess_params['name'] = 'DBPostProcess' - postprocess_params["thresh"] = args.det_db_thresh - postprocess_params["box_thresh"] = args.det_db_box_thresh - postprocess_params["max_candidates"] = 1000 - postprocess_params["unclip_ratio"] = args.det_db_unclip_ratio - postprocess_params["use_dilation"] = args.use_dilation - postprocess_params["score_mode"] = args.det_db_score_mode - pre_process_list[1] = { - 'NormalizeImage': { - 'std': [1.0, 1.0, 1.0], - 'mean': - [0.48109378172549, 0.45752457890196, 0.40787054090196], - 'scale': '1./255.', - 'order': 'hwc' - } - } - elif self.det_algorithm == "EAST": - postprocess_params['name'] = 'EASTPostProcess' - postprocess_params["score_thresh"] = args.det_east_score_thresh - postprocess_params["cover_thresh"] = args.det_east_cover_thresh - postprocess_params["nms_thresh"] = args.det_east_nms_thresh - elif self.det_algorithm == "SAST": - pre_process_list[0] = { - 'DetResizeForTest': { - 'resize_long': args.det_limit_side_len - } - } - postprocess_params['name'] = 'SASTPostProcess' - postprocess_params["score_thresh"] = args.det_sast_score_thresh - postprocess_params["nms_thresh"] = args.det_sast_nms_thresh - self.det_sast_polygon = args.det_sast_polygon - if self.det_sast_polygon: - postprocess_params["sample_pts_num"] = 6 - postprocess_params["expand_scale"] = 1.2 - postprocess_params["shrink_ratio_of_width"] = 0.2 - else: - postprocess_params["sample_pts_num"] = 2 - 
postprocess_params["expand_scale"] = 1.0 - postprocess_params["shrink_ratio_of_width"] = 0.3 - elif self.det_algorithm == "PSE": - postprocess_params['name'] = 'PSEPostProcess' - postprocess_params["thresh"] = args.det_pse_thresh - postprocess_params["box_thresh"] = args.det_pse_box_thresh - postprocess_params["min_area"] = args.det_pse_min_area - postprocess_params["box_type"] = args.det_pse_box_type - postprocess_params["scale"] = args.det_pse_scale - self.det_pse_box_type = args.det_pse_box_type - elif self.det_algorithm == "FCE": - pre_process_list[0] = { - 'DetResizeForTest': { - 'rescale_img': [1080, 736] - } - } - postprocess_params['name'] = 'FCEPostProcess' - postprocess_params["scales"] = args.scales - postprocess_params["alpha"] = args.alpha - postprocess_params["beta"] = args.beta - postprocess_params["fourier_degree"] = args.fourier_degree - postprocess_params["box_type"] = args.det_fce_box_type - else: - print("unknown det_algorithm:{}".format(self.det_algorithm)) - sys.exit(0) - - self.preprocess_op = create_operators(pre_process_list) - self.postprocess_op = build_post_process(postprocess_params) - - self.weights_path = args.det_model_path - self.yaml_path = args.det_yaml_path - network_config = utility.get_arch_config(self.weights_path) - super(TextDetector, self).__init__(network_config, **kwargs) - self.load_pytorch_weights(self.weights_path) - self.net.eval() - self.net.to(self.device) - - def order_points_clockwise(self, pts): - """ - reference from: https://github.com/jrosebr1/imutils/blob/master/imutils/perspective.py - # sort the points based on their x-coordinates - """ - xSorted = pts[np.argsort(pts[:, 0]), :] - - # grab the left-most and right-most points from the sorted - # x-roodinate points - leftMost = xSorted[:2, :] - rightMost = xSorted[2:, :] - - # now, sort the left-most coordinates according to their - # y-coordinates so we can grab the top-left and bottom-left - # points, respectively - leftMost = leftMost[np.argsort(leftMost[:, 1]), :] - (tl, bl) = leftMost - - rightMost = rightMost[np.argsort(rightMost[:, 1]), :] - (tr, br) = rightMost - - rect = np.array([tl, tr, br, bl], dtype="float32") - return rect - - def clip_det_res(self, points, img_height, img_width): - for pno in range(points.shape[0]): - points[pno, 0] = int(min(max(points[pno, 0], 0), img_width - 1)) - points[pno, 1] = int(min(max(points[pno, 1], 0), img_height - 1)) - return points - - def filter_tag_det_res(self, dt_boxes, image_shape): - img_height, img_width = image_shape[0:2] - dt_boxes_new = [] - for box in dt_boxes: - box = self.order_points_clockwise(box) - box = self.clip_det_res(box, img_height, img_width) - rect_width = int(np.linalg.norm(box[0] - box[1])) - rect_height = int(np.linalg.norm(box[0] - box[3])) - if rect_width <= 3 or rect_height <= 3: - continue - dt_boxes_new.append(box) - dt_boxes = np.array(dt_boxes_new) - return dt_boxes - - def filter_tag_det_res_only_clip(self, dt_boxes, image_shape): - img_height, img_width = image_shape[0:2] - dt_boxes_new = [] - for box in dt_boxes: - box = self.clip_det_res(box, img_height, img_width) - dt_boxes_new.append(box) - dt_boxes = np.array(dt_boxes_new) - return dt_boxes - - def __call__(self, img): - ori_im = img.copy() - data = {'image': img} - data = transform(data, self.preprocess_op) - img, shape_list = data - if img is None: - return None, 0 - img = np.expand_dims(img, axis=0) - shape_list = np.expand_dims(shape_list, axis=0) - img = img.copy() - starttime = time.time() - - with torch.no_grad(): - inp = 
torch.from_numpy(img) - inp = inp.to(self.device) - outputs = self.net(inp) - - preds = {} - if self.det_algorithm == "EAST": - preds['f_geo'] = outputs['f_geo'].cpu().numpy() - preds['f_score'] = outputs['f_score'].cpu().numpy() - elif self.det_algorithm == 'SAST': - preds['f_border'] = outputs['f_border'].cpu().numpy() - preds['f_score'] = outputs['f_score'].cpu().numpy() - preds['f_tco'] = outputs['f_tco'].cpu().numpy() - preds['f_tvo'] = outputs['f_tvo'].cpu().numpy() - elif self.det_algorithm in ['DB', 'PSE', 'DB++']: - preds['maps'] = outputs['maps'].cpu().numpy() - elif self.det_algorithm == 'FCE': - for i, (k, output) in enumerate(outputs.items()): - preds['level_{}'.format(i)] = output - else: - raise NotImplementedError - - post_result = self.postprocess_op(preds, shape_list) - dt_boxes = post_result[0]['points'] - if (self.det_algorithm == "SAST" and - self.det_sast_polygon) or (self.det_algorithm in ["PSE", "FCE"] and - self.postprocess_op.box_type == 'poly'): - dt_boxes = self.filter_tag_det_res_only_clip(dt_boxes, ori_im.shape) - else: - dt_boxes = self.filter_tag_det_res(dt_boxes, ori_im.shape) - - elapse = time.time() - starttime - return dt_boxes, elapse diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py deleted file mode 100755 index c06ca5fe3f5bd0c4e38502ff548e2b488eeac233..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_rec.py +++ /dev/null @@ -1,446 +0,0 @@ -from PIL import Image -import cv2 -import numpy as np -import math -import time -import torch -from tqdm import tqdm - -from ...pytorchocr.base_ocr_v20 import BaseOCRV20 -from . import pytorchocr_utility as utility -from ...pytorchocr.postprocess import build_post_process - - -class TextRecognizer(BaseOCRV20): - def __init__(self, args, **kwargs): - self.device = args.device - self.rec_image_shape = [int(v) for v in args.rec_image_shape.split(",")] - self.character_type = args.rec_char_type - self.rec_batch_num = args.rec_batch_num - self.rec_algorithm = args.rec_algorithm - self.max_text_length = args.max_text_length - postprocess_params = { - 'name': 'CTCLabelDecode', - "character_type": args.rec_char_type, - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - if self.rec_algorithm == "SRN": - postprocess_params = { - 'name': 'SRNLabelDecode', - "character_type": args.rec_char_type, - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - elif self.rec_algorithm == "RARE": - postprocess_params = { - 'name': 'AttnLabelDecode', - "character_type": args.rec_char_type, - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - elif self.rec_algorithm == 'NRTR': - postprocess_params = { - 'name': 'NRTRLabelDecode', - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - elif self.rec_algorithm == "SAR": - postprocess_params = { - 'name': 'SARLabelDecode', - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - elif self.rec_algorithm == 'ViTSTR': - postprocess_params = { - 'name': 'ViTSTRLabelDecode', - "character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - elif self.rec_algorithm == "CAN": - self.inverse = args.rec_image_inverse - postprocess_params = { - 'name': 'CANLabelDecode', - 
"character_dict_path": args.rec_char_dict_path, - "use_space_char": args.use_space_char - } - elif self.rec_algorithm == 'RFL': - postprocess_params = { - 'name': 'RFLLabelDecode', - "character_dict_path": None, - "use_space_char": args.use_space_char - } - self.postprocess_op = build_post_process(postprocess_params) - - self.limited_max_width = args.limited_max_width - self.limited_min_width = args.limited_min_width - - self.weights_path = args.rec_model_path - self.yaml_path = args.rec_yaml_path - - network_config = utility.get_arch_config(self.weights_path) - weights = self.read_pytorch_weights(self.weights_path) - - self.out_channels = self.get_out_channels(weights) - if self.rec_algorithm == 'NRTR': - self.out_channels = list(weights.values())[-1].numpy().shape[0] - elif self.rec_algorithm == 'SAR': - self.out_channels = list(weights.values())[-3].numpy().shape[0] - - kwargs['out_channels'] = self.out_channels - super(TextRecognizer, self).__init__(network_config, **kwargs) - - self.load_state_dict(weights) - self.net.eval() - self.net.to(self.device) - - def resize_norm_img(self, img, max_wh_ratio): - imgC, imgH, imgW = self.rec_image_shape - if self.rec_algorithm == 'NRTR' or self.rec_algorithm == 'ViTSTR': - img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - # return padding_im - image_pil = Image.fromarray(np.uint8(img)) - if self.rec_algorithm == 'ViTSTR': - img = image_pil.resize([imgW, imgH], Image.BICUBIC) - else: - img = image_pil.resize([imgW, imgH], Image.ANTIALIAS) - img = np.array(img) - norm_img = np.expand_dims(img, -1) - norm_img = norm_img.transpose((2, 0, 1)) - if self.rec_algorithm == 'ViTSTR': - norm_img = norm_img.astype(np.float32) / 255. - else: - norm_img = norm_img.astype(np.float32) / 128. - 1. - return norm_img - elif self.rec_algorithm == 'RFL': - img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - resized_image = cv2.resize( - img, (imgW, imgH), interpolation=cv2.INTER_CUBIC) - resized_image = resized_image.astype('float32') - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - resized_image -= 0.5 - resized_image /= 0.5 - return resized_image - - assert imgC == img.shape[2] - max_wh_ratio = max(max_wh_ratio, imgW / imgH) - imgW = int((imgH * max_wh_ratio)) - imgW = max(min(imgW, self.limited_max_width), self.limited_min_width) - h, w = img.shape[:2] - ratio = w / float(h) - ratio_imgH = math.ceil(imgH * ratio) - ratio_imgH = max(ratio_imgH, self.limited_min_width) - if ratio_imgH > imgW: - resized_w = imgW - else: - resized_w = int(ratio_imgH) - resized_image = cv2.resize(img, (resized_w, imgH)) - resized_image = resized_image.astype('float32') - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - padding_im = np.zeros((imgC, imgH, imgW), dtype=np.float32) - padding_im[:, :, 0:resized_w] = resized_image - return padding_im - - def resize_norm_img_svtr(self, img, image_shape): - - imgC, imgH, imgW = image_shape - resized_image = cv2.resize( - img, (imgW, imgH), interpolation=cv2.INTER_LINEAR) - resized_image = resized_image.astype('float32') - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - return resized_image - - - def resize_norm_img_srn(self, img, image_shape): - imgC, imgH, imgW = image_shape - - img_black = np.zeros((imgH, imgW)) - im_hei = img.shape[0] - im_wid = img.shape[1] - - if im_wid <= im_hei * 1: - img_new = cv2.resize(img, (imgH * 1, imgH)) - elif im_wid <= im_hei * 2: - img_new = cv2.resize(img, (imgH * 2, 
imgH)) - elif im_wid <= im_hei * 3: - img_new = cv2.resize(img, (imgH * 3, imgH)) - else: - img_new = cv2.resize(img, (imgW, imgH)) - - img_np = np.asarray(img_new) - img_np = cv2.cvtColor(img_np, cv2.COLOR_BGR2GRAY) - img_black[:, 0:img_np.shape[1]] = img_np - img_black = img_black[:, :, np.newaxis] - - row, col, c = img_black.shape - c = 1 - - return np.reshape(img_black, (c, row, col)).astype(np.float32) - - def srn_other_inputs(self, image_shape, num_heads, max_text_length): - - imgC, imgH, imgW = image_shape - feature_dim = int((imgH / 8) * (imgW / 8)) - - encoder_word_pos = np.array(range(0, feature_dim)).reshape( - (feature_dim, 1)).astype('int64') - gsrm_word_pos = np.array(range(0, max_text_length)).reshape( - (max_text_length, 1)).astype('int64') - - gsrm_attn_bias_data = np.ones((1, max_text_length, max_text_length)) - gsrm_slf_attn_bias1 = np.triu(gsrm_attn_bias_data, 1).reshape( - [-1, 1, max_text_length, max_text_length]) - gsrm_slf_attn_bias1 = np.tile( - gsrm_slf_attn_bias1, - [1, num_heads, 1, 1]).astype('float32') * [-1e9] - - gsrm_slf_attn_bias2 = np.tril(gsrm_attn_bias_data, -1).reshape( - [-1, 1, max_text_length, max_text_length]) - gsrm_slf_attn_bias2 = np.tile( - gsrm_slf_attn_bias2, - [1, num_heads, 1, 1]).astype('float32') * [-1e9] - - encoder_word_pos = encoder_word_pos[np.newaxis, :] - gsrm_word_pos = gsrm_word_pos[np.newaxis, :] - - return [ - encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, - gsrm_slf_attn_bias2 - ] - - def process_image_srn(self, img, image_shape, num_heads, max_text_length): - norm_img = self.resize_norm_img_srn(img, image_shape) - norm_img = norm_img[np.newaxis, :] - - [encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, gsrm_slf_attn_bias2] = \ - self.srn_other_inputs(image_shape, num_heads, max_text_length) - - gsrm_slf_attn_bias1 = gsrm_slf_attn_bias1.astype(np.float32) - gsrm_slf_attn_bias2 = gsrm_slf_attn_bias2.astype(np.float32) - encoder_word_pos = encoder_word_pos.astype(np.int64) - gsrm_word_pos = gsrm_word_pos.astype(np.int64) - - return (norm_img, encoder_word_pos, gsrm_word_pos, gsrm_slf_attn_bias1, - gsrm_slf_attn_bias2) - - def resize_norm_img_sar(self, img, image_shape, - width_downsample_ratio=0.25): - imgC, imgH, imgW_min, imgW_max = image_shape - h = img.shape[0] - w = img.shape[1] - valid_ratio = 1.0 - # make sure new_width is an integral multiple of width_divisor. 
- width_divisor = int(1 / width_downsample_ratio) - # resize - ratio = w / float(h) - resize_w = math.ceil(imgH * ratio) - if resize_w % width_divisor != 0: - resize_w = round(resize_w / width_divisor) * width_divisor - if imgW_min is not None: - resize_w = max(imgW_min, resize_w) - if imgW_max is not None: - valid_ratio = min(1.0, 1.0 * resize_w / imgW_max) - resize_w = min(imgW_max, resize_w) - resized_image = cv2.resize(img, (resize_w, imgH)) - resized_image = resized_image.astype('float32') - # norm - if image_shape[0] == 1: - resized_image = resized_image / 255 - resized_image = resized_image[np.newaxis, :] - else: - resized_image = resized_image.transpose((2, 0, 1)) / 255 - resized_image -= 0.5 - resized_image /= 0.5 - resize_shape = resized_image.shape - padding_im = -1.0 * np.ones((imgC, imgH, imgW_max), dtype=np.float32) - padding_im[:, :, 0:resize_w] = resized_image - pad_shape = padding_im.shape - - return padding_im, resize_shape, pad_shape, valid_ratio - - - def norm_img_can(self, img, image_shape): - - img = cv2.cvtColor( - img, cv2.COLOR_BGR2GRAY) # CAN only predict gray scale image - - if self.inverse: - img = 255 - img - - if self.rec_image_shape[0] == 1: - h, w = img.shape - _, imgH, imgW = self.rec_image_shape - if h < imgH or w < imgW: - padding_h = max(imgH - h, 0) - padding_w = max(imgW - w, 0) - img_padded = np.pad(img, ((0, padding_h), (0, padding_w)), - 'constant', - constant_values=(255)) - img = img_padded - - img = np.expand_dims(img, 0) / 255.0 # h,w,c -> c,h,w - img = img.astype('float32') - - return img - - def __call__(self, img_list, tqdm_enable=False): - img_num = len(img_list) - # Calculate the aspect ratio of all text bars - width_list = [] - for img in img_list: - width_list.append(img.shape[1] / float(img.shape[0])) - # Sorting can speed up the recognition process - indices = np.argsort(np.array(width_list)) - - # rec_res = [] - rec_res = [['', 0.0]] * img_num - batch_num = self.rec_batch_num - elapse = 0 - # for beg_img_no in range(0, img_num, batch_num): - with tqdm(total=img_num, desc='OCR-rec Predict', disable=not tqdm_enable) as pbar: - index = 0 - for beg_img_no in range(0, img_num, batch_num): - end_img_no = min(img_num, beg_img_no + batch_num) - norm_img_batch = [] - max_wh_ratio = 0 - for ino in range(beg_img_no, end_img_no): - # h, w = img_list[ino].shape[0:2] - h, w = img_list[indices[ino]].shape[0:2] - wh_ratio = w * 1.0 / h - max_wh_ratio = max(max_wh_ratio, wh_ratio) - for ino in range(beg_img_no, end_img_no): - if self.rec_algorithm == "SAR": - norm_img, _, _, valid_ratio = self.resize_norm_img_sar( - img_list[indices[ino]], self.rec_image_shape) - norm_img = norm_img[np.newaxis, :] - valid_ratio = np.expand_dims(valid_ratio, axis=0) - valid_ratios = [] - valid_ratios.append(valid_ratio) - norm_img_batch.append(norm_img) - - elif self.rec_algorithm == "SVTR": - norm_img = self.resize_norm_img_svtr(img_list[indices[ino]], - self.rec_image_shape) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - elif self.rec_algorithm == "SRN": - norm_img = self.process_image_srn(img_list[indices[ino]], - self.rec_image_shape, 8, - self.max_text_length) - encoder_word_pos_list = [] - gsrm_word_pos_list = [] - gsrm_slf_attn_bias1_list = [] - gsrm_slf_attn_bias2_list = [] - encoder_word_pos_list.append(norm_img[1]) - gsrm_word_pos_list.append(norm_img[2]) - gsrm_slf_attn_bias1_list.append(norm_img[3]) - gsrm_slf_attn_bias2_list.append(norm_img[4]) - norm_img_batch.append(norm_img[0]) - elif self.rec_algorithm == "CAN": - norm_img = 
self.norm_img_can(img_list[indices[ino]], - max_wh_ratio) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - norm_image_mask = np.ones(norm_img.shape, dtype='float32') - word_label = np.ones([1, 36], dtype='int64') - norm_img_mask_batch = [] - word_label_list = [] - norm_img_mask_batch.append(norm_image_mask) - word_label_list.append(word_label) - else: - norm_img = self.resize_norm_img(img_list[indices[ino]], - max_wh_ratio) - norm_img = norm_img[np.newaxis, :] - norm_img_batch.append(norm_img) - norm_img_batch = np.concatenate(norm_img_batch) - norm_img_batch = norm_img_batch.copy() - - if self.rec_algorithm == "SRN": - starttime = time.time() - encoder_word_pos_list = np.concatenate(encoder_word_pos_list) - gsrm_word_pos_list = np.concatenate(gsrm_word_pos_list) - gsrm_slf_attn_bias1_list = np.concatenate( - gsrm_slf_attn_bias1_list) - gsrm_slf_attn_bias2_list = np.concatenate( - gsrm_slf_attn_bias2_list) - - with torch.no_grad(): - inp = torch.from_numpy(norm_img_batch) - encoder_word_pos_inp = torch.from_numpy(encoder_word_pos_list) - gsrm_word_pos_inp = torch.from_numpy(gsrm_word_pos_list) - gsrm_slf_attn_bias1_inp = torch.from_numpy(gsrm_slf_attn_bias1_list) - gsrm_slf_attn_bias2_inp = torch.from_numpy(gsrm_slf_attn_bias2_list) - - inp = inp.to(self.device) - encoder_word_pos_inp = encoder_word_pos_inp.to(self.device) - gsrm_word_pos_inp = gsrm_word_pos_inp.to(self.device) - gsrm_slf_attn_bias1_inp = gsrm_slf_attn_bias1_inp.to(self.device) - gsrm_slf_attn_bias2_inp = gsrm_slf_attn_bias2_inp.to(self.device) - - backbone_out = self.net.backbone(inp) # backbone_feat - prob_out = self.net.head(backbone_out, [encoder_word_pos_inp, gsrm_word_pos_inp, gsrm_slf_attn_bias1_inp, gsrm_slf_attn_bias2_inp]) - # preds = {"predict": prob_out[2]} - preds = {"predict": prob_out["predict"]} - - elif self.rec_algorithm == "SAR": - starttime = time.time() - # valid_ratios = np.concatenate(valid_ratios) - # inputs = [ - # norm_img_batch, - # valid_ratios, - # ] - - with torch.no_grad(): - inp = torch.from_numpy(norm_img_batch) - inp = inp.to(self.device) - preds = self.net(inp) - - elif self.rec_algorithm == "CAN": - starttime = time.time() - norm_img_mask_batch = np.concatenate(norm_img_mask_batch) - word_label_list = np.concatenate(word_label_list) - inputs = [norm_img_batch, norm_img_mask_batch, word_label_list] - - inp = [torch.from_numpy(e_i) for e_i in inputs] - inp = [e_i.to(self.device) for e_i in inp] - with torch.no_grad(): - outputs = self.net(inp) - outputs = [v.cpu().numpy() for k, v in enumerate(outputs)] - - preds = outputs - - else: - starttime = time.time() - - with torch.no_grad(): - inp = torch.from_numpy(norm_img_batch) - inp = inp.to(self.device) - prob_out = self.net(inp) - - if isinstance(prob_out, list): - preds = [v.cpu().numpy() for v in prob_out] - else: - preds = prob_out.cpu().numpy() - - rec_result = self.postprocess_op(preds) - for rno in range(len(rec_result)): - rec_res[indices[beg_img_no + rno]] = rec_result[rno] - elapse += time.time() - starttime - - # 更新进度条,每次增加batch_size,但要注意最后一个batch可能不足batch_size - current_batch_size = min(batch_num, img_num - index * batch_num) - index += 1 - pbar.update(current_batch_size) - - # Fix NaN values in recognition results - for i in range(len(rec_res)): - text, score = rec_res[i] - if isinstance(score, float) and math.isnan(score): - rec_res[i] = (text, 0.0) - - return rec_res, elapse diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_system.py 
b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_system.py deleted file mode 100755 index e35b9a4b1535ad89d7df2e2be6d31c5475d2acb2..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/predict_system.py +++ /dev/null @@ -1,104 +0,0 @@ -import cv2 -import copy -import numpy as np - -from . import predict_rec -from . import predict_det -from . import predict_cls - - -class TextSystem(object): - def __init__(self, args, **kwargs): - self.text_detector = predict_det.TextDetector(args, **kwargs) - self.text_recognizer = predict_rec.TextRecognizer(args, **kwargs) - self.use_angle_cls = args.use_angle_cls - self.drop_score = args.drop_score - if self.use_angle_cls: - self.text_classifier = predict_cls.TextClassifier(args, **kwargs) - - def get_rotate_crop_image(self, img, points): - ''' - img_height, img_width = img.shape[0:2] - left = int(np.min(points[:, 0])) - right = int(np.max(points[:, 0])) - top = int(np.min(points[:, 1])) - bottom = int(np.max(points[:, 1])) - img_crop = img[top:bottom, left:right, :].copy() - points[:, 0] = points[:, 0] - left - points[:, 1] = points[:, 1] - top - ''' - img_crop_width = int( - max( - np.linalg.norm(points[0] - points[1]), - np.linalg.norm(points[2] - points[3]))) - img_crop_height = int( - max( - np.linalg.norm(points[0] - points[3]), - np.linalg.norm(points[1] - points[2]))) - pts_std = np.float32([[0, 0], [img_crop_width, 0], - [img_crop_width, img_crop_height], - [0, img_crop_height]]) - M = cv2.getPerspectiveTransform(points, pts_std) - dst_img = cv2.warpPerspective( - img, - M, (img_crop_width, img_crop_height), - borderMode=cv2.BORDER_REPLICATE, - flags=cv2.INTER_CUBIC) - dst_img_height, dst_img_width = dst_img.shape[0:2] - if dst_img_height * 1.0 / dst_img_width >= 1.5: - dst_img = np.rot90(dst_img) - return dst_img - - def __call__(self, img): - ori_im = img.copy() - dt_boxes, elapse = self.text_detector(img) - print("dt_boxes num : {}, elapse : {}".format( - len(dt_boxes), elapse)) - if dt_boxes is None: - return None, None - img_crop_list = [] - - dt_boxes = sorted_boxes(dt_boxes) - - for bno in range(len(dt_boxes)): - tmp_box = copy.deepcopy(dt_boxes[bno]) - img_crop = self.get_rotate_crop_image(ori_im, tmp_box) - img_crop_list.append(img_crop) - if self.use_angle_cls: - img_crop_list, angle_list, elapse = self.text_classifier( - img_crop_list) - print("cls num : {}, elapse : {}".format( - len(img_crop_list), elapse)) - - rec_res, elapse = self.text_recognizer(img_crop_list) - print("rec_res num : {}, elapse : {}".format( - len(rec_res), elapse)) - # self.print_draw_crop_rec_res(img_crop_list, rec_res) - filter_boxes, filter_rec_res = [], [] - for box, rec_reuslt in zip(dt_boxes, rec_res): - text, score = rec_reuslt - if score >= self.drop_score: - filter_boxes.append(box) - filter_rec_res.append(rec_reuslt) - return filter_boxes, filter_rec_res - - -def sorted_boxes(dt_boxes): - """ - Sort text boxes in order from top to bottom, left to right - args: - dt_boxes(array):detected text boxes with shape [4, 2] - return: - sorted boxes(array) with shape [4, 2] - """ - num_boxes = dt_boxes.shape[0] - sorted_boxes = sorted(dt_boxes, key=lambda x: (x[0][1], x[0][0])) - _boxes = list(sorted_boxes) - - for i in range(num_boxes - 1): - if abs(_boxes[i + 1][0][1] - _boxes[i][0][1]) < 10 and \ - (_boxes[i + 1][0][0] < _boxes[i][0][0]): - tmp = _boxes[i] - _boxes[i] = _boxes[i + 1] - _boxes[i + 1] = tmp - return _boxes diff --git 
a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py b/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py deleted file mode 100755 index 912d124ef4683740d014d881cc825673d577b628..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/pytorchocr_utility.py +++ /dev/null @@ -1,227 +0,0 @@ -import os -import math -from pathlib import Path -import numpy as np -import cv2 -import argparse - - -root_dir = Path(__file__).resolve().parent.parent.parent -DEFAULT_CFG_PATH = root_dir / "pytorchocr" / "utils" / "resources" / "arch_config.yaml" - - -def init_args(): - def str2bool(v): - return v.lower() in ("true", "t", "1") - - parser = argparse.ArgumentParser() - # params for prediction engine - parser.add_argument("--use_gpu", type=str2bool, default=False) - parser.add_argument("--det", type=str2bool, default=True) - parser.add_argument("--rec", type=str2bool, default=True) - parser.add_argument("--device", type=str, default='cpu') - # parser.add_argument("--ir_optim", type=str2bool, default=True) - # parser.add_argument("--use_tensorrt", type=str2bool, default=False) - # parser.add_argument("--use_fp16", type=str2bool, default=False) - parser.add_argument("--gpu_mem", type=int, default=500) - parser.add_argument("--warmup", type=str2bool, default=False) - - # params for text detector - parser.add_argument("--image_dir", type=str) - parser.add_argument("--det_algorithm", type=str, default='DB') - parser.add_argument("--det_model_path", type=str) - parser.add_argument("--det_limit_side_len", type=float, default=960) - parser.add_argument("--det_limit_type", type=str, default='max') - - # DB parmas - parser.add_argument("--det_db_thresh", type=float, default=0.3) - parser.add_argument("--det_db_box_thresh", type=float, default=0.6) - parser.add_argument("--det_db_unclip_ratio", type=float, default=1.5) - parser.add_argument("--max_batch_size", type=int, default=10) - parser.add_argument("--use_dilation", type=str2bool, default=False) - parser.add_argument("--det_db_score_mode", type=str, default="fast") - - # EAST parmas - parser.add_argument("--det_east_score_thresh", type=float, default=0.8) - parser.add_argument("--det_east_cover_thresh", type=float, default=0.1) - parser.add_argument("--det_east_nms_thresh", type=float, default=0.2) - - # SAST parmas - parser.add_argument("--det_sast_score_thresh", type=float, default=0.5) - parser.add_argument("--det_sast_nms_thresh", type=float, default=0.2) - parser.add_argument("--det_sast_polygon", type=str2bool, default=False) - - # PSE parmas - parser.add_argument("--det_pse_thresh", type=float, default=0) - parser.add_argument("--det_pse_box_thresh", type=float, default=0.85) - parser.add_argument("--det_pse_min_area", type=float, default=16) - parser.add_argument("--det_pse_box_type", type=str, default='box') - parser.add_argument("--det_pse_scale", type=int, default=1) - - # FCE parmas - parser.add_argument("--scales", type=list, default=[8, 16, 32]) - parser.add_argument("--alpha", type=float, default=1.0) - parser.add_argument("--beta", type=float, default=1.0) - parser.add_argument("--fourier_degree", type=int, default=5) - parser.add_argument("--det_fce_box_type", type=str, default='poly') - - # params for text recognizer - parser.add_argument("--rec_algorithm", type=str, default='CRNN') - parser.add_argument("--rec_model_path", type=str) - parser.add_argument("--rec_image_inverse", type=str2bool, default=True) - 
parser.add_argument("--rec_image_shape", type=str, default="3, 48, 320") - parser.add_argument("--rec_char_type", type=str, default='ch') - parser.add_argument("--rec_batch_num", type=int, default=6) - parser.add_argument("--max_text_length", type=int, default=25) - - parser.add_argument("--use_space_char", type=str2bool, default=True) - parser.add_argument("--drop_score", type=float, default=0.5) - parser.add_argument("--limited_max_width", type=int, default=1280) - parser.add_argument("--limited_min_width", type=int, default=16) - - parser.add_argument( - "--vis_font_path", type=str, - default=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), 'doc/fonts/simfang.ttf')) - parser.add_argument( - "--rec_char_dict_path", - type=str, - default=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), - 'pytorchocr/utils/ppocr_keys_v1.txt')) - - # params for text classifier - parser.add_argument("--use_angle_cls", type=str2bool, default=False) - parser.add_argument("--cls_model_path", type=str) - parser.add_argument("--cls_image_shape", type=str, default="3, 48, 192") - parser.add_argument("--label_list", type=list, default=['0', '180']) - parser.add_argument("--cls_batch_num", type=int, default=6) - parser.add_argument("--cls_thresh", type=float, default=0.9) - - parser.add_argument("--enable_mkldnn", type=str2bool, default=False) - parser.add_argument("--use_pdserving", type=str2bool, default=False) - - # params for e2e - parser.add_argument("--e2e_algorithm", type=str, default='PGNet') - parser.add_argument("--e2e_model_path", type=str) - parser.add_argument("--e2e_limit_side_len", type=float, default=768) - parser.add_argument("--e2e_limit_type", type=str, default='max') - - # PGNet parmas - parser.add_argument("--e2e_pgnet_score_thresh", type=float, default=0.5) - parser.add_argument( - "--e2e_char_dict_path", type=str, - default=os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))), - 'pytorchocr/utils/ic15_dict.txt')) - parser.add_argument("--e2e_pgnet_valid_set", type=str, default='totaltext') - parser.add_argument("--e2e_pgnet_polygon", type=bool, default=True) - parser.add_argument("--e2e_pgnet_mode", type=str, default='fast') - - # SR parmas - parser.add_argument("--sr_model_path", type=str) - parser.add_argument("--sr_image_shape", type=str, default="3, 32, 128") - parser.add_argument("--sr_batch_num", type=int, default=1) - - # params .yaml - parser.add_argument("--det_yaml_path", type=str, default=None) - parser.add_argument("--rec_yaml_path", type=str, default=None) - parser.add_argument("--cls_yaml_path", type=str, default=None) - parser.add_argument("--e2e_yaml_path", type=str, default=None) - parser.add_argument("--sr_yaml_path", type=str, default=None) - - # multi-process - parser.add_argument("--use_mp", type=str2bool, default=False) - parser.add_argument("--total_process_num", type=int, default=1) - parser.add_argument("--process_id", type=int, default=0) - - parser.add_argument("--benchmark", type=str2bool, default=False) - parser.add_argument("--save_log_path", type=str, default="./log_output/") - - parser.add_argument("--show_log", type=str2bool, default=True) - - return parser - -def parse_args(): - parser = init_args() - return parser.parse_args() - -def get_default_config(args): - return vars(args) - - -def read_network_config_from_yaml(yaml_path, char_num=None): - if not os.path.exists(yaml_path): - raise FileNotFoundError('{} is not 
existed.'.format(yaml_path)) - import yaml - with open(yaml_path, encoding='utf-8') as f: - res = yaml.safe_load(f) - if res.get('Architecture') is None: - raise ValueError('{} has no Architecture'.format(yaml_path)) - if res['Architecture']['Head']['name'] == 'MultiHead' and char_num is not None: - res['Architecture']['Head']['out_channels_list'] = { - 'CTCLabelDecode': char_num, - 'SARLabelDecode': char_num + 2, - 'NRTRLabelDecode': char_num + 3 - } - return res['Architecture'] - -def AnalysisConfig(weights_path, yaml_path=None, char_num=None): - if not os.path.exists(os.path.abspath(weights_path)): - raise FileNotFoundError('{} is not found.'.format(weights_path)) - - if yaml_path is not None: - return read_network_config_from_yaml(yaml_path, char_num=char_num) - - -def resize_img(img, input_size=600): - """ - resize img and limit the longest side of the image to input_size - """ - img = np.array(img) - im_shape = img.shape - im_size_max = np.max(im_shape[0:2]) - im_scale = float(input_size) / float(im_size_max) - img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale) - return img - - -def str_count(s): - """ - Count the number of Chinese characters, - a single English character and a single number - equal to half the length of Chinese characters. - args: - s(string): the input of string - return(int): - the number of Chinese characters - """ - import string - count_zh = count_pu = 0 - s_len = len(s) - en_dg_count = 0 - for c in s: - if c in string.ascii_letters or c.isdigit() or c.isspace(): - en_dg_count += 1 - elif c.isalpha(): - count_zh += 1 - else: - count_pu += 1 - return s_len - math.ceil(en_dg_count / 2) - - -def base64_to_cv2(b64str): - import base64 - data = base64.b64decode(b64str.encode('utf8')) - data = np.fromstring(data, np.uint8) - data = cv2.imdecode(data, cv2.IMREAD_COLOR) - return data - - -def get_arch_config(model_path): - from omegaconf import OmegaConf - all_arch_config = OmegaConf.load(DEFAULT_CFG_PATH) - path = Path(model_path) - file_name = path.stem - if file_name not in all_arch_config: - raise ValueError(f"architecture {file_name} is not in arch_config.yaml") - - arch_config = all_arch_config[file_name] - return arch_config \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/reading_oreder/__init__.py b/magic_pdf/model/sub_modules/reading_oreder/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/reading_oreder/layoutreader/__init__.py b/magic_pdf/model/sub_modules/reading_oreder/layoutreader/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/reading_oreder/layoutreader/helpers.py b/magic_pdf/model/sub_modules/reading_oreder/layoutreader/helpers.py deleted file mode 100644 index dfe71a89cf99e1f5807055115ceeda3abbceb363..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/reading_oreder/layoutreader/helpers.py +++ /dev/null @@ -1,125 +0,0 @@ -from collections import defaultdict -from typing import List, Dict - -import torch -from transformers import LayoutLMv3ForTokenClassification - -MAX_LEN = 510 -CLS_TOKEN_ID = 0 -UNK_TOKEN_ID = 3 -EOS_TOKEN_ID = 2 - - -class DataCollator: - def __call__(self, features: List[dict]) -> Dict[str, torch.Tensor]: - bbox = [] - labels = [] - input_ids = [] - attention_mask = [] - - # clip bbox and labels to max length, build input_ids and attention_mask - 
for feature in features: - _bbox = feature["source_boxes"] - if len(_bbox) > MAX_LEN: - _bbox = _bbox[:MAX_LEN] - _labels = feature["target_index"] - if len(_labels) > MAX_LEN: - _labels = _labels[:MAX_LEN] - _input_ids = [UNK_TOKEN_ID] * len(_bbox) - _attention_mask = [1] * len(_bbox) - assert len(_bbox) == len(_labels) == len(_input_ids) == len(_attention_mask) - bbox.append(_bbox) - labels.append(_labels) - input_ids.append(_input_ids) - attention_mask.append(_attention_mask) - - # add CLS and EOS tokens - for i in range(len(bbox)): - bbox[i] = [[0, 0, 0, 0]] + bbox[i] + [[0, 0, 0, 0]] - labels[i] = [-100] + labels[i] + [-100] - input_ids[i] = [CLS_TOKEN_ID] + input_ids[i] + [EOS_TOKEN_ID] - attention_mask[i] = [1] + attention_mask[i] + [1] - - # padding to max length - max_len = max(len(x) for x in bbox) - for i in range(len(bbox)): - bbox[i] = bbox[i] + [[0, 0, 0, 0]] * (max_len - len(bbox[i])) - labels[i] = labels[i] + [-100] * (max_len - len(labels[i])) - input_ids[i] = input_ids[i] + [EOS_TOKEN_ID] * (max_len - len(input_ids[i])) - attention_mask[i] = attention_mask[i] + [0] * ( - max_len - len(attention_mask[i]) - ) - - ret = { - "bbox": torch.tensor(bbox), - "attention_mask": torch.tensor(attention_mask), - "labels": torch.tensor(labels), - "input_ids": torch.tensor(input_ids), - } - # set label > MAX_LEN to -100, because original labels may be > MAX_LEN - ret["labels"][ret["labels"] > MAX_LEN] = -100 - # set label > 0 to label-1, because original labels are 1-indexed - ret["labels"][ret["labels"] > 0] -= 1 - return ret - - -def boxes2inputs(boxes: List[List[int]]) -> Dict[str, torch.Tensor]: - bbox = [[0, 0, 0, 0]] + boxes + [[0, 0, 0, 0]] - input_ids = [CLS_TOKEN_ID] + [UNK_TOKEN_ID] * len(boxes) + [EOS_TOKEN_ID] - attention_mask = [1] + [1] * len(boxes) + [1] - return { - "bbox": torch.tensor([bbox]), - "attention_mask": torch.tensor([attention_mask]), - "input_ids": torch.tensor([input_ids]), - } - - -def prepare_inputs( - inputs: Dict[str, torch.Tensor], model: LayoutLMv3ForTokenClassification -) -> Dict[str, torch.Tensor]: - ret = {} - for k, v in inputs.items(): - v = v.to(model.device) - if torch.is_floating_point(v): - v = v.to(model.dtype) - ret[k] = v - return ret - - -def parse_logits(logits: torch.Tensor, length: int) -> List[int]: - """ - parse logits to orders - - :param logits: logits from model - :param length: input length - :return: orders - """ - logits = logits[1 : length + 1, :length] - orders = logits.argsort(descending=False).tolist() - ret = [o.pop() for o in orders] - while True: - order_to_idxes = defaultdict(list) - for idx, order in enumerate(ret): - order_to_idxes[order].append(idx) - # filter idxes len > 1 - order_to_idxes = {k: v for k, v in order_to_idxes.items() if len(v) > 1} - if not order_to_idxes: - break - # filter - for order, idxes in order_to_idxes.items(): - # find original logits of idxes - idxes_to_logit = {} - for idx in idxes: - idxes_to_logit[idx] = logits[idx, order] - idxes_to_logit = sorted( - idxes_to_logit.items(), key=lambda x: x[1], reverse=True - ) - # keep the highest logit as order, set others to next candidate - for idx, _ in idxes_to_logit[1:]: - ret[idx] = orders[idx].pop() - - return ret - - -def check_duplicate(a: List[int]) -> bool: - return len(a) != len(set(a)) diff --git a/magic_pdf/model/sub_modules/reading_oreder/layoutreader/xycut.py b/magic_pdf/model/sub_modules/reading_oreder/layoutreader/xycut.py deleted file mode 100644 index 7a36f527673f7ba830a768027ae4b7e1659f4b5f..0000000000000000000000000000000000000000 
--- a/magic_pdf/model/sub_modules/reading_oreder/layoutreader/xycut.py +++ /dev/null @@ -1,242 +0,0 @@ -from typing import List -import cv2 -import numpy as np - - -def projection_by_bboxes(boxes: np.array, axis: int) -> np.ndarray: - """ - 通过一组 bbox 获得投影直方图,最后以 per-pixel 形式输出 - - Args: - boxes: [N, 4] - axis: 0-x坐标向水平方向投影, 1-y坐标向垂直方向投影 - - Returns: - 1D 投影直方图,长度为投影方向坐标的最大值(我们不需要图片的实际边长,因为只是要找文本框的间隔) - - """ - assert axis in [0, 1] - length = np.max(boxes[:, axis::2]) - res = np.zeros(length, dtype=int) - # TODO: how to remove for loop? - for start, end in boxes[:, axis::2]: - res[start:end] += 1 - return res - - -# from: https://dothinking.github.io/2021-06-19-%E9%80%92%E5%BD%92%E6%8A%95%E5%BD%B1%E5%88%86%E5%89%B2%E7%AE%97%E6%B3%95/#:~:text=%E9%80%92%E5%BD%92%E6%8A%95%E5%BD%B1%E5%88%86%E5%89%B2%EF%BC%88Recursive%20XY,%EF%BC%8C%E5%8F%AF%E4%BB%A5%E5%88%92%E5%88%86%E6%AE%B5%E8%90%BD%E3%80%81%E8%A1%8C%E3%80%82 -def split_projection_profile(arr_values: np.array, min_value: float, min_gap: float): - """Split projection profile: - - ``` - ┌──┐ - arr_values │ │ ┌─┐─── - ┌──┐ │ │ │ │ | - │ │ │ │ ┌───┐ │ │min_value - │ │<- min_gap ->│ │ │ │ │ │ | - ────┴──┴─────────────┴──┴─┴───┴─┴─┴─┴─── - 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 - ``` - - Args: - arr_values (np.array): 1-d array representing the projection profile. - min_value (float): Ignore the profile if `arr_value` is less than `min_value`. - min_gap (float): Ignore the gap if less than this value. - - Returns: - tuple: Start indexes and end indexes of split groups. - """ - # all indexes with projection height exceeding the threshold - arr_index = np.where(arr_values > min_value)[0] - if not len(arr_index): - return - - # find zero intervals between adjacent projections - # | | || - # ||||<- zero-interval -> ||||| - arr_diff = arr_index[1:] - arr_index[0:-1] - arr_diff_index = np.where(arr_diff > min_gap)[0] - arr_zero_intvl_start = arr_index[arr_diff_index] - arr_zero_intvl_end = arr_index[arr_diff_index + 1] - - # convert to index of projection range: - # the start index of zero interval is the end index of projection - arr_start = np.insert(arr_zero_intvl_end, 0, arr_index[0]) - arr_end = np.append(arr_zero_intvl_start, arr_index[-1]) - arr_end += 1 # end index will be excluded as index slice - - return arr_start, arr_end - - -def recursive_xy_cut(boxes: np.ndarray, indices: List[int], res: List[int]): - """ - - Args: - boxes: (N, 4) - indices: 递归过程中始终表示 box 在原始数据中的索引 - res: 保存输出结果 - - """ - # 向 y 轴投影 - assert len(boxes) == len(indices) - - _indices = boxes[:, 1].argsort() - y_sorted_boxes = boxes[_indices] - y_sorted_indices = indices[_indices] - - # debug_vis(y_sorted_boxes, y_sorted_indices) - - y_projection = projection_by_bboxes(boxes=y_sorted_boxes, axis=1) - pos_y = split_projection_profile(y_projection, 0, 1) - if not pos_y: - return - - arr_y0, arr_y1 = pos_y - for r0, r1 in zip(arr_y0, arr_y1): - # [r0, r1] 表示按照水平切分,有 bbox 的区域,对这些区域会再进行垂直切分 - _indices = (r0 <= y_sorted_boxes[:, 1]) & (y_sorted_boxes[:, 1] < r1) - - y_sorted_boxes_chunk = y_sorted_boxes[_indices] - y_sorted_indices_chunk = y_sorted_indices[_indices] - - _indices = y_sorted_boxes_chunk[:, 0].argsort() - x_sorted_boxes_chunk = y_sorted_boxes_chunk[_indices] - x_sorted_indices_chunk = y_sorted_indices_chunk[_indices] - - # 往 x 方向投影 - x_projection = projection_by_bboxes(boxes=x_sorted_boxes_chunk, axis=0) - pos_x = split_projection_profile(x_projection, 0, 1) - if not pos_x: - continue - - arr_x0, arr_x1 = pos_x - if len(arr_x0) == 1: - # x 方向无法切分 - 
res.extend(x_sorted_indices_chunk) - continue - - # x 方向上能分开,继续递归调用 - for c0, c1 in zip(arr_x0, arr_x1): - _indices = (c0 <= x_sorted_boxes_chunk[:, 0]) & ( - x_sorted_boxes_chunk[:, 0] < c1 - ) - recursive_xy_cut( - x_sorted_boxes_chunk[_indices], x_sorted_indices_chunk[_indices], res - ) - - -def points_to_bbox(points): - assert len(points) == 8 - - # [x1,y1,x2,y2,x3,y3,x4,y4] - left = min(points[::2]) - right = max(points[::2]) - top = min(points[1::2]) - bottom = max(points[1::2]) - - left = max(left, 0) - top = max(top, 0) - right = max(right, 0) - bottom = max(bottom, 0) - return [left, top, right, bottom] - - -def bbox2points(bbox): - left, top, right, bottom = bbox - return [left, top, right, top, right, bottom, left, bottom] - - -def vis_polygon(img, points, thickness=2, color=None): - br2bl_color = color - tl2tr_color = color - tr2br_color = color - bl2tl_color = color - cv2.line( - img, - (points[0][0], points[0][1]), - (points[1][0], points[1][1]), - color=tl2tr_color, - thickness=thickness, - ) - - cv2.line( - img, - (points[1][0], points[1][1]), - (points[2][0], points[2][1]), - color=tr2br_color, - thickness=thickness, - ) - - cv2.line( - img, - (points[2][0], points[2][1]), - (points[3][0], points[3][1]), - color=br2bl_color, - thickness=thickness, - ) - - cv2.line( - img, - (points[3][0], points[3][1]), - (points[0][0], points[0][1]), - color=bl2tl_color, - thickness=thickness, - ) - return img - - -def vis_points( - img: np.ndarray, points, texts: List[str] = None, color=(0, 200, 0) -) -> np.ndarray: - """ - - Args: - img: - points: [N, 8] 8: x1,y1,x2,y2,x3,y3,x3,y4 - texts: - color: - - Returns: - - """ - points = np.array(points) - if texts is not None: - assert len(texts) == points.shape[0] - - for i, _points in enumerate(points): - vis_polygon(img, _points.reshape(-1, 2), thickness=2, color=color) - bbox = points_to_bbox(_points) - left, top, right, bottom = bbox - cx = (left + right) // 2 - cy = (top + bottom) // 2 - - txt = texts[i] - font = cv2.FONT_HERSHEY_SIMPLEX - cat_size = cv2.getTextSize(txt, font, 0.5, 2)[0] - - img = cv2.rectangle( - img, - (cx - 5 * len(txt), cy - cat_size[1] - 5), - (cx - 5 * len(txt) + cat_size[0], cy - 5), - color, - -1, - ) - - img = cv2.putText( - img, - txt, - (cx - 5 * len(txt), cy - 5), - font, - 0.5, - (255, 255, 255), - thickness=1, - lineType=cv2.LINE_AA, - ) - - return img - - -def vis_polygons_with_index(image, points): - texts = [str(i) for i in range(len(points))] - res_img = vis_points(image.copy(), points, texts) - return res_img \ No newline at end of file diff --git a/magic_pdf/model/sub_modules/table/__init__.py b/magic_pdf/model/sub_modules/table/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/table/rapidtable/__init__.py b/magic_pdf/model/sub_modules/table/rapidtable/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py b/magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py deleted file mode 100644 index b698b3efb3c7ce57f2b526f6c88a1b0d04a0fd35..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/table/rapidtable/rapid_table.py +++ /dev/null @@ -1,103 +0,0 @@ -import os -from pathlib import Path -import cv2 -import numpy as np -import torch -from loguru import logger -from rapid_table import RapidTable, RapidTableInput -from 
rapid_table.main import ModelType - -from magic_pdf.libs.config_reader import get_device - - -class RapidTableModel(object): - def __init__(self, ocr_engine, table_sub_model_name='slanet_plus'): - sub_model_list = [model.value for model in ModelType] - if table_sub_model_name is None: - input_args = RapidTableInput() - elif table_sub_model_name in sub_model_list: - if torch.cuda.is_available() and table_sub_model_name == "unitable": - input_args = RapidTableInput(model_type=table_sub_model_name, use_cuda=True, device=get_device()) - else: - root_dir = Path(__file__).absolute().parent.parent.parent.parent.parent - slanet_plus_model_path = os.path.join(root_dir, 'resources', 'slanet_plus', 'slanet-plus.onnx') - input_args = RapidTableInput(model_type=table_sub_model_name, model_path=slanet_plus_model_path) - else: - raise ValueError(f"Invalid table_sub_model_name: {table_sub_model_name}. It must be one of {sub_model_list}") - - self.table_model = RapidTable(input_args) - - # self.ocr_model_name = "RapidOCR" - # if torch.cuda.is_available(): - # from rapidocr_paddle import RapidOCR - # self.ocr_engine = RapidOCR(det_use_cuda=True, cls_use_cuda=True, rec_use_cuda=True) - # else: - # from rapidocr_onnxruntime import RapidOCR - # self.ocr_engine = RapidOCR() - - # self.ocr_model_name = "PaddleOCR" - self.ocr_engine = ocr_engine - - - def predict(self, image): - bgr_image = cv2.cvtColor(np.asarray(image), cv2.COLOR_RGB2BGR) - - # First check the overall image aspect ratio (height/width) - img_height, img_width = bgr_image.shape[:2] - img_aspect_ratio = img_height / img_width if img_width > 0 else 1.0 - img_is_portrait = img_aspect_ratio > 1.2 - - if img_is_portrait: - - det_res = self.ocr_engine.ocr(bgr_image, rec=False)[0] - # Check if table is rotated by analyzing text box aspect ratios - is_rotated = False - if det_res: - vertical_count = 0 - - for box_ocr_res in det_res: - p1, p2, p3, p4 = box_ocr_res - - # Calculate width and height - width = p3[0] - p1[0] - height = p3[1] - p1[1] - - aspect_ratio = width / height if height > 0 else 1.0 - - # Count vertical vs horizontal text boxes - if aspect_ratio < 0.8: # Taller than wide - vertical text - vertical_count += 1 - # elif aspect_ratio > 1.2: # Wider than tall - horizontal text - # horizontal_count += 1 - - # If we have more vertical text boxes than horizontal ones, - # and vertical ones are significant, table might be rotated - if vertical_count >= len(det_res) * 0.3: - is_rotated = True - - # logger.debug(f"Text orientation analysis: vertical={vertical_count}, det_res={len(det_res)}, rotated={is_rotated}") - - # Rotate image if necessary - if is_rotated: - # logger.debug("Table appears to be in portrait orientation, rotating 90 degrees clockwise") - image = cv2.rotate(np.asarray(image), cv2.ROTATE_90_CLOCKWISE) - bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR) - - # Continue with OCR on potentially rotated image - ocr_result = self.ocr_engine.ocr(bgr_image)[0] - if ocr_result: - ocr_result = [[item[0], item[1][0], item[1][1]] for item in ocr_result if - len(item) == 2 and isinstance(item[1], tuple)] - else: - ocr_result = None - - - if ocr_result: - table_results = self.table_model(np.asarray(image), ocr_result) - html_code = table_results.pred_html - table_cell_bboxes = table_results.cell_bboxes - logic_points = table_results.logic_points - elapse = table_results.elapse - return html_code, table_cell_bboxes, logic_points, elapse - else: - return None, None, None, None diff --git a/magic_pdf/model/sub_modules/table/table_utils.py 
b/magic_pdf/model/sub_modules/table/table_utils.py deleted file mode 100644 index f04bf98d5d14c6bd69184eac94a54a88b3ad50e7..0000000000000000000000000000000000000000 --- a/magic_pdf/model/sub_modules/table/table_utils.py +++ /dev/null @@ -1,11 +0,0 @@ -import re - - -def minify_html(html): - # 移除多余的空白字符 - html = re.sub(r'\s+', ' ', html) - # 移除行尾的空白字符 - html = re.sub(r'\s*>\s*', '>', html) - # 移除标签前的空白字符 - html = re.sub(r'\s*<\s*', '<', html) - return html.strip() \ No newline at end of file diff --git a/magic_pdf/operators/__init__.py b/magic_pdf/operators/__init__.py deleted file mode 100644 index 84ae24aefa4153ff32b5cc540da1b730ad927c6a..0000000000000000000000000000000000000000 --- a/magic_pdf/operators/__init__.py +++ /dev/null @@ -1,94 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Callable - -from magic_pdf.data.data_reader_writer import DataWriter -from magic_pdf.data.dataset import Dataset -from magic_pdf.operators.pipes import PipeResult - - -class InferenceResultBase(ABC): - - @abstractmethod - def __init__(self, inference_results: list, dataset: Dataset): - """Initialized method. - - Args: - inference_results (list): the inference result generated by model - dataset (Dataset): the dataset related with model inference result - """ - pass - - @abstractmethod - def draw_model(self, file_path: str) -> None: - """Draw model inference result. - - Args: - file_path (str): the output file path - """ - pass - - @abstractmethod - def dump_model(self, writer: DataWriter, file_path: str): - """Dump model inference result to file. - - Args: - writer (DataWriter): writer handle - file_path (str): the location of target file - """ - pass - - @abstractmethod - def get_infer_res(self): - """Get the inference result. - - Returns: - list: the inference result generated by model - """ - pass - - @abstractmethod - def apply(self, proc: Callable, *args, **kwargs): - """Apply callable method which. - - Args: - proc (Callable): invoke proc as follows: - proc(inference_result, *args, **kwargs) - - Returns: - Any: return the result generated by proc - """ - pass - - def pipe_txt_mode( - self, - imageWriter: DataWriter, - start_page_id=0, - end_page_id=None, - debug_mode=False, - lang=None, - ) -> PipeResult: - """Post-proc the model inference result, Extract the text using the - third library, such as `pymupdf` - - Args: - imageWriter (DataWriter): the image writer handle - start_page_id (int, optional): Defaults to 0. Let user select some pages He/She want to process - end_page_id (int, optional): Defaults to the last page index of dataset. Let user select some pages He/She want to process - debug_mode (bool, optional): Defaults to False. will dump more log if enabled - lang (str, optional): Defaults to None. 
- - Returns: - PipeResult: the result - """ - pass - - @abstractmethod - def pipe_ocr_mode( - self, - imageWriter: DataWriter, - start_page_id=0, - end_page_id=None, - debug_mode=False, - lang=None, - ) -> PipeResult: - pass diff --git a/magic_pdf/operators/models.py b/magic_pdf/operators/models.py deleted file mode 100644 index 34cbfe4bd1c804c4e9ecc3888cc6805948d2f164..0000000000000000000000000000000000000000 --- a/magic_pdf/operators/models.py +++ /dev/null @@ -1,154 +0,0 @@ -import copy -import json -import os -from typing import Callable - -from magic_pdf.config.constants import PARSE_TYPE_OCR, PARSE_TYPE_TXT -from magic_pdf.config.enums import SupportedPdfParseMethod -from magic_pdf.data.data_reader_writer import DataWriter -from magic_pdf.data.dataset import Dataset -from magic_pdf.libs.draw_bbox import draw_model_bbox -from magic_pdf.libs.version import __version__ -from magic_pdf.operators.pipes import PipeResult -from magic_pdf.pdf_parse_union_core_v2 import pdf_parse_union -from magic_pdf.operators import InferenceResultBase - -class InferenceResult(InferenceResultBase): - def __init__(self, inference_results: list, dataset: Dataset): - """Initialized method. - - Args: - inference_results (list): the inference result generated by model - dataset (Dataset): the dataset related with model inference result - """ - self._infer_res = inference_results - self._dataset = dataset - - def draw_model(self, file_path: str) -> None: - """Draw model inference result. - - Args: - file_path (str): the output file path - """ - dir_name = os.path.dirname(file_path) - base_name = os.path.basename(file_path) - if not os.path.exists(dir_name): - os.makedirs(dir_name, exist_ok=True) - draw_model_bbox( - copy.deepcopy(self._infer_res), self._dataset, dir_name, base_name - ) - - def dump_model(self, writer: DataWriter, file_path: str): - """Dump model inference result to file. - - Args: - writer (DataWriter): writer handle - file_path (str): the location of target file - """ - writer.write_string( - file_path, json.dumps(self._infer_res, ensure_ascii=False, indent=4) - ) - - def get_infer_res(self): - """Get the inference result. - - Returns: - list: the inference result generated by model - """ - return self._infer_res - - def apply(self, proc: Callable, *args, **kwargs): - """Apply callable method which. - - Args: - proc (Callable): invoke proc as follows: - proc(inference_result, *args, **kwargs) - - Returns: - Any: return the result generated by proc - """ - return proc(copy.deepcopy(self._infer_res), *args, **kwargs) - - def pipe_txt_mode( - self, - imageWriter: DataWriter, - start_page_id=0, - end_page_id=None, - debug_mode=False, - lang=None, - ) -> PipeResult: - """Post-proc the model inference result, Extract the text using the - third library, such as `pymupdf` - - Args: - imageWriter (DataWriter): the image writer handle - start_page_id (int, optional): Defaults to 0. Let user select some pages He/She want to process - end_page_id (int, optional): Defaults to the last page index of dataset. Let user select some pages He/She want to process - debug_mode (bool, optional): Defaults to False. will dump more log if enabled - lang (str, optional): Defaults to None. 
- - Returns: - PipeResult: the result - """ - - def proc(*args, **kwargs) -> PipeResult: - res = pdf_parse_union(*args, **kwargs) - res['_parse_type'] = PARSE_TYPE_TXT - res['_version_name'] = __version__ - if 'lang' in kwargs and kwargs['lang'] is not None: - res['lang'] = kwargs['lang'] - return PipeResult(res, self._dataset) - - res = self.apply( - proc, - self._dataset, - imageWriter, - SupportedPdfParseMethod.TXT, - start_page_id=start_page_id, - end_page_id=end_page_id, - debug_mode=debug_mode, - lang=lang, - ) - return res - - def pipe_ocr_mode( - self, - imageWriter: DataWriter, - start_page_id=0, - end_page_id=None, - debug_mode=False, - lang=None, - ) -> PipeResult: - """Post-proc the model inference result, Extract the text using `OCR` - technical. - - Args: - imageWriter (DataWriter): the image writer handle - start_page_id (int, optional): Defaults to 0. Let user select some pages He/She want to process - end_page_id (int, optional): Defaults to the last page index of dataset. Let user select some pages He/She want to process - debug_mode (bool, optional): Defaults to False. will dump more log if enabled - lang (str, optional): Defaults to None. - - Returns: - PipeResult: the result - """ - - def proc(*args, **kwargs) -> PipeResult: - res = pdf_parse_union(*args, **kwargs) - res['_parse_type'] = PARSE_TYPE_OCR - res['_version_name'] = __version__ - if 'lang' in kwargs and kwargs['lang'] is not None: - res['lang'] = kwargs['lang'] - return PipeResult(res, self._dataset) - - res = self.apply( - proc, - self._dataset, - imageWriter, - SupportedPdfParseMethod.OCR, - start_page_id=start_page_id, - end_page_id=end_page_id, - debug_mode=debug_mode, - lang=lang, - ) - return res diff --git a/magic_pdf/operators/pipes.py b/magic_pdf/operators/pipes.py deleted file mode 100644 index 8a9f7a563682d5271017550d4753ec3d045e6d43..0000000000000000000000000000000000000000 --- a/magic_pdf/operators/pipes.py +++ /dev/null @@ -1,191 +0,0 @@ -import copy -import json -import os -from typing import Callable - -from magic_pdf.config.make_content_config import DropMode, MakeMode -from magic_pdf.data.data_reader_writer import DataWriter -from magic_pdf.data.dataset import Dataset -from magic_pdf.dict2md.ocr_mkcontent import union_make -from magic_pdf.libs.draw_bbox import (draw_layout_bbox, draw_line_sort_bbox, - draw_span_bbox) -from magic_pdf.libs.json_compressor import JsonCompressor - - -class PipeResult: - def __init__(self, pipe_res, dataset: Dataset): - """Initialized. - - Args: - pipe_res (list[dict]): the pipeline processed result of model inference result - dataset (Dataset): the dataset associated with pipe_res - """ - self._pipe_res = pipe_res - self._dataset = dataset - - def get_markdown( - self, - img_dir_or_bucket_prefix: str, - drop_mode=DropMode.NONE, - md_make_mode=MakeMode.MM_MD, - ) -> str: - """Get markdown content. - - Args: - img_dir_or_bucket_prefix (str): The s3 bucket prefix or local file directory which used to store the figure - drop_mode (str, optional): Drop strategy when some page which is corrupted or inappropriate. Defaults to DropMode.NONE. - md_make_mode (str, optional): The content Type of Markdown be made. Defaults to MakeMode.MM_MD. 
- - Returns: - str: return markdown content - """ - pdf_info_list = self._pipe_res['pdf_info'] - md_content = union_make( - pdf_info_list, md_make_mode, drop_mode, img_dir_or_bucket_prefix - ) - return md_content - - def dump_md( - self, - writer: DataWriter, - file_path: str, - img_dir_or_bucket_prefix: str, - drop_mode=DropMode.NONE, - md_make_mode=MakeMode.MM_MD, - ): - """Dump The Markdown. - - Args: - writer (DataWriter): File writer handle - file_path (str): The file location of markdown - img_dir_or_bucket_prefix (str): The s3 bucket prefix or local file directory which used to store the figure - drop_mode (str, optional): Drop strategy when some page which is corrupted or inappropriate. Defaults to DropMode.NONE. - md_make_mode (str, optional): The content Type of Markdown be made. Defaults to MakeMode.MM_MD. - """ - - md_content = self.get_markdown( - img_dir_or_bucket_prefix, drop_mode=drop_mode, md_make_mode=md_make_mode - ) - writer.write_string(file_path, md_content) - - def get_content_list( - self, - image_dir_or_bucket_prefix: str, - drop_mode=DropMode.NONE, - ) -> str: - """Get Content List. - - Args: - image_dir_or_bucket_prefix (str): The s3 bucket prefix or local file directory which used to store the figure - drop_mode (str, optional): Drop strategy when some page which is corrupted or inappropriate. Defaults to DropMode.NONE. - - Returns: - str: content list content - """ - pdf_info_list = self._pipe_res['pdf_info'] - content_list = union_make( - pdf_info_list, - MakeMode.STANDARD_FORMAT, - drop_mode, - image_dir_or_bucket_prefix, - ) - return content_list - - def dump_content_list( - self, - writer: DataWriter, - file_path: str, - image_dir_or_bucket_prefix: str, - drop_mode=DropMode.NONE, - ): - """Dump Content List. - - Args: - writer (DataWriter): File writer handle - file_path (str): The file location of content list - image_dir_or_bucket_prefix (str): The s3 bucket prefix or local file directory which used to store the figure - drop_mode (str, optional): Drop strategy when some page which is corrupted or inappropriate. Defaults to DropMode.NONE. - """ - content_list = self.get_content_list( - image_dir_or_bucket_prefix, drop_mode=drop_mode, - ) - writer.write_string( - file_path, json.dumps(content_list, ensure_ascii=False, indent=4) - ) - - def get_middle_json(self) -> str: - """Get middle json. - - Returns: - str: The content of middle json - """ - return json.dumps(self._pipe_res, ensure_ascii=False, indent=4) - - def dump_middle_json(self, writer: DataWriter, file_path: str): - """Dump the result of pipeline. - - Args: - writer (DataWriter): File writer handler - file_path (str): The file location of middle json - """ - middle_json = self.get_middle_json() - writer.write_string(file_path, middle_json) - - def draw_layout(self, file_path: str) -> None: - """Draw the layout. - - Args: - file_path (str): The file location of layout result file - """ - dir_name = os.path.dirname(file_path) - base_name = os.path.basename(file_path) - if not os.path.exists(dir_name): - os.makedirs(dir_name, exist_ok=True) - pdf_info = self._pipe_res['pdf_info'] - draw_layout_bbox(pdf_info, self._dataset.data_bits(), dir_name, base_name) - - def draw_span(self, file_path: str): - """Draw the Span. 
- - Args: - file_path (str): The file location of span result file - """ - dir_name = os.path.dirname(file_path) - base_name = os.path.basename(file_path) - if not os.path.exists(dir_name): - os.makedirs(dir_name, exist_ok=True) - pdf_info = self._pipe_res['pdf_info'] - draw_span_bbox(pdf_info, self._dataset.data_bits(), dir_name, base_name) - - def draw_line_sort(self, file_path: str): - """Draw line sort. - - Args: - file_path (str): The file location of line sort result file - """ - dir_name = os.path.dirname(file_path) - base_name = os.path.basename(file_path) - if not os.path.exists(dir_name): - os.makedirs(dir_name, exist_ok=True) - pdf_info = self._pipe_res['pdf_info'] - draw_line_sort_bbox(pdf_info, self._dataset.data_bits(), dir_name, base_name) - - def get_compress_pdf_mid_data(self): - """Compress the pipeline result. - - Returns: - str: compress the pipeline result and return - """ - return JsonCompressor.compress_json(self._pipe_res) - - def apply(self, proc: Callable, *args, **kwargs): - """Apply callable method which. - - Args: - proc (Callable): invoke proc as follows: - proc(pipeline_result, *args, **kwargs) - - Returns: - Any: return the result generated by proc - """ - return proc(copy.deepcopy(self._pipe_res), *args, **kwargs) diff --git a/magic_pdf/pdf_parse_union_core_v2.py b/magic_pdf/pdf_parse_union_core_v2.py deleted file mode 100644 index 2ca8fa11f8bb54712be1647a048090dc79257a35..0000000000000000000000000000000000000000 --- a/magic_pdf/pdf_parse_union_core_v2.py +++ /dev/null @@ -1,1049 +0,0 @@ -import copy -import math -import os -import re -import statistics -import time -import warnings -from typing import List - -import cv2 -import fitz -import torch -import numpy as np -from loguru import logger -from tqdm import tqdm - -from magic_pdf.config.enums import SupportedPdfParseMethod -from magic_pdf.config.ocr_content_type import BlockType, ContentType -from magic_pdf.data.dataset import Dataset, PageableData -from magic_pdf.libs.boxbase import calculate_overlap_area_in_bbox1_area_ratio, __is_overlaps_y_exceeds_threshold -from magic_pdf.libs.clean_memory import clean_memory -from magic_pdf.libs.config_reader import get_local_layoutreader_model_dir, get_llm_aided_config, get_device -from magic_pdf.libs.convert_utils import dict_to_list -from magic_pdf.libs.hash_utils import compute_md5 -from magic_pdf.libs.pdf_image_tools import cut_image_to_pil_image -from magic_pdf.model.magic_model import MagicModel -from magic_pdf.post_proc.llm_aided import llm_aided_formula, llm_aided_text, llm_aided_title - -from magic_pdf.model.sub_modules.model_init import AtomModelSingleton -from magic_pdf.post_proc.para_split_v3 import para_split -from magic_pdf.pre_proc.construct_page_dict import ocr_construct_page_component_v2 -from magic_pdf.pre_proc.cut_image import ocr_cut_image_and_table -from magic_pdf.pre_proc.ocr_detect_all_bboxes import ocr_prepare_bboxes_for_layout_split_v2 -from magic_pdf.pre_proc.ocr_dict_merge import fill_spans_in_blocks, fix_block_spans_v2, fix_discarded_block -from magic_pdf.pre_proc.ocr_span_list_modify import get_qa_need_list_v2, remove_overlaps_low_confidence_spans, \ - remove_overlaps_min_spans, remove_x_overlapping_chars - -os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1' # 禁止albumentations检查更新 - - -def __replace_STX_ETX(text_str: str): - """Replace \u0002 and \u0003, as these characters become garbled when extracted using pymupdf. In fact, they were originally quotation marks. 
- Drawback: This issue is only observed in English text; it has not been found in Chinese text so far. - - Args: - text_str (str): raw text - - Returns: - _type_: replaced text - """ # noqa: E501 - if text_str: - s = text_str.replace('\u0002', "'") - s = s.replace('\u0003', "'") - return s - return text_str - - -# 连写字符拆分 -def __replace_ligatures(text: str): - ligatures = { - 'fi': 'fi', 'fl': 'fl', 'ff': 'ff', 'ffi': 'ffi', 'ffl': 'ffl', 'ſt': 'ft', 'st': 'st' - } - return re.sub('|'.join(map(re.escape, ligatures.keys())), lambda m: ligatures[m.group()], text) - - -def chars_to_content(span): - # 检查span中的char是否为空 - if len(span['chars']) == 0: - pass - else: - # 先给chars按char['bbox']的中心点的x坐标排序 - span['chars'] = sorted(span['chars'], key=lambda x: (x['bbox'][0] + x['bbox'][2]) / 2) - - # Calculate the width of each character - char_widths = [char['bbox'][2] - char['bbox'][0] for char in span['chars']] - # Calculate the median width - median_width = statistics.median(char_widths) - - # 通过x轴重叠比率移除一部分char - span = remove_x_overlapping_chars(span, median_width) - - content = '' - for char in span['chars']: - - # 如果下一个char的x0和上一个char的x1距离超过0.25个字符宽度,则需要在中间插入一个空格 - char1 = char - char2 = span['chars'][span['chars'].index(char) + 1] if span['chars'].index(char) + 1 < len(span['chars']) else None - if char2 and char2['bbox'][0] - char1['bbox'][2] > median_width * 0.25 and char['c'] != ' ' and char2['c'] != ' ': - content += f"{char['c']} " - else: - content += char['c'] - - span['content'] = __replace_ligatures(content) - - del span['chars'] - - -LINE_STOP_FLAG = ('.', '!', '?', '。', '!', '?', ')', ')', '"', '”', ':', ':', ';', ';', ']', '】', '}', '}', '>', '》', '、', ',', ',', '-', '—', '–',) -LINE_START_FLAG = ('(', '(', '"', '“', '【', '{', '《', '<', '「', '『', '【', '[',) - - -def fill_char_in_spans(spans, all_chars): - - # 简单从上到下排一下序 - spans = sorted(spans, key=lambda x: x['bbox'][1]) - - for char in all_chars: - - for span in spans: - if calculate_char_in_span(char['bbox'], span['bbox'], char['c']): - span['chars'].append(char) - break - - need_ocr_spans = [] - for span in spans: - chars_to_content(span) - # 有的span中虽然没有字但有一两个空的占位符,用宽高和content长度过滤 - if len(span['content']) * span['height'] < span['width'] * 0.5: - # logger.info(f"maybe empty span: {len(span['content'])}, {span['height']}, {span['width']}") - need_ocr_spans.append(span) - del span['height'], span['width'] - return need_ocr_spans - - -# 使用鲁棒性更强的中心点坐标判断 -def calculate_char_in_span(char_bbox, span_bbox, char, span_height_radio=0.33): - char_center_x = (char_bbox[0] + char_bbox[2]) / 2 - char_center_y = (char_bbox[1] + char_bbox[3]) / 2 - span_center_y = (span_bbox[1] + span_bbox[3]) / 2 - span_height = span_bbox[3] - span_bbox[1] - - if ( - span_bbox[0] < char_center_x < span_bbox[2] - and span_bbox[1] < char_center_y < span_bbox[3] - and abs(char_center_y - span_center_y) < span_height * span_height_radio # 字符的中轴和span的中轴高度差不能超过1/4span高度 - ): - return True - else: - # 如果char是LINE_STOP_FLAG,就不用中心点判定,换一种方案(左边界在span区域内,高度判定和之前逻辑一致) - # 主要是给结尾符号一个进入span的机会,这个char还应该离span右边界较近 - if char in LINE_STOP_FLAG: - if ( - (span_bbox[2] - span_height) < char_bbox[0] < span_bbox[2] - and char_center_x > span_bbox[0] - and span_bbox[1] < char_center_y < span_bbox[3] - and abs(char_center_y - span_center_y) < span_height * span_height_radio - ): - return True - elif char in LINE_START_FLAG: - if ( - span_bbox[0] < char_bbox[2] < (span_bbox[0] + span_height) - and char_center_x < span_bbox[2] - and span_bbox[1] < char_center_y < span_bbox[3] - and 
abs(char_center_y - span_center_y) < span_height * span_height_radio - ): - return True - else: - return False - - -def remove_tilted_line(text_blocks): - for block in text_blocks: - remove_lines = [] - for line in block['lines']: - cosine, sine = line['dir'] - # 计算弧度值 - angle_radians = math.atan2(sine, cosine) - # 将弧度值转换为角度值 - angle_degrees = math.degrees(angle_radians) - if 2 < abs(angle_degrees) < 88: - remove_lines.append(line) - for line in remove_lines: - block['lines'].remove(line) - - -def calculate_contrast(img, img_mode) -> float: - """ - 计算给定图像的对比度。 - :param img: 图像,类型为numpy.ndarray - :Param img_mode = 图像的色彩通道,'rgb' 或 'bgr' - :return: 图像的对比度值 - """ - if img_mode == 'rgb': - # 将RGB图像转换为灰度图 - gray_img = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY) - elif img_mode == 'bgr': - # 将BGR图像转换为灰度图 - gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - else: - raise ValueError("Invalid image mode. Please provide 'rgb' or 'bgr'.") - - # 计算均值和标准差 - mean_value = np.mean(gray_img) - std_dev = np.std(gray_img) - # 对比度定义为标准差除以平均值(加上小常数避免除零错误) - contrast = std_dev / (mean_value + 1e-6) - # logger.debug(f"contrast: {contrast}") - return round(contrast, 2) - -# @measure_time -def txt_spans_extract_v2(pdf_page, spans, all_bboxes, all_discarded_blocks, lang): - # cid用0xfffd表示,连字符拆开 - # text_blocks_raw = pdf_page.get_text('rawdict', flags=fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_MEDIABOX_CLIP)['blocks'] - - # cid用0xfffd表示,连字符不拆开 - #text_blocks_raw = pdf_page.get_text('rawdict', flags=fitz.TEXT_PRESERVE_LIGATURES | fitz.TEXT_PRESERVE_WHITESPACE | fitz.TEXT_MEDIABOX_CLIP)['blocks'] - - # 自定义flags出现较多0xfffd,可能是pymupdf可以自行处理内置字典的pdf,不再使用 - text_blocks_raw = pdf_page.get_text('rawdict', flags=fitz.TEXTFLAGS_TEXT)['blocks'] - # text_blocks = pdf_page.get_text('dict', flags=fitz.TEXTFLAGS_TEXT)['blocks'] - - # 移除所有角度不为0或90的line - remove_tilted_line(text_blocks_raw) - - all_pymu_chars = [] - for block in text_blocks_raw: - for line in block['lines']: - cosine, sine = line['dir'] - if abs(cosine) < 0.9 or abs(sine) > 0.1: - continue - for span in line['spans']: - all_pymu_chars.extend(span['chars']) - - # 计算所有sapn的高度的中位数 - span_height_list = [] - for span in spans: - if span['type'] in [ContentType.InterlineEquation, ContentType.Image, ContentType.Table]: - continue - span_height = span['bbox'][3] - span['bbox'][1] - span['height'] = span_height - span['width'] = span['bbox'][2] - span['bbox'][0] - span_height_list.append(span_height) - if len(span_height_list) == 0: - return spans - else: - median_span_height = statistics.median(span_height_list) - - useful_spans = [] - unuseful_spans = [] - # 纵向span的两个特征:1. 高度超过多个line 2. 
高宽比超过某个值 - vertical_spans = [] - for span in spans: - if span['type'] in [ContentType.InterlineEquation, ContentType.Image, ContentType.Table]: - continue - for block in all_bboxes + all_discarded_blocks: - if block[7] in [BlockType.ImageBody, BlockType.TableBody, BlockType.InterlineEquation]: - continue - if calculate_overlap_area_in_bbox1_area_ratio(span['bbox'], block[0:4]) > 0.5: - if span['height'] > median_span_height * 3 and span['height'] > span['width'] * 3: - vertical_spans.append(span) - elif block in all_bboxes: - useful_spans.append(span) - else: - unuseful_spans.append(span) - - break - - """垂直的span框直接用pymu的line进行填充""" - if len(vertical_spans) > 0: - text_blocks = pdf_page.get_text('dict', flags=fitz.TEXTFLAGS_TEXT)['blocks'] - all_pymu_lines = [] - for block in text_blocks: - for line in block['lines']: - all_pymu_lines.append(line) - - for pymu_line in all_pymu_lines: - for span in vertical_spans: - if calculate_overlap_area_in_bbox1_area_ratio(pymu_line['bbox'], span['bbox']) > 0.5: - for pymu_span in pymu_line['spans']: - span['content'] += pymu_span['text'] - break - - for span in vertical_spans: - if len(span['content']) == 0: - spans.remove(span) - - """水平的span框如果没有char则用ocr进行填充""" - new_spans = [] - - for span in useful_spans + unuseful_spans: - if span['type'] in [ContentType.Text]: - span['chars'] = [] - new_spans.append(span) - - need_ocr_spans = fill_char_in_spans(new_spans, all_pymu_chars) - - if len(need_ocr_spans) > 0: - - # 初始化ocr模型 - # atom_model_manager = AtomModelSingleton() - # ocr_model = atom_model_manager.get_atom_model( - # atom_model_name='ocr', - # ocr_show_log=False, - # det_db_box_thresh=0.3, - # lang=lang - # ) - - for span in need_ocr_spans: - # 对span的bbox截图再ocr - span_img = cut_image_to_pil_image(span['bbox'], pdf_page, mode='cv2') - - # 计算span的对比度,低于0.20的span不进行ocr - if calculate_contrast(span_img, img_mode='bgr') <= 0.17: - spans.remove(span) - continue - # pass - - span['content'] = '' - span['score'] = 1 - span['np_img'] = span_img - - - # ocr_res = ocr_model.ocr(span_img, det=False) - # if ocr_res and len(ocr_res) > 0: - # if len(ocr_res[0]) > 0: - # ocr_text, ocr_score = ocr_res[0][0] - # # logger.info(f"ocr_text: {ocr_text}, ocr_score: {ocr_score}") - # if ocr_score > 0.5 and len(ocr_text) > 0: - # span['content'] = ocr_text - # span['score'] = float(round(ocr_score, 2)) - # else: - # spans.remove(span) - - return spans - - -def model_init(model_name: str): - from transformers import LayoutLMv3ForTokenClassification - device_name = get_device() - bf_16_support = False - if device_name.startswith("cuda"): - bf_16_support = torch.cuda.is_bf16_supported() - elif device_name.startswith("mps"): - bf_16_support = True - - device = torch.device(device_name) - if model_name == 'layoutreader': - # 检测modelscope的缓存目录是否存在 - layoutreader_model_dir = get_local_layoutreader_model_dir() - if os.path.exists(layoutreader_model_dir): - model = LayoutLMv3ForTokenClassification.from_pretrained( - layoutreader_model_dir - ) - else: - logger.warning( - 'local layoutreader model not exists, use online model from huggingface' - ) - model = LayoutLMv3ForTokenClassification.from_pretrained( - 'hantian/layoutreader' - ) - if bf_16_support: - model.to(device).eval().bfloat16() - else: - model.to(device).eval() - else: - logger.error('model name not allow') - exit(1) - return model - - -class ModelSingleton: - _instance = None - _models = {} - - def __new__(cls, *args, **kwargs): - if cls._instance is None: - cls._instance = super().__new__(cls) - return cls._instance 
- - def get_model(self, model_name: str): - if model_name not in self._models: - self._models[model_name] = model_init(model_name=model_name) - return self._models[model_name] - - -def do_predict(boxes: List[List[int]], model) -> List[int]: - from magic_pdf.model.sub_modules.reading_oreder.layoutreader.helpers import ( - boxes2inputs, parse_logits, prepare_inputs) - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=FutureWarning, module="transformers") - - inputs = boxes2inputs(boxes) - inputs = prepare_inputs(inputs, model) - logits = model(**inputs).logits.cpu().squeeze(0) - return parse_logits(logits, len(boxes)) - - -def cal_block_index(fix_blocks, sorted_bboxes): - - if sorted_bboxes is not None: - # 使用layoutreader排序 - for block in fix_blocks: - line_index_list = [] - if len(block['lines']) == 0: - block['index'] = sorted_bboxes.index(block['bbox']) - else: - for line in block['lines']: - line['index'] = sorted_bboxes.index(line['bbox']) - line_index_list.append(line['index']) - median_value = statistics.median(line_index_list) - block['index'] = median_value - - # 删除图表body block中的虚拟line信息, 并用real_lines信息回填 - if block['type'] in [BlockType.ImageBody, BlockType.TableBody, BlockType.Title, BlockType.InterlineEquation]: - if 'real_lines' in block: - block['virtual_lines'] = copy.deepcopy(block['lines']) - block['lines'] = copy.deepcopy(block['real_lines']) - del block['real_lines'] - else: - # 使用xycut排序 - block_bboxes = [] - for block in fix_blocks: - # 如果block['bbox']任意值小于0,将其置为0 - block['bbox'] = [max(0, x) for x in block['bbox']] - block_bboxes.append(block['bbox']) - - # 删除图表body block中的虚拟line信息, 并用real_lines信息回填 - if block['type'] in [BlockType.ImageBody, BlockType.TableBody, BlockType.Title, BlockType.InterlineEquation]: - if 'real_lines' in block: - block['virtual_lines'] = copy.deepcopy(block['lines']) - block['lines'] = copy.deepcopy(block['real_lines']) - del block['real_lines'] - - import numpy as np - - from magic_pdf.model.sub_modules.reading_oreder.layoutreader.xycut import \ - recursive_xy_cut - - random_boxes = np.array(block_bboxes) - np.random.shuffle(random_boxes) - res = [] - recursive_xy_cut(np.asarray(random_boxes).astype(int), np.arange(len(block_bboxes)), res) - assert len(res) == len(block_bboxes) - sorted_boxes = random_boxes[np.array(res)].tolist() - - for i, block in enumerate(fix_blocks): - block['index'] = sorted_boxes.index(block['bbox']) - - # 生成line index - sorted_blocks = sorted(fix_blocks, key=lambda b: b['index']) - line_inedx = 1 - for block in sorted_blocks: - for line in block['lines']: - line['index'] = line_inedx - line_inedx += 1 - - return fix_blocks - - -def insert_lines_into_block(block_bbox, line_height, page_w, page_h): - # block_bbox是一个元组(x0, y0, x1, y1),其中(x0, y0)是左下角坐标,(x1, y1)是右上角坐标 - x0, y0, x1, y1 = block_bbox - - block_height = y1 - y0 - block_weight = x1 - x0 - - # 如果block高度小于n行正文,则直接返回block的bbox - if line_height * 2 < block_height: - if ( - block_height > page_h * 0.25 and page_w * 0.5 > block_weight > page_w * 0.25 - ): # 可能是双列结构,可以切细点 - lines = int(block_height / line_height) - else: - # 如果block的宽度超过0.4页面宽度,则将block分成3行(是一种复杂布局,图不能切的太细) - if block_weight > page_w * 0.4: - lines = 3 - elif block_weight > page_w * 0.25: # (可能是三列结构,也切细点) - lines = int(block_height / line_height) - else: # 判断长宽比 - if block_height / block_weight > 1.2: # 细长的不分 - return [[x0, y0, x1, y1]] - else: # 不细长的还是分成两行 - lines = 2 - - line_height = (y1 - y0) / lines - - # 确定从哪个y位置开始绘制线条 - current_y = y0 - - # 用于存储线条的位置信息[(x0, y), ...] 
- lines_positions = [] - - for i in range(lines): - lines_positions.append([x0, current_y, x1, current_y + line_height]) - current_y += line_height - return lines_positions - - else: - return [[x0, y0, x1, y1]] - - -def sort_lines_by_model(fix_blocks, page_w, page_h, line_height, footnote_blocks): - page_line_list = [] - - def add_lines_to_block(b): - line_bboxes = insert_lines_into_block(b['bbox'], line_height, page_w, page_h) - b['lines'] = [] - for line_bbox in line_bboxes: - b['lines'].append({'bbox': line_bbox, 'spans': []}) - page_line_list.extend(line_bboxes) - - for block in fix_blocks: - if block['type'] in [ - BlockType.Text, BlockType.Title, - BlockType.ImageCaption, BlockType.ImageFootnote, - BlockType.TableCaption, BlockType.TableFootnote - ]: - if len(block['lines']) == 0: - add_lines_to_block(block) - elif block['type'] in [BlockType.Title] and len(block['lines']) == 1 and (block['bbox'][3] - block['bbox'][1]) > line_height * 2: - block['real_lines'] = copy.deepcopy(block['lines']) - add_lines_to_block(block) - else: - for line in block['lines']: - bbox = line['bbox'] - page_line_list.append(bbox) - elif block['type'] in [BlockType.ImageBody, BlockType.TableBody, BlockType.InterlineEquation]: - block['real_lines'] = copy.deepcopy(block['lines']) - add_lines_to_block(block) - - for block in footnote_blocks: - footnote_block = {'bbox': block[:4]} - add_lines_to_block(footnote_block) - - if len(page_line_list) > 200: # layoutreader最高支持512line - return None - - # 使用layoutreader排序 - x_scale = 1000.0 / page_w - y_scale = 1000.0 / page_h - boxes = [] - # logger.info(f"Scale: {x_scale}, {y_scale}, Boxes len: {len(page_line_list)}") - for left, top, right, bottom in page_line_list: - if left < 0: - logger.warning( - f'left < 0, left: {left}, right: {right}, top: {top}, bottom: {bottom}, page_w: {page_w}, page_h: {page_h}' - ) # noqa: E501 - left = 0 - if right > page_w: - logger.warning( - f'right > page_w, left: {left}, right: {right}, top: {top}, bottom: {bottom}, page_w: {page_w}, page_h: {page_h}' - ) # noqa: E501 - right = page_w - if top < 0: - logger.warning( - f'top < 0, left: {left}, right: {right}, top: {top}, bottom: {bottom}, page_w: {page_w}, page_h: {page_h}' - ) # noqa: E501 - top = 0 - if bottom > page_h: - logger.warning( - f'bottom > page_h, left: {left}, right: {right}, top: {top}, bottom: {bottom}, page_w: {page_w}, page_h: {page_h}' - ) # noqa: E501 - bottom = page_h - - left = round(left * x_scale) - top = round(top * y_scale) - right = round(right * x_scale) - bottom = round(bottom * y_scale) - assert ( - 1000 >= right >= left >= 0 and 1000 >= bottom >= top >= 0 - ), f'Invalid box. 
right: {right}, left: {left}, bottom: {bottom}, top: {top}' # noqa: E126, E121 - boxes.append([left, top, right, bottom]) - model_manager = ModelSingleton() - model = model_manager.get_model('layoutreader') - with torch.no_grad(): - orders = do_predict(boxes, model) - sorted_bboxes = [page_line_list[i] for i in orders] - - return sorted_bboxes - - -def get_line_height(blocks): - page_line_height_list = [] - for block in blocks: - if block['type'] in [ - BlockType.Text, BlockType.Title, - BlockType.ImageCaption, BlockType.ImageFootnote, - BlockType.TableCaption, BlockType.TableFootnote - ]: - for line in block['lines']: - bbox = line['bbox'] - page_line_height_list.append(int(bbox[3] - bbox[1])) - if len(page_line_height_list) > 0: - return statistics.median(page_line_height_list) - else: - return 10 - - -def process_groups(groups, body_key, caption_key, footnote_key): - body_blocks = [] - caption_blocks = [] - footnote_blocks = [] - for i, group in enumerate(groups): - group[body_key]['group_id'] = i - body_blocks.append(group[body_key]) - for caption_block in group[caption_key]: - caption_block['group_id'] = i - caption_blocks.append(caption_block) - for footnote_block in group[footnote_key]: - footnote_block['group_id'] = i - footnote_blocks.append(footnote_block) - return body_blocks, caption_blocks, footnote_blocks - - -def process_block_list(blocks, body_type, block_type): - indices = [block['index'] for block in blocks] - median_index = statistics.median(indices) - - body_bbox = next((block['bbox'] for block in blocks if block.get('type') == body_type), []) - - return { - 'type': block_type, - 'bbox': body_bbox, - 'blocks': blocks, - 'index': median_index, - } - - -def revert_group_blocks(blocks): - image_groups = {} - table_groups = {} - new_blocks = [] - for block in blocks: - if block['type'] in [BlockType.ImageBody, BlockType.ImageCaption, BlockType.ImageFootnote]: - group_id = block['group_id'] - if group_id not in image_groups: - image_groups[group_id] = [] - image_groups[group_id].append(block) - elif block['type'] in [BlockType.TableBody, BlockType.TableCaption, BlockType.TableFootnote]: - group_id = block['group_id'] - if group_id not in table_groups: - table_groups[group_id] = [] - table_groups[group_id].append(block) - else: - new_blocks.append(block) - - for group_id, blocks in image_groups.items(): - new_blocks.append(process_block_list(blocks, BlockType.ImageBody, BlockType.Image)) - - for group_id, blocks in table_groups.items(): - new_blocks.append(process_block_list(blocks, BlockType.TableBody, BlockType.Table)) - - return new_blocks - - -def remove_outside_spans(spans, all_bboxes, all_discarded_blocks): - def get_block_bboxes(blocks, block_type_list): - return [block[0:4] for block in blocks if block[7] in block_type_list] - - image_bboxes = get_block_bboxes(all_bboxes, [BlockType.ImageBody]) - table_bboxes = get_block_bboxes(all_bboxes, [BlockType.TableBody]) - other_block_type = [] - for block_type in BlockType.__dict__.values(): - if not isinstance(block_type, str): - continue - if block_type not in [BlockType.ImageBody, BlockType.TableBody]: - other_block_type.append(block_type) - other_block_bboxes = get_block_bboxes(all_bboxes, other_block_type) - discarded_block_bboxes = get_block_bboxes(all_discarded_blocks, [BlockType.Discarded]) - - new_spans = [] - - for span in spans: - span_bbox = span['bbox'] - span_type = span['type'] - - if any(calculate_overlap_area_in_bbox1_area_ratio(span_bbox, block_bbox) > 0.4 for block_bbox in - discarded_block_bboxes): - 
new_spans.append(span) - continue - - if span_type == ContentType.Image: - if any(calculate_overlap_area_in_bbox1_area_ratio(span_bbox, block_bbox) > 0.5 for block_bbox in - image_bboxes): - new_spans.append(span) - elif span_type == ContentType.Table: - if any(calculate_overlap_area_in_bbox1_area_ratio(span_bbox, block_bbox) > 0.5 for block_bbox in - table_bboxes): - new_spans.append(span) - else: - if any(calculate_overlap_area_in_bbox1_area_ratio(span_bbox, block_bbox) > 0.5 for block_bbox in - other_block_bboxes): - new_spans.append(span) - - return new_spans - - -def parse_page_core( - page_doc: PageableData, magic_model, page_id, pdf_bytes_md5, imageWriter, parse_mode, lang -): - need_drop = False - drop_reason = [] - - """从magic_model对象中获取后面会用到的区块信息""" - img_groups = magic_model.get_imgs_v2(page_id) - table_groups = magic_model.get_tables_v2(page_id) - - """对image和table的区块分组""" - img_body_blocks, img_caption_blocks, img_footnote_blocks = process_groups( - img_groups, 'image_body', 'image_caption_list', 'image_footnote_list' - ) - - table_body_blocks, table_caption_blocks, table_footnote_blocks = process_groups( - table_groups, 'table_body', 'table_caption_list', 'table_footnote_list' - ) - - discarded_blocks = magic_model.get_discarded(page_id) - text_blocks = magic_model.get_text_blocks(page_id) - title_blocks = magic_model.get_title_blocks(page_id) - inline_equations, interline_equations, interline_equation_blocks = magic_model.get_equations(page_id) - page_w, page_h = magic_model.get_page_size(page_id) - - def merge_title_blocks(blocks, x_distance_threshold=0.1*page_w): - def merge_two_bbox(b1, b2): - x_min = min(b1['bbox'][0], b2['bbox'][0]) - y_min = min(b1['bbox'][1], b2['bbox'][1]) - x_max = max(b1['bbox'][2], b2['bbox'][2]) - y_max = max(b1['bbox'][3], b2['bbox'][3]) - return x_min, y_min, x_max, y_max - - def merge_two_blocks(b1, b2): - # 合并两个标题块的边界框 - b1['bbox'] = merge_two_bbox(b1, b2) - - # 合并两个标题块的文本内容 - line1 = b1['lines'][0] - line2 = b2['lines'][0] - line1['bbox'] = merge_two_bbox(line1, line2) - line1['spans'].extend(line2['spans']) - - return b1, b2 - - # 按 y 轴重叠度聚集标题块 - y_overlapping_blocks = [] - title_bs = [b for b in blocks if b['type'] == BlockType.Title] - while title_bs: - block1 = title_bs.pop(0) - current_row = [block1] - to_remove = [] - for block2 in title_bs: - if ( - __is_overlaps_y_exceeds_threshold(block1['bbox'], block2['bbox'], 0.9) - and len(block1['lines']) == 1 - and len(block2['lines']) == 1 - ): - current_row.append(block2) - to_remove.append(block2) - for b in to_remove: - title_bs.remove(b) - y_overlapping_blocks.append(current_row) - - # 按x轴坐标排序并合并标题块 - to_remove_blocks = [] - for row in y_overlapping_blocks: - if len(row) == 1: - continue - - # 按x轴坐标排序 - row.sort(key=lambda x: x['bbox'][0]) - - merged_block = row[0] - for i in range(1, len(row)): - left_block = merged_block - right_block = row[i] - - left_height = left_block['bbox'][3] - left_block['bbox'][1] - right_height = right_block['bbox'][3] - right_block['bbox'][1] - - if ( - right_block['bbox'][0] - left_block['bbox'][2] < x_distance_threshold - and left_height * 0.95 < right_height < left_height * 1.05 - ): - merged_block, to_remove_block = merge_two_blocks(merged_block, right_block) - to_remove_blocks.append(to_remove_block) - else: - merged_block = right_block - - for b in to_remove_blocks: - blocks.remove(b) - - """将所有区块的bbox整理到一起""" - # interline_equation_blocks参数不够准,后面切换到interline_equations上 - interline_equation_blocks = [] - if len(interline_equation_blocks) > 0: - 
all_bboxes, all_discarded_blocks, footnote_blocks = ocr_prepare_bboxes_for_layout_split_v2( - img_body_blocks, img_caption_blocks, img_footnote_blocks, - table_body_blocks, table_caption_blocks, table_footnote_blocks, - discarded_blocks, - text_blocks, - title_blocks, - interline_equation_blocks, - page_w, - page_h, - ) - else: - all_bboxes, all_discarded_blocks, footnote_blocks = ocr_prepare_bboxes_for_layout_split_v2( - img_body_blocks, img_caption_blocks, img_footnote_blocks, - table_body_blocks, table_caption_blocks, table_footnote_blocks, - discarded_blocks, - text_blocks, - title_blocks, - interline_equations, - page_w, - page_h, - ) - - """获取所有的spans信息""" - spans = magic_model.get_all_spans(page_id) - - """在删除重复span之前,应该通过image_body和table_body的block过滤一下image和table的span""" - """顺便删除大水印并保留abandon的span""" - spans = remove_outside_spans(spans, all_bboxes, all_discarded_blocks) - - """删除重叠spans中置信度较低的那些""" - spans, dropped_spans_by_confidence = remove_overlaps_low_confidence_spans(spans) - """删除重叠spans中较小的那些""" - spans, dropped_spans_by_span_overlap = remove_overlaps_min_spans(spans) - - """根据parse_mode,构造spans,主要是文本类的字符填充""" - if parse_mode == SupportedPdfParseMethod.TXT: - - """使用新版本的混合ocr方案.""" - spans = txt_spans_extract_v2(page_doc, spans, all_bboxes, all_discarded_blocks, lang) - - elif parse_mode == SupportedPdfParseMethod.OCR: - pass - else: - raise Exception('parse_mode must be txt or ocr') - - """先处理不需要排版的discarded_blocks""" - discarded_block_with_spans, spans = fill_spans_in_blocks( - all_discarded_blocks, spans, 0.4 - ) - fix_discarded_blocks = fix_discarded_block(discarded_block_with_spans) - - """如果当前页面没有有效的bbox则跳过""" - if len(all_bboxes) == 0: - logger.warning(f'skip this page, not found useful bbox, page_id: {page_id}') - return ocr_construct_page_component_v2( - [], - [], - page_id, - page_w, - page_h, - [], - [], - [], - interline_equations, - fix_discarded_blocks, - need_drop, - drop_reason, - ) - - """对image和table截图""" - spans = ocr_cut_image_and_table( - spans, page_doc, page_id, pdf_bytes_md5, imageWriter - ) - - """span填充进block""" - block_with_spans, spans = fill_spans_in_blocks(all_bboxes, spans, 0.5) - - """对block进行fix操作""" - fix_blocks = fix_block_spans_v2(block_with_spans) - - """同一行被断开的titile合并""" - merge_title_blocks(fix_blocks) - - """获取所有line并计算正文line的高度""" - line_height = get_line_height(fix_blocks) - - """获取所有line并对line排序""" - sorted_bboxes = sort_lines_by_model(fix_blocks, page_w, page_h, line_height, footnote_blocks) - - """根据line的中位数算block的序列关系""" - fix_blocks = cal_block_index(fix_blocks, sorted_bboxes) - - """将image和table的block还原回group形式参与后续流程""" - fix_blocks = revert_group_blocks(fix_blocks) - - """重排block""" - sorted_blocks = sorted(fix_blocks, key=lambda b: b['index']) - - """block内重排(img和table的block内多个caption或footnote的排序)""" - for block in sorted_blocks: - if block['type'] in [BlockType.Image, BlockType.Table]: - block['blocks'] = sorted(block['blocks'], key=lambda b: b['index']) - - """获取QA需要外置的list""" - images, tables, interline_equations = get_qa_need_list_v2(sorted_blocks) - - """构造pdf_info_dict""" - page_info = ocr_construct_page_component_v2( - sorted_blocks, - [], - page_id, - page_w, - page_h, - [], - images, - tables, - interline_equations, - fix_discarded_blocks, - need_drop, - drop_reason, - ) - return page_info - - -def pdf_parse_union( - model_list, - dataset: Dataset, - imageWriter, - parse_mode, - start_page_id=0, - end_page_id=None, - debug_mode=False, - lang=None, -): - - pdf_bytes_md5 = compute_md5(dataset.data_bits()) - - 
"""初始化空的pdf_info_dict""" - pdf_info_dict = {} - - """用model_list和docs对象初始化magic_model""" - magic_model = MagicModel(model_list, dataset) - - """根据输入的起始范围解析pdf""" - end_page_id = ( - end_page_id - if end_page_id is not None and end_page_id >= 0 - else len(dataset) - 1 - ) - - if end_page_id > len(dataset) - 1: - logger.warning('end_page_id is out of range, use pdf_docs length') - end_page_id = len(dataset) - 1 - - # """初始化启动时间""" - # start_time = time.time() - - # for page_id, page in enumerate(dataset): - for page_id, page in tqdm(enumerate(dataset), total=len(dataset), desc="Processing pages"): - # """debug时输出每页解析的耗时.""" - # if debug_mode: - # time_now = time.time() - # logger.info( - # f'page_id: {page_id}, last_page_cost_time: {round(time.time() - start_time, 2)}' - # ) - # start_time = time_now - - """解析pdf中的每一页""" - if start_page_id <= page_id <= end_page_id: - page_info = parse_page_core( - page, magic_model, page_id, pdf_bytes_md5, imageWriter, parse_mode, lang - ) - else: - page_info = page.get_page_info() - page_w = page_info.w - page_h = page_info.h - page_info = ocr_construct_page_component_v2( - [], [], page_id, page_w, page_h, [], [], [], [], [], True, 'skip page' - ) - pdf_info_dict[f'page_{page_id}'] = page_info - - need_ocr_list = [] - img_crop_list = [] - text_block_list = [] - for pange_id, page_info in pdf_info_dict.items(): - for block in page_info['preproc_blocks']: - if block['type'] in ['table', 'image']: - for sub_block in block['blocks']: - if sub_block['type'] in ['image_caption', 'image_footnote', 'table_caption', 'table_footnote']: - text_block_list.append(sub_block) - elif block['type'] in ['text', 'title']: - text_block_list.append(block) - for block in page_info['discarded_blocks']: - text_block_list.append(block) - for block in text_block_list: - for line in block['lines']: - for span in line['spans']: - if 'np_img' in span: - need_ocr_list.append(span) - img_crop_list.append(span['np_img']) - span.pop('np_img') - if len(img_crop_list) > 0: - # Get OCR results for this language's images - atom_model_manager = AtomModelSingleton() - ocr_model = atom_model_manager.get_atom_model( - atom_model_name='ocr', - ocr_show_log=False, - det_db_box_thresh=0.3, - lang=lang - ) - # rec_start = time.time() - ocr_res_list = ocr_model.ocr(img_crop_list, det=False, tqdm_enable=True)[0] - # Verify we have matching counts - assert len(ocr_res_list) == len(need_ocr_list), f'ocr_res_list: {len(ocr_res_list)}, need_ocr_list: {len(need_ocr_list)}' - # Process OCR results for this language - for index, span in enumerate(need_ocr_list): - ocr_text, ocr_score = ocr_res_list[index] - span['content'] = ocr_text - span['score'] = float(f"{ocr_score:.3f}") - # rec_time = time.time() - rec_start - # logger.info(f'ocr-dynamic-rec time: {round(rec_time, 2)}, total images processed: {len(img_crop_list)}') - - - """分段""" - para_split(pdf_info_dict) - - """llm优化""" - llm_aided_config = get_llm_aided_config() - if llm_aided_config is not None: - """公式优化""" - formula_aided_config = llm_aided_config.get('formula_aided', None) - if formula_aided_config is not None: - if formula_aided_config.get('enable', False): - llm_aided_formula_start_time = time.time() - llm_aided_formula(pdf_info_dict, formula_aided_config) - logger.info(f'llm aided formula time: {round(time.time() - llm_aided_formula_start_time, 2)}') - """文本优化""" - text_aided_config = llm_aided_config.get('text_aided', None) - if text_aided_config is not None: - if text_aided_config.get('enable', False): - llm_aided_text_start_time = 
time.time() - llm_aided_text(pdf_info_dict, text_aided_config) - logger.info(f'llm aided text time: {round(time.time() - llm_aided_text_start_time, 2)}') - """标题优化""" - title_aided_config = llm_aided_config.get('title_aided', None) - if title_aided_config is not None: - if title_aided_config.get('enable', False): - llm_aided_title_start_time = time.time() - llm_aided_title(pdf_info_dict, title_aided_config) - logger.info(f'llm aided title time: {round(time.time() - llm_aided_title_start_time, 2)}') - - """dict转list""" - pdf_info_list = dict_to_list(pdf_info_dict) - new_pdf_info_dict = { - 'pdf_info': pdf_info_list, - } - - clean_memory(get_device()) - - return new_pdf_info_dict - - -if __name__ == '__main__': - pass diff --git a/magic_pdf/post_proc/llm_aided.py b/magic_pdf/post_proc/llm_aided.py deleted file mode 100644 index c37481b3298a654b4596383e565f4514734d2dec..0000000000000000000000000000000000000000 --- a/magic_pdf/post_proc/llm_aided.py +++ /dev/null @@ -1,164 +0,0 @@ -# Copyright (c) Opendatalab. All rights reserved. -import json -from loguru import logger -from magic_pdf.dict2md.ocr_mkcontent import merge_para_with_text -from openai import OpenAI -import ast - - -#@todo: 有的公式以"\"结尾,这样会导致尾部拼接的"$"被转义,也需要修复 -formula_optimize_prompt = """请根据以下指南修正LaTeX公式的错误,确保公式能够渲染且符合原始内容: - -1. 修正渲染或编译错误: - - Some syntax errors such as mismatched/missing/extra tokens. Your task is to fix these syntax errors and make sure corrected results conform to latex math syntax principles. - - 包含KaTeX不支持的关键词等原因导致的无法编译或渲染的错误 - -2. 保留原始信息: - - 保留原始公式中的所有重要信息 - - 不要添加任何原始公式中没有的新信息 - -IMPORTANT:请仅返回修正后的公式,不要包含任何介绍、解释或元数据。 - -LaTeX recognition result: -$FORMULA - -Your corrected result: -""" - -text_optimize_prompt = f"""请根据以下指南修正OCR引起的错误,确保文本连贯并符合原始内容: - -1. 修正OCR引起的拼写错误和错误: - - 修正常见的OCR错误(例如,'rn' 被误读为 'm') - - 使用上下文和常识进行修正 - - 只修正明显的错误,不要不必要的修改内容 - - 不要添加额外的句号或其他不必要的标点符号 - -2. 保持原始结构: - - 保留所有标题和子标题 - -3. 保留原始内容: - - 保留原始文本中的所有重要信息 - - 不要添加任何原始文本中没有的新信息 - - 保留段落之间的换行符 - -4. 保持连贯性: - - 确保内容与前文顺畅连接 - - 适当处理在句子中间开始或结束的文本 - -5. 修正行内公式: - - 去除行内公式前后多余的空格 - - 修正公式中的OCR错误 - - 确保公式能够通过KaTeX渲染 - -6. 修正全角字符 - - 修正全角标点符号为半角标点符号 - - 修正全角字母为半角字母 - - 修正全角数字为半角数字 - -IMPORTANT:请仅返回修正后的文本,保留所有原始格式,包括换行符。不要包含任何介绍、解释或元数据。 - -Previous context: - -Current chunk to process: - -Corrected text: -""" - -def llm_aided_formula(pdf_info_dict, formula_aided_config): - pass - -def llm_aided_text(pdf_info_dict, text_aided_config): - pass - -def llm_aided_title(pdf_info_dict, title_aided_config): - client = OpenAI( - api_key=title_aided_config["api_key"], - base_url=title_aided_config["base_url"], - ) - title_dict = {} - origin_title_list = [] - i = 0 - for page_num, page in pdf_info_dict.items(): - blocks = page["para_blocks"] - for block in blocks: - if block["type"] == "title": - origin_title_list.append(block) - title_text = merge_para_with_text(block) - page_line_height_list = [] - for line in block['lines']: - bbox = line['bbox'] - page_line_height_list.append(int(bbox[3] - bbox[1])) - if len(page_line_height_list) > 0: - line_avg_height = sum(page_line_height_list) / len(page_line_height_list) - else: - line_avg_height = int(block['bbox'][3] - block['bbox'][1]) - title_dict[f"{i}"] = [title_text, line_avg_height, int(page_num[5:])+1] - i += 1 - # logger.info(f"Title list: {title_dict}") - - title_optimize_prompt = f"""输入的内容是一篇文档中所有标题组成的字典,请根据以下指南优化标题的结果,使结果符合正常文档的层次结构: - -1. 字典中每个value均为一个list,包含以下元素: - - 标题文本 - - 文本行高是标题所在块的平均行高 - - 标题所在的页码 - -2. 保留原始内容: - - 输入的字典中所有元素都是有效的,不能删除字典中的任何元素 - - 请务必保证输出的字典中元素的数量和输入的数量一致 - -3. 
保持字典内key-value的对应关系不变 - -4. 优化层次结构: - - 为每个标题元素添加适当的层次结构 - - 行高较大的标题一般是更高级别的标题 - - 标题从前至后的层级必须是连续的,不能跳过层级 - - 标题层级最多为4级,不要添加过多的层级 - - 优化后的标题只保留代表该标题的层级的整数,不要保留其他信息 - -5. 合理性检查与微调: - - 在完成初步分级后,仔细检查分级结果的合理性 - - 根据上下文关系和逻辑顺序,对不合理的分级进行微调 - - 确保最终的分级结果符合文档的实际结构和逻辑 - - 字典中可能包含被误当成标题的正文,你可以通过将其层级标记为 0 来排除它们 - -IMPORTANT: -请直接返回优化过的由标题层级组成的字典,格式为{{标题id:标题层级}},如下: -{{0:1,1:2,2:2,3:3}} -不需要对字典格式化,不需要返回任何其他信息。 - -Input title list: -{title_dict} - -Corrected title list: -""" - - retry_count = 0 - max_retries = 3 - dict_completion = None - - while retry_count < max_retries: - try: - completion = client.chat.completions.create( - model=title_aided_config["model"], - messages=[ - {'role': 'user', 'content': title_optimize_prompt}], - temperature=0.7, - ) - # logger.info(f"Title completion: {completion.choices[0].message.content}") - dict_completion = ast.literal_eval(completion.choices[0].message.content) - # logger.info(f"len(dict_completion): {len(dict_completion)}, len(title_dict): {len(title_dict)}") - - if len(dict_completion) == len(title_dict): - for i, origin_title_block in enumerate(origin_title_list): - origin_title_block["level"] = int(dict_completion[i]) - break - else: - logger.warning("The number of titles in the optimized result is not equal to the number of titles in the input.") - retry_count += 1 - except Exception as e: - logger.exception(e) - retry_count += 1 - - if dict_completion is None: - logger.error("Failed to decode dict after maximum retries.") diff --git a/magic_pdf/post_proc/para_split_v3.py b/magic_pdf/post_proc/para_split_v3.py deleted file mode 100644 index 5f6852a69c40bbf3e6bd2d42c2c0218ce88ab280..0000000000000000000000000000000000000000 --- a/magic_pdf/post_proc/para_split_v3.py +++ /dev/null @@ -1,394 +0,0 @@ -import copy - -from loguru import logger - -from magic_pdf.config.constants import CROSS_PAGE, LINES_DELETED -from magic_pdf.config.ocr_content_type import BlockType, ContentType -from magic_pdf.libs.language import detect_lang - -LINE_STOP_FLAG = ( - '.', - '!', - '?', - '。', - '!', - '?', - ')', - ')', - '"', - '”', - ':', - ':', - ';', - ';', -) -LIST_END_FLAG = ('.', '。', ';', ';') - - -class ListLineTag: - IS_LIST_START_LINE = 'is_list_start_line' - IS_LIST_END_LINE = 'is_list_end_line' - - -def __process_blocks(blocks): - # 对所有block预处理 - # 1.通过title和interline_equation将block分组 - # 2.bbox边界根据line信息重置 - - result = [] - current_group = [] - - for i in range(len(blocks)): - current_block = blocks[i] - - # 如果当前块是 text 类型 - if current_block['type'] == 'text': - current_block['bbox_fs'] = copy.deepcopy(current_block['bbox']) - if 'lines' in current_block and len(current_block['lines']) > 0: - current_block['bbox_fs'] = [ - min([line['bbox'][0] for line in current_block['lines']]), - min([line['bbox'][1] for line in current_block['lines']]), - max([line['bbox'][2] for line in current_block['lines']]), - max([line['bbox'][3] for line in current_block['lines']]), - ] - current_group.append(current_block) - - # 检查下一个块是否存在 - if i + 1 < len(blocks): - next_block = blocks[i + 1] - # 如果下一个块不是 text 类型且是 title 或 interline_equation 类型 - if next_block['type'] in ['title', 'interline_equation']: - result.append(current_group) - current_group = [] - - # 处理最后一个 group - if current_group: - result.append(current_group) - - return result - - -def __is_list_or_index_block(block): - # 一个block如果是list block 应该同时满足以下特征 - # 1.block内有多个line 2.block 内有多个line左侧顶格写 3.block内有多个line 右侧不顶格(狗牙状) - # 1.block内有多个line 2.block 内有多个line左侧顶格写 3.多个line以endflag结尾 - # 1.block内有多个line 2.block 
内有多个line左侧顶格写 3.block内有多个line 左侧不顶格 - - # index block 是一种特殊的list block - # 一个block如果是index block 应该同时满足以下特征 - # 1.block内有多个line 2.block 内有多个line两侧均顶格写 3.line的开头或者结尾均为数字 - if len(block['lines']) >= 2: - first_line = block['lines'][0] - line_height = first_line['bbox'][3] - first_line['bbox'][1] - block_weight = block['bbox_fs'][2] - block['bbox_fs'][0] - block_height = block['bbox_fs'][3] - block['bbox_fs'][1] - page_weight, page_height = block['page_size'] - - left_close_num = 0 - left_not_close_num = 0 - right_not_close_num = 0 - right_close_num = 0 - lines_text_list = [] - center_close_num = 0 - external_sides_not_close_num = 0 - multiple_para_flag = False - last_line = block['lines'][-1] - - if page_weight == 0: - block_weight_radio = 0 - else: - block_weight_radio = block_weight / page_weight - # logger.info(f"block_weight_radio: {block_weight_radio}") - - # 如果首行左边不顶格而右边顶格,末行左边顶格而右边不顶格 (第一行可能可以右边不顶格) - if ( - first_line['bbox'][0] - block['bbox_fs'][0] > line_height / 2 - and abs(last_line['bbox'][0] - block['bbox_fs'][0]) < line_height / 2 - and block['bbox_fs'][2] - last_line['bbox'][2] > line_height - ): - multiple_para_flag = True - - block_text = '' - - for line in block['lines']: - line_text = '' - - for span in line['spans']: - span_type = span['type'] - if span_type == ContentType.Text: - line_text += span['content'].strip() - # 添加所有文本,包括空行,保持与block['lines']长度一致 - lines_text_list.append(line_text) - block_text = ''.join(lines_text_list) - - block_lang = detect_lang(block_text) - # logger.info(f"block_lang: {block_lang}") - - for line in block['lines']: - line_mid_x = (line['bbox'][0] + line['bbox'][2]) / 2 - block_mid_x = (block['bbox_fs'][0] + block['bbox_fs'][2]) / 2 - if ( - line['bbox'][0] - block['bbox_fs'][0] > 0.7 * line_height - and block['bbox_fs'][2] - line['bbox'][2] > 0.7 * line_height - ): - external_sides_not_close_num += 1 - if abs(line_mid_x - block_mid_x) < line_height / 2: - center_close_num += 1 - - # 计算line左侧顶格数量是否大于2,是否顶格用abs(block['bbox_fs'][0] - line['bbox'][0]) < line_height/2 来判断 - if abs(block['bbox_fs'][0] - line['bbox'][0]) < line_height / 2: - left_close_num += 1 - elif line['bbox'][0] - block['bbox_fs'][0] > line_height: - left_not_close_num += 1 - - # 计算右侧是否顶格 - if abs(block['bbox_fs'][2] - line['bbox'][2]) < line_height: - right_close_num += 1 - else: - # 类中文没有超长单词的情况,可以用统一的阈值 - if block_lang in ['zh', 'ja', 'ko']: - closed_area = 0.26 * block_weight - else: - # 右侧不顶格情况下是否有一段距离,拍脑袋用0.3block宽度做阈值 - # block宽的阈值可以小些,block窄的阈值要大 - if block_weight_radio >= 0.5: - closed_area = 0.26 * block_weight - else: - closed_area = 0.36 * block_weight - if block['bbox_fs'][2] - line['bbox'][2] > closed_area: - right_not_close_num += 1 - - # 判断lines_text_list中的元素是否有超过80%都以LIST_END_FLAG结尾 - line_end_flag = False - # 判断lines_text_list中的元素是否有超过80%都以数字开头或都以数字结尾 - line_num_flag = False - num_start_count = 0 - num_end_count = 0 - flag_end_count = 0 - - if len(lines_text_list) > 0: - for line_text in lines_text_list: - if len(line_text) > 0: - if line_text[-1] in LIST_END_FLAG: - flag_end_count += 1 - if line_text[0].isdigit(): - num_start_count += 1 - if line_text[-1].isdigit(): - num_end_count += 1 - - if ( - num_start_count / len(lines_text_list) >= 0.8 - or num_end_count / len(lines_text_list) >= 0.8 - ): - line_num_flag = True - if flag_end_count / len(lines_text_list) >= 0.8: - line_end_flag = True - - # 有的目录右侧不贴边, 目前认为左边或者右边有一边全贴边,且符合数字规则极为index - if ( - left_close_num / len(block['lines']) >= 0.8 - or right_close_num / len(block['lines']) >= 0.8 - ) and 
line_num_flag: - for line in block['lines']: - line[ListLineTag.IS_LIST_START_LINE] = True - return BlockType.Index - - # 全部line都居中的特殊list识别,每行都需要换行,特征是多行,且大多数行都前后not_close,每line中点x坐标接近 - # 补充条件block的长宽比有要求 - elif ( - external_sides_not_close_num >= 2 - and center_close_num == len(block['lines']) - and external_sides_not_close_num / len(block['lines']) >= 0.5 - and block_height / block_weight > 0.4 - ): - for line in block['lines']: - line[ListLineTag.IS_LIST_START_LINE] = True - return BlockType.List - - elif ( - left_close_num >= 2 - and (right_not_close_num >= 2 or line_end_flag or left_not_close_num >= 2) - and not multiple_para_flag - # and block_weight_radio > 0.27 - ): - # 处理一种特殊的没有缩进的list,所有行都贴左边,通过右边的空隙判断是否是item尾 - if left_close_num / len(block['lines']) > 0.8: - # 这种是每个item只有一行,且左边都贴边的短item list - if flag_end_count == 0 and right_close_num / len(block['lines']) < 0.5: - for line in block['lines']: - if abs(block['bbox_fs'][0] - line['bbox'][0]) < line_height / 2: - line[ListLineTag.IS_LIST_START_LINE] = True - # 这种是大部分line item 都有结束标识符的情况,按结束标识符区分不同item - elif line_end_flag: - for i, line in enumerate(block['lines']): - if ( - len(lines_text_list[i]) > 0 - and lines_text_list[i][-1] in LIST_END_FLAG - ): - line[ListLineTag.IS_LIST_END_LINE] = True - if i + 1 < len(block['lines']): - block['lines'][i + 1][ - ListLineTag.IS_LIST_START_LINE - ] = True - # line item基本没有结束标识符,而且也没有缩进,按右侧空隙判断哪些是item end - else: - line_start_flag = False - for i, line in enumerate(block['lines']): - if line_start_flag: - line[ListLineTag.IS_LIST_START_LINE] = True - line_start_flag = False - - if ( - abs(block['bbox_fs'][2] - line['bbox'][2]) - > 0.1 * block_weight - ): - line[ListLineTag.IS_LIST_END_LINE] = True - line_start_flag = True - # 一种有缩进的特殊有序list,start line 左侧不贴边且以数字开头,end line 以 IS_LIST_END_FLAG 结尾且数量和start line 一致 - elif num_start_count >= 2 and num_start_count == flag_end_count: - for i, line in enumerate(block['lines']): - if len(lines_text_list[i]) > 0: - if lines_text_list[i][0].isdigit(): - line[ListLineTag.IS_LIST_START_LINE] = True - if lines_text_list[i][-1] in LIST_END_FLAG: - line[ListLineTag.IS_LIST_END_LINE] = True - else: - # 正常有缩进的list处理 - for line in block['lines']: - if abs(block['bbox_fs'][0] - line['bbox'][0]) < line_height / 2: - line[ListLineTag.IS_LIST_START_LINE] = True - if abs(block['bbox_fs'][2] - line['bbox'][2]) > line_height: - line[ListLineTag.IS_LIST_END_LINE] = True - - return BlockType.List - else: - return BlockType.Text - else: - return BlockType.Text - - -def __merge_2_text_blocks(block1, block2): - if len(block1['lines']) > 0: - first_line = block1['lines'][0] - line_height = first_line['bbox'][3] - first_line['bbox'][1] - block1_weight = block1['bbox'][2] - block1['bbox'][0] - block2_weight = block2['bbox'][2] - block2['bbox'][0] - min_block_weight = min(block1_weight, block2_weight) - if abs(block1['bbox_fs'][0] - first_line['bbox'][0]) < line_height / 2: - last_line = block2['lines'][-1] - if len(last_line['spans']) > 0: - last_span = last_line['spans'][-1] - line_height = last_line['bbox'][3] - last_line['bbox'][1] - if len(first_line['spans']) > 0: - first_span = first_line['spans'][0] - if len(first_span['content']) > 0: - span_start_with_num = first_span['content'][0].isdigit() - span_start_with_big_char = first_span['content'][0].isupper() - if ( - # 上一个block的最后一个line的右边界和block的右边界差距不超过line_height - abs(block2['bbox_fs'][2] - last_line['bbox'][2]) < line_height - # 上一个block的最后一个span不是以特定符号结尾 - and not last_span['content'].endswith(LINE_STOP_FLAG) - 
# 两个block宽度差距超过2倍也不合并 - and abs(block1_weight - block2_weight) < min_block_weight - # 下一个block的第一个字符是数字 - and not span_start_with_num - # 下一个block的第一个字符是大写字母 - and not span_start_with_big_char - ): - if block1['page_num'] != block2['page_num']: - for line in block1['lines']: - for span in line['spans']: - span[CROSS_PAGE] = True - block2['lines'].extend(block1['lines']) - block1['lines'] = [] - block1[LINES_DELETED] = True - - return block1, block2 - - -def __merge_2_list_blocks(block1, block2): - if block1['page_num'] != block2['page_num']: - for line in block1['lines']: - for span in line['spans']: - span[CROSS_PAGE] = True - block2['lines'].extend(block1['lines']) - block1['lines'] = [] - block1[LINES_DELETED] = True - - return block1, block2 - - -def __is_list_group(text_blocks_group): - # list group的特征是一个group内的所有block都满足以下条件 - # 1.每个block都不超过3行 2. 每个block 的左边界都比较接近(逻辑简单点先不加这个规则) - for block in text_blocks_group: - if len(block['lines']) > 3: - return False - return True - - -def __para_merge_page(blocks): - page_text_blocks_groups = __process_blocks(blocks) - for text_blocks_group in page_text_blocks_groups: - if len(text_blocks_group) > 0: - # 需要先在合并前对所有block判断是否为list or index block - for block in text_blocks_group: - block_type = __is_list_or_index_block(block) - block['type'] = block_type - # logger.info(f"{block['type']}:{block}") - - if len(text_blocks_group) > 1: - # 在合并前判断这个group 是否是一个 list group - is_list_group = __is_list_group(text_blocks_group) - - # 倒序遍历 - for i in range(len(text_blocks_group) - 1, -1, -1): - current_block = text_blocks_group[i] - - # 检查是否有前一个块 - if i - 1 >= 0: - prev_block = text_blocks_group[i - 1] - - if ( - current_block['type'] == 'text' - and prev_block['type'] == 'text' - and not is_list_group - ): - __merge_2_text_blocks(current_block, prev_block) - elif ( - current_block['type'] == BlockType.List - and prev_block['type'] == BlockType.List - ) or ( - current_block['type'] == BlockType.Index - and prev_block['type'] == BlockType.Index - ): - __merge_2_list_blocks(current_block, prev_block) - - else: - continue - - -def para_split(pdf_info_dict): - all_blocks = [] - for page_num, page in pdf_info_dict.items(): - blocks = copy.deepcopy(page['preproc_blocks']) - for block in blocks: - block['page_num'] = page_num - block['page_size'] = page['page_size'] - all_blocks.extend(blocks) - - __para_merge_page(all_blocks) - for page_num, page in pdf_info_dict.items(): - page['para_blocks'] = [] - for block in all_blocks: - if block['page_num'] == page_num: - page['para_blocks'].append(block) - - -if __name__ == '__main__': - input_blocks = [] - # 调用函数 - groups = __process_blocks(input_blocks) - for group_index, group in enumerate(groups): - print(f'Group {group_index}: {group}') diff --git a/magic_pdf/pre_proc/__init__.py b/magic_pdf/pre_proc/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/pre_proc/construct_page_dict.py b/magic_pdf/pre_proc/construct_page_dict.py deleted file mode 100644 index 09c09c137bc75c869c4d6f58594bb713c6944ec8..0000000000000000000000000000000000000000 --- a/magic_pdf/pre_proc/construct_page_dict.py +++ /dev/null @@ -1,17 +0,0 @@ - -def ocr_construct_page_component_v2(blocks, layout_bboxes, page_id, page_w, page_h, layout_tree, - images, tables, interline_equations, discarded_blocks, need_drop, drop_reason): - return_dict = { - 'preproc_blocks': blocks, - 'layout_bboxes': layout_bboxes, - 'page_idx': page_id, - 'page_size': [page_w, 
page_h], - '_layout_tree': layout_tree, - 'images': images, - 'tables': tables, - 'interline_equations': interline_equations, - 'discarded_blocks': discarded_blocks, - 'need_drop': need_drop, - 'drop_reason': drop_reason, - } - return return_dict diff --git a/magic_pdf/pre_proc/cut_image.py b/magic_pdf/pre_proc/cut_image.py deleted file mode 100644 index 901d372ec9ee6a5c6d9cdd0e25cf3b683f9179a9..0000000000000000000000000000000000000000 --- a/magic_pdf/pre_proc/cut_image.py +++ /dev/null @@ -1,32 +0,0 @@ -from loguru import logger - -from magic_pdf.config.ocr_content_type import ContentType -from magic_pdf.libs.commons import join_path -from magic_pdf.libs.pdf_image_tools import cut_image - - -def ocr_cut_image_and_table(spans, page, page_id, pdf_bytes_md5, imageWriter): - def return_path(type): - return join_path(pdf_bytes_md5, type) - - for span in spans: - span_type = span['type'] - if span_type == ContentType.Image: - if not check_img_bbox(span['bbox']) or not imageWriter: - continue - span['image_path'] = cut_image(span['bbox'], page_id, page, return_path=return_path('images'), - imageWriter=imageWriter) - elif span_type == ContentType.Table: - if not check_img_bbox(span['bbox']) or not imageWriter: - continue - span['image_path'] = cut_image(span['bbox'], page_id, page, return_path=return_path('tables'), - imageWriter=imageWriter) - - return spans - - -def check_img_bbox(bbox) -> bool: - if any([bbox[0] >= bbox[2], bbox[1] >= bbox[3]]): - logger.warning(f'image_bboxes: 错误的box, {bbox}') - return False - return True diff --git a/magic_pdf/pre_proc/ocr_detect_all_bboxes.py b/magic_pdf/pre_proc/ocr_detect_all_bboxes.py deleted file mode 100644 index b9fd5b029f1647e59e3c5f603b936201a726ddc8..0000000000000000000000000000000000000000 --- a/magic_pdf/pre_proc/ocr_detect_all_bboxes.py +++ /dev/null @@ -1,257 +0,0 @@ -from magic_pdf.config.ocr_content_type import BlockType -from magic_pdf.libs.boxbase import ( - calculate_iou, - calculate_overlap_area_in_bbox1_area_ratio, - calculate_vertical_projection_overlap_ratio, - get_minbox_if_overlap_by_ratio -) -from magic_pdf.pre_proc.remove_bbox_overlap import remove_overlap_between_bbox_for_block - - -def add_bboxes(blocks, block_type, bboxes): - for block in blocks: - x0, y0, x1, y1 = block['bbox'] - if block_type in [ - BlockType.ImageBody, - BlockType.ImageCaption, - BlockType.ImageFootnote, - BlockType.TableBody, - BlockType.TableCaption, - BlockType.TableFootnote, - ]: - bboxes.append( - [ - x0, - y0, - x1, - y1, - None, - None, - None, - block_type, - None, - None, - None, - None, - block['score'], - block['group_id'], - ] - ) - else: - bboxes.append( - [ - x0, - y0, - x1, - y1, - None, - None, - None, - block_type, - None, - None, - None, - None, - block['score'], - ] - ) - - -def ocr_prepare_bboxes_for_layout_split_v2( - img_body_blocks, - img_caption_blocks, - img_footnote_blocks, - table_body_blocks, - table_caption_blocks, - table_footnote_blocks, - discarded_blocks, - text_blocks, - title_blocks, - interline_equation_blocks, - page_w, - page_h, -): - all_bboxes = [] - - add_bboxes(img_body_blocks, BlockType.ImageBody, all_bboxes) - add_bboxes(img_caption_blocks, BlockType.ImageCaption, all_bboxes) - add_bboxes(img_footnote_blocks, BlockType.ImageFootnote, all_bboxes) - add_bboxes(table_body_blocks, BlockType.TableBody, all_bboxes) - add_bboxes(table_caption_blocks, BlockType.TableCaption, all_bboxes) - add_bboxes(table_footnote_blocks, BlockType.TableFootnote, all_bboxes) - add_bboxes(text_blocks, BlockType.Text, all_bboxes) - 
add_bboxes(title_blocks, BlockType.Title, all_bboxes) - add_bboxes(interline_equation_blocks, BlockType.InterlineEquation, all_bboxes) - - """block嵌套问题解决""" - """文本框与标题框重叠,优先信任文本框""" - all_bboxes = fix_text_overlap_title_blocks(all_bboxes) - """任何框体与舍弃框重叠,优先信任舍弃框""" - all_bboxes = remove_need_drop_blocks(all_bboxes, discarded_blocks) - - # interline_equation 与title或text框冲突的情况,分两种情况处理 - """interline_equation框与文本类型框iou比较接近1的时候,信任行间公式框""" - all_bboxes = fix_interline_equation_overlap_text_blocks_with_hi_iou(all_bboxes) - """interline_equation框被包含在文本类型框内,且interline_equation比文本区块小很多时信任文本框,这时需要舍弃公式框""" - # 通过后续大框套小框逻辑删除 - - """discarded_blocks""" - all_discarded_blocks = [] - add_bboxes(discarded_blocks, BlockType.Discarded, all_discarded_blocks) - - """footnote识别:宽度超过1/3页面宽度的,高度超过10的,处于页面下半30%区域的""" - footnote_blocks = [] - for discarded in discarded_blocks: - x0, y0, x1, y1 = discarded['bbox'] - if (x1 - x0) > (page_w / 3) and (y1 - y0) > 10 and y0 > (page_h * 0.7): - footnote_blocks.append([x0, y0, x1, y1]) - - """移除在footnote下面的任何框""" - need_remove_blocks = find_blocks_under_footnote(all_bboxes, footnote_blocks) - if len(need_remove_blocks) > 0: - for block in need_remove_blocks: - all_bboxes.remove(block) - all_discarded_blocks.append(block) - - """经过以上处理后,还存在大框套小框的情况,则删除小框""" - all_bboxes = remove_overlaps_min_blocks(all_bboxes) - all_discarded_blocks = remove_overlaps_min_blocks(all_discarded_blocks) - """将剩余的bbox做分离处理,防止后面分layout时出错""" - # all_bboxes, drop_reasons = remove_overlap_between_bbox_for_block(all_bboxes) - all_bboxes.sort(key=lambda x: x[0]+x[1]) - return all_bboxes, all_discarded_blocks, footnote_blocks - - -def find_blocks_under_footnote(all_bboxes, footnote_blocks): - need_remove_blocks = [] - for block in all_bboxes: - block_x0, block_y0, block_x1, block_y1 = block[:4] - for footnote_bbox in footnote_blocks: - footnote_x0, footnote_y0, footnote_x1, footnote_y1 = footnote_bbox - # 如果footnote的纵向投影覆盖了block的纵向投影的80%且block的y0大于等于footnote的y1 - if ( - block_y0 >= footnote_y1 - and calculate_vertical_projection_overlap_ratio( - (block_x0, block_y0, block_x1, block_y1), footnote_bbox - ) - >= 0.8 - ): - if block not in need_remove_blocks: - need_remove_blocks.append(block) - break - return need_remove_blocks - - -def fix_interline_equation_overlap_text_blocks_with_hi_iou(all_bboxes): - # 先提取所有text和interline block - text_blocks = [] - for block in all_bboxes: - if block[7] == BlockType.Text: - text_blocks.append(block) - interline_equation_blocks = [] - for block in all_bboxes: - if block[7] == BlockType.InterlineEquation: - interline_equation_blocks.append(block) - - need_remove = [] - - for interline_equation_block in interline_equation_blocks: - for text_block in text_blocks: - interline_equation_block_bbox = interline_equation_block[:4] - text_block_bbox = text_block[:4] - if calculate_iou(interline_equation_block_bbox, text_block_bbox) > 0.8: - if text_block not in need_remove: - need_remove.append(text_block) - - if len(need_remove) > 0: - for block in need_remove: - all_bboxes.remove(block) - - return all_bboxes - - -def fix_text_overlap_title_blocks(all_bboxes): - # 先提取所有text和title block - text_blocks = [] - for block in all_bboxes: - if block[7] == BlockType.Text: - text_blocks.append(block) - title_blocks = [] - for block in all_bboxes: - if block[7] == BlockType.Title: - title_blocks.append(block) - - need_remove = [] - - for text_block in text_blocks: - for title_block in title_blocks: - text_block_bbox = text_block[:4] - title_block_bbox = title_block[:4] - if 
calculate_iou(text_block_bbox, title_block_bbox) > 0.8: - if title_block not in need_remove: - need_remove.append(title_block) - - if len(need_remove) > 0: - for block in need_remove: - all_bboxes.remove(block) - - return all_bboxes - - -def remove_need_drop_blocks(all_bboxes, discarded_blocks): - need_remove = [] - for block in all_bboxes: - for discarded_block in discarded_blocks: - block_bbox = block[:4] - if ( - calculate_overlap_area_in_bbox1_area_ratio( - block_bbox, discarded_block['bbox'] - ) - > 0.6 - ): - if block not in need_remove: - need_remove.append(block) - break - - if len(need_remove) > 0: - for block in need_remove: - all_bboxes.remove(block) - return all_bboxes - - -def remove_overlaps_min_blocks(all_bboxes): - # 重叠block,小的不能直接删除,需要和大的那个合并成一个更大的。 - # 删除重叠blocks中较小的那些 - need_remove = [] - for block1 in all_bboxes: - for block2 in all_bboxes: - if block1 != block2: - block1_bbox = block1[:4] - block2_bbox = block2[:4] - overlap_box = get_minbox_if_overlap_by_ratio( - block1_bbox, block2_bbox, 0.8 - ) - if overlap_box is not None: - block_to_remove = next( - (block for block in all_bboxes if block[:4] == overlap_box), - None, - ) - if ( - block_to_remove is not None - and block_to_remove not in need_remove - ): - large_block = block1 if block1 != block_to_remove else block2 - x1, y1, x2, y2 = large_block[:4] - sx1, sy1, sx2, sy2 = block_to_remove[:4] - x1 = min(x1, sx1) - y1 = min(y1, sy1) - x2 = max(x2, sx2) - y2 = max(y2, sy2) - large_block[:4] = [x1, y1, x2, y2] - need_remove.append(block_to_remove) - - if len(need_remove) > 0: - for block in need_remove: - all_bboxes.remove(block) - - return all_bboxes diff --git a/magic_pdf/pre_proc/ocr_dict_merge.py b/magic_pdf/pre_proc/ocr_dict_merge.py deleted file mode 100644 index 38ca3652b2d34e09e74b5bc2f8acbaddc11d4917..0000000000000000000000000000000000000000 --- a/magic_pdf/pre_proc/ocr_dict_merge.py +++ /dev/null @@ -1,159 +0,0 @@ -from magic_pdf.config.ocr_content_type import BlockType, ContentType -from magic_pdf.libs.boxbase import __is_overlaps_y_exceeds_threshold, calculate_overlap_area_in_bbox1_area_ratio - - -# 将每一个line中的span从左到右排序 -def line_sort_spans_by_left_to_right(lines): - line_objects = [] - for line in lines: - # 按照x0坐标排序 - line.sort(key=lambda span: span['bbox'][0]) - line_bbox = [ - min(span['bbox'][0] for span in line), # x0 - min(span['bbox'][1] for span in line), # y0 - max(span['bbox'][2] for span in line), # x1 - max(span['bbox'][3] for span in line), # y1 - ] - line_objects.append({ - 'bbox': line_bbox, - 'spans': line, - }) - return line_objects - - -def merge_spans_to_line(spans, threshold=0.6): - if len(spans) == 0: - return [] - else: - # 按照y0坐标排序 - spans.sort(key=lambda span: span['bbox'][1]) - - lines = [] - current_line = [spans[0]] - for span in spans[1:]: - # 如果当前的span类型为"interline_equation" 或者 当前行中已经有"interline_equation" - # image和table类型,同上 - if span['type'] in [ - ContentType.InterlineEquation, ContentType.Image, - ContentType.Table - ] or any(s['type'] in [ - ContentType.InterlineEquation, ContentType.Image, - ContentType.Table - ] for s in current_line): - # 则开始新行 - lines.append(current_line) - current_line = [span] - continue - - # 如果当前的span与当前行的最后一个span在y轴上重叠,则添加到当前行 - if __is_overlaps_y_exceeds_threshold(span['bbox'], current_line[-1]['bbox'], threshold): - current_line.append(span) - else: - # 否则,开始新行 - lines.append(current_line) - current_line = [span] - - # 添加最后一行 - if current_line: - lines.append(current_line) - - return lines - - -def span_block_type_compatible(span_type, 
block_type): - if span_type in [ContentType.Text, ContentType.InlineEquation]: - return block_type in [ - BlockType.Text, - BlockType.Title, - BlockType.ImageCaption, - BlockType.ImageFootnote, - BlockType.TableCaption, - BlockType.TableFootnote, - BlockType.Discarded - ] - elif span_type == ContentType.InterlineEquation: - return block_type in [BlockType.InterlineEquation, BlockType.Text] - elif span_type == ContentType.Image: - return block_type in [BlockType.ImageBody] - elif span_type == ContentType.Table: - return block_type in [BlockType.TableBody] - else: - return False - - -def fill_spans_in_blocks(blocks, spans, radio): - """将allspans中的span按位置关系,放入blocks中.""" - block_with_spans = [] - for block in blocks: - block_type = block[7] - block_bbox = block[0:4] - block_dict = { - 'type': block_type, - 'bbox': block_bbox, - } - if block_type in [ - BlockType.ImageBody, BlockType.ImageCaption, BlockType.ImageFootnote, - BlockType.TableBody, BlockType.TableCaption, BlockType.TableFootnote - ]: - block_dict['group_id'] = block[-1] - block_spans = [] - for span in spans: - span_bbox = span['bbox'] - if calculate_overlap_area_in_bbox1_area_ratio(span_bbox, block_bbox) > radio and span_block_type_compatible(span['type'], block_type): - block_spans.append(span) - - block_dict['spans'] = block_spans - block_with_spans.append(block_dict) - - # 从spans删除已经放入block_spans中的span - if len(block_spans) > 0: - for span in block_spans: - spans.remove(span) - - return block_with_spans, spans - - -def fix_block_spans_v2(block_with_spans): - fix_blocks = [] - for block in block_with_spans: - block_type = block['type'] - - if block_type in [BlockType.Text, BlockType.Title, - BlockType.ImageCaption, BlockType.ImageFootnote, - BlockType.TableCaption, BlockType.TableFootnote - ]: - block = fix_text_block(block) - elif block_type in [BlockType.InterlineEquation, BlockType.ImageBody, BlockType.TableBody]: - block = fix_interline_block(block) - else: - continue - fix_blocks.append(block) - return fix_blocks - - -def fix_discarded_block(discarded_block_with_spans): - fix_discarded_blocks = [] - for block in discarded_block_with_spans: - block = fix_text_block(block) - fix_discarded_blocks.append(block) - return fix_discarded_blocks - - -def fix_text_block(block): - # 文本block中的公式span都应该转换成行内type - for span in block['spans']: - if span['type'] == ContentType.InterlineEquation: - span['type'] = ContentType.InlineEquation - block_lines = merge_spans_to_line(block['spans']) - sort_block_lines = line_sort_spans_by_left_to_right(block_lines) - block['lines'] = sort_block_lines - del block['spans'] - return block - - -def fix_interline_block(block): - block_lines = merge_spans_to_line(block['spans']) - sort_block_lines = line_sort_spans_by_left_to_right(block_lines) - block['lines'] = sort_block_lines - del block['spans'] - return block diff --git a/magic_pdf/pre_proc/ocr_span_list_modify.py b/magic_pdf/pre_proc/ocr_span_list_modify.py deleted file mode 100644 index 3fa9d2dd556c07b7d8571141f7103b024513e78c..0000000000000000000000000000000000000000 --- a/magic_pdf/pre_proc/ocr_span_list_modify.py +++ /dev/null @@ -1,131 +0,0 @@ - -from magic_pdf.config.drop_tag import DropTag -from magic_pdf.config.ocr_content_type import BlockType -from magic_pdf.libs.boxbase import calculate_iou, get_minbox_if_overlap_by_ratio - - -def remove_overlaps_low_confidence_spans(spans): - dropped_spans = [] - # 删除重叠spans中置信度低的的那些 - for span1 in spans: - for span2 in spans: - if span1 != span2: - # span1 或 span2 任何一个都不应该在 dropped_spans 中 - if span1 
in dropped_spans or span2 in dropped_spans: - continue - else: - if calculate_iou(span1['bbox'], span2['bbox']) > 0.9: - if span1['score'] < span2['score']: - span_need_remove = span1 - else: - span_need_remove = span2 - if ( - span_need_remove is not None - and span_need_remove not in dropped_spans - ): - dropped_spans.append(span_need_remove) - - if len(dropped_spans) > 0: - for span_need_remove in dropped_spans: - spans.remove(span_need_remove) - span_need_remove['tag'] = DropTag.SPAN_OVERLAP - - return spans, dropped_spans - - -def check_chars_is_overlap_in_span(chars): - for i in range(len(chars)): - for j in range(i + 1, len(chars)): - if calculate_iou(chars[i]['bbox'], chars[j]['bbox']) > 0.35: - return True - return False - - -def remove_x_overlapping_chars(span, median_width): - """ - Remove characters from a span that overlap significantly on the x-axis. - - Args: - median_width: - span (dict): A span containing a list of chars, each with bbox coordinates - in the format [x0, y0, x1, y1] - - Returns: - dict: The span with overlapping characters removed - """ - if 'chars' not in span or len(span['chars']) < 2: - return span - - overlap_threshold = median_width * 0.3 - - i = 0 - while i < len(span['chars']) - 1: - char1 = span['chars'][i] - char2 = span['chars'][i + 1] - - # Calculate overlap width - x_left = max(char1['bbox'][0], char2['bbox'][0]) - x_right = min(char1['bbox'][2], char2['bbox'][2]) - - if x_right > x_left: # There is overlap - overlap_width = x_right - x_left - - if overlap_width > overlap_threshold: - if char1['c'] == char2['c'] or char1['c'] == ' ' or char2['c'] == ' ': - # Determine which character to remove - width1 = char1['bbox'][2] - char1['bbox'][0] - width2 = char2['bbox'][2] - char2['bbox'][0] - if width1 < width2: - # Remove the narrower character - span['chars'].pop(i) - else: - span['chars'].pop(i + 1) - else: - i += 1 - - # Don't increment i since we need to check the new pair - else: - i += 1 - else: - i += 1 - - return span - - -def remove_overlaps_min_spans(spans): - dropped_spans = [] - # 删除重叠spans中较小的那些 - for span1 in spans: - for span2 in spans: - if span1 != span2: - # span1 或 span2 任何一个都不应该在 dropped_spans 中 - if span1 in dropped_spans or span2 in dropped_spans: - continue - else: - overlap_box = get_minbox_if_overlap_by_ratio(span1['bbox'], span2['bbox'], 0.65) - if overlap_box is not None: - span_need_remove = next((span for span in spans if span['bbox'] == overlap_box), None) - if span_need_remove is not None and span_need_remove not in dropped_spans: - dropped_spans.append(span_need_remove) - if len(dropped_spans) > 0: - for span_need_remove in dropped_spans: - spans.remove(span_need_remove) - span_need_remove['tag'] = DropTag.SPAN_OVERLAP - - return spans, dropped_spans - - -def get_qa_need_list_v2(blocks): - # 创建 images, tables, interline_equations, inline_equations 的副本 - images = [] - tables = [] - interline_equations = [] - - for block in blocks: - if block['type'] == BlockType.Image: - images.append(block) - elif block['type'] == BlockType.Table: - tables.append(block) - elif block['type'] == BlockType.InterlineEquation: - interline_equations.append(block) - return images, tables, interline_equations diff --git a/magic_pdf/pre_proc/remove_bbox_overlap.py b/magic_pdf/pre_proc/remove_bbox_overlap.py deleted file mode 100644 index 35f96a10ba30221d6fb218a5e99e17253b5e30b1..0000000000000000000000000000000000000000 --- a/magic_pdf/pre_proc/remove_bbox_overlap.py +++ /dev/null @@ -1,100 +0,0 @@ -from magic_pdf.config.drop_reason import 
DropReason -from magic_pdf.libs.boxbase import _is_in, _is_part_overlap - - -def _remove_overlap_between_bbox(bbox1, bbox2): - if _is_part_overlap(bbox1, bbox2): - ix0, iy0, ix1, iy1 = bbox1 - x0, y0, x1, y1 = bbox2 - - diff_x = min(x1, ix1) - max(x0, ix0) - diff_y = min(y1, iy1) - max(y0, iy0) - - if diff_y > diff_x: - if x1 >= ix1: - mid = (x0 + ix1) // 2 - ix1 = min(mid - 0.25, ix1) - x0 = max(mid + 0.25, x0) - else: - mid = (ix0 + x1) // 2 - ix0 = max(mid + 0.25, ix0) - x1 = min(mid - 0.25, x1) - else: - if y1 >= iy1: - mid = (y0 + iy1) // 2 - y0 = max(mid + 0.25, y0) - iy1 = min(iy1, mid - 0.25) - else: - mid = (iy0 + y1) // 2 - y1 = min(y1, mid - 0.25) - iy0 = max(mid + 0.25, iy0) - - if ix1 > ix0 and iy1 > iy0 and y1 > y0 and x1 > x0: - bbox1 = [ix0, iy0, ix1, iy1] - bbox2 = [x0, y0, x1, y1] - return bbox1, bbox2, None - else: - return bbox1, bbox2, DropReason.NEGATIVE_BBOX_AREA - else: - return bbox1, bbox2, None - - -def _remove_overlap_between_bboxes(arr): - drop_reasons = [] - N = len(arr) - keeps = [True] * N - res = [None] * N - for i in range(N): - for j in range(N): - if i == j: - continue - if _is_in(arr[i]['bbox'], arr[j]['bbox']): - keeps[i] = False - - for idx, v in enumerate(arr): - if not keeps[idx]: - continue - for i in range(N): - if res[i] is None: - continue - - bbox1, bbox2, drop_reason = _remove_overlap_between_bbox( - v['bbox'], res[i]['bbox'] - ) - if drop_reason is None: - v['bbox'] = bbox1 - res[i]['bbox'] = bbox2 - else: - if v['score'] > res[i]['score']: - keeps[i] = False - res[i] = None - else: - keeps[idx] = False - drop_reasons.append(drop_reason) - if keeps[idx]: - res[idx] = v - return res, drop_reasons - - -def remove_overlap_between_bbox_for_span(spans): - arr = [{'bbox': span['bbox'], 'score': span.get('score', 0.1)} for span in spans] - res, drop_reasons = _remove_overlap_between_bboxes(arr) - ret = [] - for i in range(len(res)): - if res[i] is None: - continue - spans[i]['bbox'] = res[i]['bbox'] - ret.append(spans[i]) - return ret, drop_reasons - - -def remove_overlap_between_bbox_for_block(all_bboxes): - arr = [{'bbox': bbox[:4], 'score': bbox[-1]} for bbox in all_bboxes] - res, drop_reasons = _remove_overlap_between_bboxes(arr) - ret = [] - for i in range(len(res)): - if res[i] is None: - continue - all_bboxes[i][:4] = res[i]['bbox'] - ret.append(all_bboxes[i]) - return ret, drop_reasons diff --git a/magic_pdf/resources/fasttext-langdetect/lid.176.ftz b/magic_pdf/resources/fasttext-langdetect/lid.176.ftz deleted file mode 100644 index 1fb85b357b22f67f019567f0e7003f4d49bda7a0..0000000000000000000000000000000000000000 Binary files a/magic_pdf/resources/fasttext-langdetect/lid.176.ftz and /dev/null differ diff --git a/magic_pdf/resources/model_config/model_configs.yaml b/magic_pdf/resources/model_config/model_configs.yaml deleted file mode 100644 index 0ee1aa0e8de8ef6baec4964b75d9f753b5c25da5..0000000000000000000000000000000000000000 --- a/magic_pdf/resources/model_config/model_configs.yaml +++ /dev/null @@ -1,8 +0,0 @@ -weights: - layoutlmv3: Layout/LayoutLMv3/model_final.pth - doclayout_yolo: Layout/YOLO/doclayout_yolo_docstructbench_imgsz1280_2501.pt - yolo_v8_mfd: MFD/YOLO/yolo_v8_ft.pt - unimernet_small: MFR/unimernet_hf_small_2503 - struct_eqtable: TabRec/StructEqTable - tablemaster: TabRec/TableMaster - rapid_table: TabRec/RapidTable \ No newline at end of file diff --git a/magic_pdf/resources/slanet_plus/slanet-plus.onnx b/magic_pdf/resources/slanet_plus/slanet-plus.onnx deleted file mode 100644 index 
d263823cdbc683f63ab7ec3d46eaa381a93b8079..0000000000000000000000000000000000000000 Binary files a/magic_pdf/resources/slanet_plus/slanet-plus.onnx and /dev/null differ diff --git a/magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt b/magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt deleted file mode 100644 index 8e7dbe703a4431b318d5b73724bd9693acdd2be2..0000000000000000000000000000000000000000 Binary files a/magic_pdf/resources/yolov11-langdetect/yolo_v11_ft.pt and /dev/null differ diff --git a/magic_pdf/spark/__init__.py b/magic_pdf/spark/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/spark/spark_api.py b/magic_pdf/spark/spark_api.py deleted file mode 100644 index 5d9faeb580840336978e8f4c3e683a7ec709dfa1..0000000000000000000000000000000000000000 --- a/magic_pdf/spark/spark_api.py +++ /dev/null @@ -1,49 +0,0 @@ -from loguru import logger - -from magic_pdf.config.drop_reason import DropReason - - -def get_data_source(jso: dict): - data_source = jso.get('data_source') - if data_source is None: - data_source = jso.get('file_source') - return data_source - - -def get_data_type(jso: dict): - data_type = jso.get('data_type') - if data_type is None: - data_type = jso.get('file_type') - return data_type - - -def get_bookid(jso: dict): - book_id = jso.get('bookid') - if book_id is None: - book_id = jso.get('original_file_id') - return book_id - - -def exception_handler(jso: dict, e): - logger.exception(e) - jso['_need_drop'] = True - jso['_drop_reason'] = DropReason.Exception - jso['_exception'] = f'ERROR: {e}' - return jso - - -def get_bookname(jso: dict): - data_source = get_data_source(jso) - file_id = jso.get('file_id') - book_name = f'{data_source}/{file_id}' - return book_name - - -def spark_json_extractor(jso: dict) -> dict: - - """从json中提取数据,返回一个dict.""" - - return { - '_pdf_type': jso['_pdf_type'], - 'model_list': jso['doc_layout_result'], - } diff --git a/magic_pdf/tools/__init__.py b/magic_pdf/tools/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/tools/cli.py b/magic_pdf/tools/cli.py deleted file mode 100644 index d204b26a4cba7e5088bd7b37807df44ea478fbef..0000000000000000000000000000000000000000 --- a/magic_pdf/tools/cli.py +++ /dev/null @@ -1,161 +0,0 @@ -import os -import shutil -import tempfile -from pathlib import Path - -import click -import fitz -from loguru import logger - -import magic_pdf.model as model_config -from magic_pdf.data.batch_build_dataset import batch_build_dataset -from magic_pdf.data.data_reader_writer import FileBasedDataReader -from magic_pdf.data.dataset import Dataset -from magic_pdf.libs.version import __version__ -from magic_pdf.tools.common import batch_do_parse, do_parse, parse_pdf_methods -from magic_pdf.utils.office_to_pdf import convert_file_to_pdf - -pdf_suffixes = ['.pdf'] -ms_office_suffixes = ['.ppt', '.pptx', '.doc', '.docx'] -image_suffixes = ['.png', '.jpeg', '.jpg'] - - -@click.command() -@click.version_option(__version__, - '--version', - '-v', - help='display the version and exit') -@click.option( - '-p', - '--path', - 'path', - type=click.Path(exists=True), - required=True, - help='local filepath or directory. 
support PDF, PPT, PPTX, DOC, DOCX, PNG, JPG files', -) -@click.option( - '-o', - '--output-dir', - 'output_dir', - type=click.Path(), - required=True, - help='output local directory', -) -@click.option( - '-m', - '--method', - 'method', - type=parse_pdf_methods, - help="""the method for parsing pdf. -ocr: using ocr technique to extract information from pdf. -txt: suitable for the text-based pdf only and outperform ocr. -auto: automatically choose the best method for parsing pdf from ocr and txt. -without method specified, auto will be used by default.""", - default='auto', -) -@click.option( - '-l', - '--lang', - 'lang', - type=str, - help=""" - Input the languages in the pdf (if known) to improve OCR accuracy. Optional. - You should input "Abbreviation" with language form url: - https://paddlepaddle.github.io/PaddleOCR/latest/en/ppocr/blog/multi_languages.html#5-support-languages-and-abbreviations - """, - default=None, -) -@click.option( - '-d', - '--debug', - 'debug_able', - type=bool, - help='Enables detailed debugging information during the execution of the CLI commands.', - default=False, -) -@click.option( - '-s', - '--start', - 'start_page_id', - type=int, - help='The starting page for PDF parsing, beginning from 0.', - default=0, -) -@click.option( - '-e', - '--end', - 'end_page_id', - type=int, - help='The ending page for PDF parsing, beginning from 0.', - default=None, -) -def cli(path, output_dir, method, lang, debug_able, start_page_id, end_page_id): - os.makedirs(output_dir, exist_ok=True) - temp_dir = tempfile.mkdtemp() - def read_fn(path: Path): - if path.suffix in ms_office_suffixes: - convert_file_to_pdf(str(path), temp_dir) - fn = os.path.join(temp_dir, f'{path.stem}.pdf') - elif path.suffix in image_suffixes: - with open(str(path), 'rb') as f: - bits = f.read() - pdf_bytes = fitz.open(stream=bits).convert_to_pdf() - fn = os.path.join(temp_dir, f'{path.stem}.pdf') - with open(fn, 'wb') as f: - f.write(pdf_bytes) - elif path.suffix in pdf_suffixes: - fn = str(path) - else: - raise Exception(f'Unknown file suffix: {path.suffix}') - - disk_rw = FileBasedDataReader(os.path.dirname(fn)) - return disk_rw.read(os.path.basename(fn)) - - def parse_doc(doc_path: Path, dataset: Dataset | None = None): - try: - file_name = str(Path(doc_path).stem) - if dataset is None: - pdf_data_or_dataset = read_fn(doc_path) - else: - pdf_data_or_dataset = dataset - do_parse( - output_dir, - file_name, - pdf_data_or_dataset, - [], - method, - debug_able, - start_page_id=start_page_id, - end_page_id=end_page_id, - lang=lang - ) - - except Exception as e: - logger.exception(e) - - if os.path.isdir(path): - doc_paths = [] - for doc_path in Path(path).glob('*'): - if doc_path.suffix in pdf_suffixes + image_suffixes + ms_office_suffixes: - if doc_path.suffix in ms_office_suffixes: - convert_file_to_pdf(str(doc_path), temp_dir) - doc_path = Path(os.path.join(temp_dir, f'{doc_path.stem}.pdf')) - elif doc_path.suffix in image_suffixes: - with open(str(doc_path), 'rb') as f: - bits = f.read() - pdf_bytes = fitz.open(stream=bits).convert_to_pdf() - fn = os.path.join(temp_dir, f'{doc_path.stem}.pdf') - with open(fn, 'wb') as f: - f.write(pdf_bytes) - doc_path = Path(fn) - doc_paths.append(doc_path) - datasets = batch_build_dataset(doc_paths, 4, lang) - batch_do_parse(output_dir, [str(doc_path.stem) for doc_path in doc_paths], datasets, method, debug_able, lang=lang) - else: - parse_doc(Path(path)) - - shutil.rmtree(temp_dir) - - -if __name__ == '__main__': - cli() diff --git a/magic_pdf/tools/cli_dev.py 
b/magic_pdf/tools/cli_dev.py deleted file mode 100644 index 6973d04a60ccc31cfbfbcb8562324c05aac20e84..0000000000000000000000000000000000000000 --- a/magic_pdf/tools/cli_dev.py +++ /dev/null @@ -1,149 +0,0 @@ -import json as json_parse -import os -from pathlib import Path - -import click - -import magic_pdf.model as model_config -from magic_pdf.data.data_reader_writer import FileBasedDataReader, S3DataReader -from magic_pdf.libs.config_reader import get_s3_config -from magic_pdf.libs.path_utils import (parse_s3_range_params, parse_s3path, - remove_non_official_s3_args) -from magic_pdf.libs.version import __version__ -from magic_pdf.tools.common import do_parse, parse_pdf_methods - - -def read_s3_path(s3path): - bucket, key = parse_s3path(s3path) - - s3_ak, s3_sk, s3_endpoint = get_s3_config(bucket) - s3_rw = S3DataReader('', bucket, s3_ak, s3_sk, s3_endpoint, 'auto') - may_range_params = parse_s3_range_params(s3path) - if may_range_params is None or 2 != len(may_range_params): - byte_start, byte_end = 0, -1 - else: - byte_start, byte_end = int(may_range_params[0]), int( - may_range_params[1]) - return s3_rw.read_at( - remove_non_official_s3_args(s3path), - byte_start, - byte_end, - ) - - -@click.group() -@click.version_option(__version__, '--version', '-v', help='显示版本信息') -def cli(): - pass - - -@cli.command() -@click.option( - '-j', - '--jsonl', - 'jsonl', - type=str, - help='输入 jsonl 路径,本地或者 s3 上的文件', - required=True, -) -@click.option( - '-m', - '--method', - 'method', - type=parse_pdf_methods, - help='指定解析方法。txt: 文本型 pdf 解析方法, ocr: 光学识别解析 pdf, auto: 程序智能选择解析方法', - default='auto', -) -@click.option( - '-o', - '--output-dir', - 'output_dir', - type=click.Path(), - required=True, - help='输出到本地目录', -) -def jsonl(jsonl, method, output_dir): - model_config.__use_inside_model__ = False - if jsonl.startswith('s3://'): - jso = json_parse.loads(read_s3_path(jsonl).decode('utf-8')) - else: - with open(jsonl) as f: - jso = json_parse.loads(f.readline()) - os.makedirs(output_dir, exist_ok=True) - s3_file_path = jso.get('file_location') - if s3_file_path is None: - s3_file_path = jso.get('path') - pdf_file_name = Path(s3_file_path).stem - pdf_data = read_s3_path(s3_file_path) - - print(pdf_file_name, jso, method) - do_parse( - output_dir, - pdf_file_name, - pdf_data, - jso['doc_layout_result'], - method, - False, - f_dump_content_list=True, - f_draw_model_bbox=True, - ) - - -@cli.command() -@click.option( - '-p', - '--pdf', - 'pdf', - type=click.Path(exists=True), - required=True, - help='本地 PDF 文件', -) -@click.option( - '-j', - '--json', - 'json_data', - type=click.Path(exists=True), - required=True, - help='本地模型推理出的 json 数据', -) -@click.option('-o', - '--output-dir', - 'output_dir', - type=click.Path(), - required=True, - help='本地输出目录') -@click.option( - '-m', - '--method', - 'method', - type=parse_pdf_methods, - help='指定解析方法。txt: 文本型 pdf 解析方法, ocr: 光学识别解析 pdf, auto: 程序智能选择解析方法', - default='auto', -) -def pdf(pdf, json_data, output_dir, method): - model_config.__use_inside_model__ = False - full_pdf_path = os.path.realpath(pdf) - os.makedirs(output_dir, exist_ok=True) - - def read_fn(path): - disk_rw = FileBasedDataReader(os.path.dirname(path)) - return disk_rw.read(os.path.basename(path)) - - model_json_list = json_parse.loads(read_fn(json_data).decode('utf-8')) - - file_name = str(Path(full_pdf_path).stem) - pdf_data = read_fn(full_pdf_path) - do_parse( - output_dir, - file_name, - pdf_data, - model_json_list, - method, - False, - f_dump_content_list=True, - f_draw_model_bbox=True, - ) - - -if 
__name__ == '__main__': - cli() diff --git a/magic_pdf/tools/common.py b/magic_pdf/tools/common.py deleted file mode 100644 index 06c52a8aa425afd3fa754a4a4ae63a49187a0349..0000000000000000000000000000000000000000 --- a/magic_pdf/tools/common.py +++ /dev/null @@ -1,340 +0,0 @@ -import os - -import click -import fitz -from loguru import logger - -import magic_pdf.model as model_config -from magic_pdf.config.enums import SupportedPdfParseMethod -from magic_pdf.config.make_content_config import DropMode, MakeMode -from magic_pdf.data.data_reader_writer import FileBasedDataWriter -from magic_pdf.data.dataset import Dataset, PymuDocDataset -from magic_pdf.libs.draw_bbox import draw_char_bbox -from magic_pdf.model.doc_analyze_by_custom_model import (batch_doc_analyze, - doc_analyze) - -# from io import BytesIO -# from pypdf import PdfReader, PdfWriter - - -def prepare_env(output_dir, pdf_file_name, method): - local_parent_dir = os.path.join(output_dir, pdf_file_name, method) - - local_image_dir = os.path.join(str(local_parent_dir), 'images') - local_md_dir = local_parent_dir - os.makedirs(local_image_dir, exist_ok=True) - os.makedirs(local_md_dir, exist_ok=True) - return local_image_dir, local_md_dir - - -# def convert_pdf_bytes_to_bytes_by_pypdf(pdf_bytes, start_page_id=0, end_page_id=None): -# # 将字节数据包装在 BytesIO 对象中 -# pdf_file = BytesIO(pdf_bytes) -# # 读取 PDF 的字节数据 -# reader = PdfReader(pdf_file) -# # 创建一个新的 PDF 写入器 -# writer = PdfWriter() -# # 将所有页面添加到新的 PDF 写入器中 -# end_page_id = end_page_id if end_page_id is not None and end_page_id >= 0 else len(reader.pages) - 1 -# if end_page_id > len(reader.pages) - 1: -# logger.warning("end_page_id is out of range, use pdf_docs length") -# end_page_id = len(reader.pages) - 1 -# for i, page in enumerate(reader.pages): -# if start_page_id <= i <= end_page_id: -# writer.add_page(page) -# # 创建一个字节缓冲区来存储输出的 PDF 数据 -# output_buffer = BytesIO() -# # 将 PDF 写入字节缓冲区 -# writer.write(output_buffer) -# # 获取字节缓冲区的内容 -# converted_pdf_bytes = output_buffer.getvalue() -# return converted_pdf_bytes - - -def convert_pdf_bytes_to_bytes_by_pymupdf(pdf_bytes, start_page_id=0, end_page_id=None): - document = fitz.open('pdf', pdf_bytes) - output_document = fitz.open() - end_page_id = ( - end_page_id - if end_page_id is not None and end_page_id >= 0 - else len(document) - 1 - ) - if end_page_id > len(document) - 1: - logger.warning('end_page_id is out of range, use pdf_docs length') - end_page_id = len(document) - 1 - output_document.insert_pdf(document, from_page=start_page_id, to_page=end_page_id) - output_bytes = output_document.tobytes() - return output_bytes - - -def _do_parse( - output_dir, - pdf_file_name, - pdf_bytes_or_dataset, - model_list, - parse_method, - debug_able=False, - f_draw_span_bbox=True, - f_draw_layout_bbox=True, - f_dump_md=True, - f_dump_middle_json=True, - f_dump_model_json=True, - f_dump_orig_pdf=True, - f_dump_content_list=True, - f_make_md_mode=MakeMode.MM_MD, - f_draw_model_bbox=False, - f_draw_line_sort_bbox=False, - f_draw_char_bbox=False, - start_page_id=0, - end_page_id=None, - lang=None, - layout_model=None, - formula_enable=None, - table_enable=None, -): - from magic_pdf.operators.models import InferenceResult - if debug_able: - logger.warning('debug mode is on') - f_draw_model_bbox = True - f_draw_line_sort_bbox = True - # f_draw_char_bbox = True - - if isinstance(pdf_bytes_or_dataset, bytes): - pdf_bytes = convert_pdf_bytes_to_bytes_by_pymupdf( - pdf_bytes_or_dataset, start_page_id, end_page_id - ) - ds = PymuDocDataset(pdf_bytes, 
lang=lang) - else: - ds = pdf_bytes_or_dataset - pdf_bytes = ds._raw_data - local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method) - - image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(local_md_dir) - image_dir = str(os.path.basename(local_image_dir)) - - if len(model_list) == 0: - if model_config.__use_inside_model__: - if parse_method == 'auto': - if ds.classify() == SupportedPdfParseMethod.TXT: - infer_result = ds.apply( - doc_analyze, - ocr=False, - lang=ds._lang, - layout_model=layout_model, - formula_enable=formula_enable, - table_enable=table_enable, - ) - pipe_result = infer_result.pipe_txt_mode( - image_writer, debug_mode=True, lang=ds._lang - ) - else: - infer_result = ds.apply( - doc_analyze, - ocr=True, - lang=ds._lang, - layout_model=layout_model, - formula_enable=formula_enable, - table_enable=table_enable, - ) - pipe_result = infer_result.pipe_ocr_mode( - image_writer, debug_mode=True, lang=ds._lang - ) - - elif parse_method == 'txt': - infer_result = ds.apply( - doc_analyze, - ocr=False, - lang=ds._lang, - layout_model=layout_model, - formula_enable=formula_enable, - table_enable=table_enable, - ) - pipe_result = infer_result.pipe_txt_mode( - image_writer, debug_mode=True, lang=ds._lang - ) - elif parse_method == 'ocr': - infer_result = ds.apply( - doc_analyze, - ocr=True, - lang=ds._lang, - layout_model=layout_model, - formula_enable=formula_enable, - table_enable=table_enable, - ) - pipe_result = infer_result.pipe_ocr_mode( - image_writer, debug_mode=True, lang=ds._lang - ) - else: - logger.error('unknown parse method') - exit(1) - else: - logger.error('need model list input') - exit(2) - else: - - infer_result = InferenceResult(model_list, ds) - if parse_method == 'ocr': - pipe_result = infer_result.pipe_ocr_mode( - image_writer, debug_mode=True, lang=ds._lang - ) - elif parse_method == 'txt': - pipe_result = infer_result.pipe_txt_mode( - image_writer, debug_mode=True, lang=ds._lang - ) - else: - if ds.classify() == SupportedPdfParseMethod.TXT: - pipe_result = infer_result.pipe_txt_mode( - image_writer, debug_mode=True, lang=ds._lang - ) - else: - pipe_result = infer_result.pipe_ocr_mode( - image_writer, debug_mode=True, lang=ds._lang - ) - - - if f_draw_model_bbox: - infer_result.draw_model( - os.path.join(local_md_dir, f'{pdf_file_name}_model.pdf') - ) - - if f_draw_layout_bbox: - pipe_result.draw_layout( - os.path.join(local_md_dir, f'{pdf_file_name}_layout.pdf') - ) - if f_draw_span_bbox: - pipe_result.draw_span(os.path.join(local_md_dir, f'{pdf_file_name}_spans.pdf')) - - if f_draw_line_sort_bbox: - pipe_result.draw_line_sort( - os.path.join(local_md_dir, f'{pdf_file_name}_line_sort.pdf') - ) - - if f_draw_char_bbox: - draw_char_bbox(pdf_bytes, local_md_dir, f'{pdf_file_name}_char_bbox.pdf') - - if f_dump_md: - pipe_result.dump_md( - md_writer, - f'{pdf_file_name}.md', - image_dir, - drop_mode=DropMode.NONE, - md_make_mode=f_make_md_mode, - ) - - if f_dump_middle_json: - pipe_result.dump_middle_json(md_writer, f'{pdf_file_name}_middle.json') - - if f_dump_model_json: - infer_result.dump_model(md_writer, f'{pdf_file_name}_model.json') - - if f_dump_orig_pdf: - md_writer.write( - f'{pdf_file_name}_origin.pdf', - pdf_bytes, - ) - - if f_dump_content_list: - pipe_result.dump_content_list( - md_writer, - f'{pdf_file_name}_content_list.json', - image_dir - ) - - logger.info(f'local output dir is {local_md_dir}') - -def do_parse( - output_dir, - pdf_file_name, - pdf_bytes_or_dataset, - model_list, - 
parse_method, - debug_able=False, - f_draw_span_bbox=True, - f_draw_layout_bbox=True, - f_dump_md=True, - f_dump_middle_json=True, - f_dump_model_json=True, - f_dump_orig_pdf=True, - f_dump_content_list=True, - f_make_md_mode=MakeMode.MM_MD, - f_draw_model_bbox=False, - f_draw_line_sort_bbox=False, - f_draw_char_bbox=False, - start_page_id=0, - end_page_id=None, - lang=None, - layout_model=None, - formula_enable=None, - table_enable=None, -): - parallel_count = 1 - if os.environ.get('MINERU_PARALLEL_INFERENCE_COUNT'): - parallel_count = int(os.environ['MINERU_PARALLEL_INFERENCE_COUNT']) - - if parallel_count > 1: - if isinstance(pdf_bytes_or_dataset, bytes): - pdf_bytes = convert_pdf_bytes_to_bytes_by_pymupdf( - pdf_bytes_or_dataset, start_page_id, end_page_id - ) - ds = PymuDocDataset(pdf_bytes, lang=lang) - else: - ds = pdf_bytes_or_dataset - batch_do_parse(output_dir, [pdf_file_name], [ds], parse_method, debug_able, f_draw_span_bbox=f_draw_span_bbox, f_draw_layout_bbox=f_draw_layout_bbox, f_dump_md=f_dump_md, f_dump_middle_json=f_dump_middle_json, f_dump_model_json=f_dump_model_json, f_dump_orig_pdf=f_dump_orig_pdf, f_dump_content_list=f_dump_content_list, f_make_md_mode=f_make_md_mode, f_draw_model_bbox=f_draw_model_bbox, f_draw_line_sort_bbox=f_draw_line_sort_bbox, f_draw_char_bbox=f_draw_char_bbox, lang=lang) - else: - _do_parse(output_dir, pdf_file_name, pdf_bytes_or_dataset, model_list, parse_method, debug_able, start_page_id=start_page_id, end_page_id=end_page_id, lang=lang, layout_model=layout_model, formula_enable=formula_enable, table_enable=table_enable, f_draw_span_bbox=f_draw_span_bbox, f_draw_layout_bbox=f_draw_layout_bbox, f_dump_md=f_dump_md, f_dump_middle_json=f_dump_middle_json, f_dump_model_json=f_dump_model_json, f_dump_orig_pdf=f_dump_orig_pdf, f_dump_content_list=f_dump_content_list, f_make_md_mode=f_make_md_mode, f_draw_model_bbox=f_draw_model_bbox, f_draw_line_sort_bbox=f_draw_line_sort_bbox, f_draw_char_bbox=f_draw_char_bbox) - - -def batch_do_parse( - output_dir, - pdf_file_names: list[str], - pdf_bytes_or_datasets: list[bytes | Dataset], - parse_method, - debug_able=False, - f_draw_span_bbox=True, - f_draw_layout_bbox=True, - f_dump_md=True, - f_dump_middle_json=True, - f_dump_model_json=True, - f_dump_orig_pdf=True, - f_dump_content_list=True, - f_make_md_mode=MakeMode.MM_MD, - f_draw_model_bbox=False, - f_draw_line_sort_bbox=False, - f_draw_char_bbox=False, - lang=None, - layout_model=None, - formula_enable=None, - table_enable=None, -): - dss = [] - for v in pdf_bytes_or_datasets: - if isinstance(v, bytes): - dss.append(PymuDocDataset(v, lang=lang)) - else: - dss.append(v) - - infer_results = batch_doc_analyze(dss, parse_method, lang=lang, layout_model=layout_model, formula_enable=formula_enable, table_enable=table_enable) - for idx, infer_result in enumerate(infer_results): - _do_parse( - output_dir = output_dir, - pdf_file_name = pdf_file_names[idx], - pdf_bytes_or_dataset = dss[idx], - model_list = infer_result.get_infer_res(), - parse_method = parse_method, - debug_able = debug_able, - f_draw_span_bbox = f_draw_span_bbox, - f_draw_layout_bbox = f_draw_layout_bbox, - f_dump_md=f_dump_md, - f_dump_middle_json=f_dump_middle_json, - f_dump_model_json=f_dump_model_json, - f_dump_orig_pdf=f_dump_orig_pdf, - f_dump_content_list=f_dump_content_list, - f_make_md_mode=MakeMode.MM_MD, - f_draw_model_bbox=f_draw_model_bbox, - f_draw_line_sort_bbox=f_draw_line_sort_bbox, - f_draw_char_bbox=f_draw_char_bbox, - lang=lang, - ) - - -parse_pdf_methods = 
click.Choice(['ocr', 'txt', 'auto']) diff --git a/magic_pdf/utils/__init__.py b/magic_pdf/utils/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/magic_pdf/utils/annotations.py b/magic_pdf/utils/annotations.py deleted file mode 100644 index 898d88033b724b0083a6a2124b8f805cabadd104..0000000000000000000000000000000000000000 --- a/magic_pdf/utils/annotations.py +++ /dev/null @@ -1,11 +0,0 @@ - -from loguru import logger - - -def ImportPIL(f): - try: - import PIL # noqa: F401 - except ImportError: - logger.error('Pillow not installed, please install by pip.') - exit(1) - return f diff --git a/magic_pdf/utils/office_to_pdf.py b/magic_pdf/utils/office_to_pdf.py deleted file mode 100644 index b6d03daec6bb900f470041c096cd8d943d27382c..0000000000000000000000000000000000000000 --- a/magic_pdf/utils/office_to_pdf.py +++ /dev/null @@ -1,115 +0,0 @@ -import os -import subprocess -import platform -from pathlib import Path -import shutil - -from loguru import logger - - -class ConvertToPdfError(Exception): - def __init__(self, msg): - self.msg = msg - super().__init__(self.msg) - - -def check_fonts_installed(): - """Check if required Chinese fonts are installed.""" - system_type = platform.system() - - if system_type in ['Windows', 'Darwin']: - pass - else: - # Linux: use fc-list - try: - output = subprocess.check_output(['fc-list', ':lang=zh'], encoding='utf-8') - if output.strip(): # 只要有任何输出(非空) - return True - else: - logger.warning( - f"No Chinese fonts were detected, the converted document may not display Chinese content properly." - ) - except Exception: - pass - - -def get_soffice_command(): - """Return the path to LibreOffice's soffice executable depending on the platform.""" - system_type = platform.system() - - # First check if soffice is in PATH - soffice_path = shutil.which('soffice') - if soffice_path: - return soffice_path - - if system_type == 'Windows': - # Check common installation paths - possible_paths = [ - Path(os.environ.get('PROGRAMFILES', 'C:/Program Files')) / 'LibreOffice/program/soffice.exe', - Path(os.environ.get('PROGRAMFILES(X86)', 'C:/Program Files (x86)')) / 'LibreOffice/program/soffice.exe', - Path('C:/Program Files/LibreOffice/program/soffice.exe'), - Path('C:/Program Files (x86)/LibreOffice/program/soffice.exe') - ] - - # Check other drives for windows - for drive in ['C:', 'D:', 'E:', 'F:', 'G:', 'H:']: - possible_paths.append(Path(f"{drive}/LibreOffice/program/soffice.exe")) - - for path in possible_paths: - if path.exists(): - return str(path) - - raise ConvertToPdfError( - "LibreOffice not found. Please install LibreOffice from https://www.libreoffice.org/ " - "or ensure soffice.exe is in your PATH environment variable." - ) - else: - # For Linux/macOS, provide installation instructions if not found - try: - # Try to find soffice in standard locations - possible_paths = [ - '/usr/bin/soffice', - '/usr/local/bin/soffice', - '/opt/libreoffice/program/soffice', - '/Applications/LibreOffice.app/Contents/MacOS/soffice' - ] - for path in possible_paths: - if os.path.exists(path): - return path - - raise ConvertToPdfError( - "LibreOffice not found. Please install it:\n" - " - Ubuntu/Debian: sudo apt-get install libreoffice\n" - " - CentOS/RHEL: sudo yum install libreoffice\n" - " - macOS: brew install libreoffice or download from https://www.libreoffice.org/\n" - " - Or ensure soffice is in your PATH environment variable." 
- ) - except Exception as e: - raise ConvertToPdfError(f"Error locating LibreOffice: {str(e)}") - - -def convert_file_to_pdf(input_path, output_dir): - """Convert a single document (ppt, doc, etc.) to PDF.""" - if not os.path.isfile(input_path): - raise FileNotFoundError(f"The input file {input_path} does not exist.") - - os.makedirs(output_dir, exist_ok=True) - - check_fonts_installed() - - soffice_cmd = get_soffice_command() - - cmd = [ - soffice_cmd, - '--headless', - '--norestore', - '--invisible', - '--convert-to', 'pdf', - '--outdir', str(output_dir), - str(input_path) - ] - - process = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - - if process.returncode != 0: - raise ConvertToPdfError(f"LibreOffice convert failed: {process.stderr.decode()}") diff --git a/magic_pdf/model/sub_modules/language_detection/__init__.py b/mineru/api/__init__.py similarity index 100% rename from magic_pdf/model/sub_modules/language_detection/__init__.py rename to mineru/api/__init__.py diff --git a/magic_pdf/model/sub_modules/language_detection/yolov11/__init__.py b/mineru/backend/__init__.py similarity index 100% rename from magic_pdf/model/sub_modules/language_detection/yolov11/__init__.py rename to mineru/backend/__init__.py diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py b/mineru/backend/pipeline/__init__.py similarity index 100% rename from magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/__init__.py rename to mineru/backend/pipeline/__init__.py diff --git a/magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/__init__.py b/mineru/backend/vlm/__init__.py similarity index 100% rename from magic_pdf/model/sub_modules/ocr/paddleocr2pytorch/tools/infer/__init__.py rename to mineru/backend/vlm/__init__.py diff --git a/magic_pdf/post_proc/__init__.py b/mineru/cli/__init__.py similarity index 100% rename from magic_pdf/post_proc/__init__.py rename to mineru/cli/__init__.py diff --git a/mineru/data/__init__.py b/mineru/data/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1e17167ceda21a510d4486bf1711c9a72bf414db --- /dev/null +++ b/mineru/data/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. diff --git a/mineru/libs/__init__.py b/mineru/libs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1e17167ceda21a510d4486bf1711c9a72bf414db --- /dev/null +++ b/mineru/libs/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. diff --git a/mineru/resources/__init__.py b/mineru/resources/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1e17167ceda21a510d4486bf1711c9a72bf414db --- /dev/null +++ b/mineru/resources/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. diff --git a/mineru/utils/__init__.py b/mineru/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..1e17167ceda21a510d4486bf1711c9a72bf414db --- /dev/null +++ b/mineru/utils/__init__.py @@ -0,0 +1 @@ +# Copyright (c) Opendatalab. All rights reserved. diff --git a/projects/gradio_app/app.py b/projects/gradio_app/app.py index 180fd7e2101f4c8ce49f121a81e54e1349b7e3d0..c9d32c1d9039d33617e5db5b3150122c527e6ada 100644 --- a/projects/gradio_app/app.py +++ b/projects/gradio_app/app.py @@ -40,7 +40,7 @@ def parse_pdf(doc_path, output_dir, end_page_id, is_ocr, layout_mode, formula_en pdf_data, [], parse_method, - False, + True, end_page_id=end_page_id, layout_model=layout_mode, formula_enable=formula_enable,