Commit 7d2dfc80 authored by liukaiwen

Merge branch 'dev' into dev-table-model-update

parents a0eff3be 6d571e2e
from magic_pdf.data.data_reader_writer.multi_bucket_s3 import (
MultiBucketS3DataReader, MultiBucketS3DataWriter)
from magic_pdf.data.schemas import S3Config
class S3DataReader(MultiBucketS3DataReader):
def __init__(
self,
bucket: str,
ak: str,
sk: str,
endpoint_url: str,
addressing_style: str = 'auto',
):
"""s3 reader client.
Args:
bucket (str): bucket name
ak (str): access key
sk (str): secret key
endpoint_url (str): endpoint url of s3
addressing_style (str, optional): Defaults to 'auto'. Other valid options are 'path' and 'virtual';
see https://boto3.amazonaws.com/v1/documentation/api/1.9.42/guide/s3.html
"""
super().__init__(
bucket,
[
S3Config(
bucket_name=bucket,
access_key=ak,
secret_key=sk,
endpoint_url=endpoint_url,
addressing_style=addressing_style,
)
],
)
class S3DataWriter(MultiBucketS3DataWriter):
def __init__(
self,
bucket: str,
ak: str,
sk: str,
endpoint_url: str,
addressing_style: str = 'auto',
):
"""s3 writer client.
Args:
bucket (str): bucket name
ak (str): access key
sk (str): secret key
endpoint_url (str): endpoint url of s3
addressing_style (str, optional): Defaults to 'auto'. Other valid options are 'path' and 'virtual';
see https://boto3.amazonaws.com/v1/documentation/api/1.9.42/guide/s3.html
"""
super().__init__(
bucket,
[
S3Config(
bucket_name=bucket,
access_key=ak,
secret_key=sk,
endpoint_url=endpoint_url,
addressing_style=addressing_style,
)
],
)
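# Usage sketch (illustrative, not part of the module above): constructing the
# single-bucket reader/writer. Bucket, credentials and endpoint are placeholders,
# and the read()/write() calls assume the interface inherited from the
# MultiBucketS3DataReader/MultiBucketS3DataWriter base classes.
reader = S3DataReader(
    bucket='my-bucket',
    ak='<access-key>',
    sk='<secret-key>',
    endpoint_url='https://s3.example.com',
)
pdf_bits = reader.read('papers/sample.pdf')  # fetch a whole object
writer = S3DataWriter(
    bucket='my-bucket',
    ak='<access-key>',
    sk='<secret-key>',
    endpoint_url='https://s3.example.com',
)
writer.write('output/sample.md', b'# result')  # upload bytes under a key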
from abc import ABC, abstractmethod
from typing import Iterator
import fitz
from magic_pdf.config.enums import SupportedPdfParseMethod
from magic_pdf.data.schemas import PageInfo
from magic_pdf.data.utils import fitz_doc_to_image
class PageableData(ABC):
@abstractmethod
def get_image(self) -> dict:
"""Transform data to image."""
pass
@abstractmethod
def get_doc(self) -> fitz.Page:
"""Get the pymudoc page."""
pass
@abstractmethod
def get_page_info(self) -> PageInfo:
"""Get the page info of the page.
Returns:
PageInfo: the page info of this page
"""
pass
class Dataset(ABC):
@abstractmethod
def __len__(self) -> int:
"""The length of the dataset."""
pass
@abstractmethod
def __iter__(self) -> Iterator[PageableData]:
"""Yield the page data."""
pass
@abstractmethod
def supported_methods(self) -> list[SupportedPdfParseMethod]:
"""The methods that this dataset support.
Returns:
list[SupportedPdfParseMethod]: The supported methods, Valid methods are: OCR, TXT
"""
pass
@abstractmethod
def data_bits(self) -> bytes:
"""The bits used to create this dataset."""
pass
@abstractmethod
def get_page(self, page_id: int) -> PageableData:
"""Get the page indexed by page_id.
Args:
page_id (int): the index of the page
Returns:
PageableData: the page doc object
"""
pass
class PymuDocDataset(Dataset):
def __init__(self, bits: bytes):
"""Initialize the dataset, which wraps the pymudoc documents.
Args:
bits (bytes): the bytes of the pdf
"""
self._records = [Doc(v) for v in fitz.open('pdf', bits)]
self._data_bits = bits
self._raw_data = bits
def __len__(self) -> int:
"""The page number of the pdf."""
return len(self._records)
def __iter__(self) -> Iterator[PageableData]:
"""Yield the page doc object."""
return iter(self._records)
def supported_methods(self) -> list[SupportedPdfParseMethod]:
"""The method supported by this dataset.
Returns:
list[SupportedPdfParseMethod]: the supported methods
"""
return [SupportedPdfParseMethod.OCR, SupportedPdfParseMethod.TXT]
def data_bits(self) -> bytes:
"""The pdf bits used to create this dataset."""
return self._data_bits
def get_page(self, page_id: int) -> PageableData:
"""The page doc object.
Args:
page_id (int): the page doc index
Returns:
PageableData: the page doc object
"""
return self._records[page_id]
class ImageDataset(Dataset):
def __init__(self, bits: bytes):
"""Initialize the dataset, which wraps the pymudoc documents.
Args:
bits (bytes): the bytes of the image, which will be converted to pdf first and then wrapped as pymudoc.
"""
pdf_bytes = fitz.open(stream=bits).convert_to_pdf()
self._records = [Doc(v) for v in fitz.open('pdf', pdf_bytes)]
self._raw_data = bits
self._data_bits = pdf_bytes
def __len__(self) -> int:
"""The length of the dataset."""
return len(self._records)
def __iter__(self) -> Iterator[PageableData]:
"""Yield the page object."""
return iter(self._records)
def supported_methods(self):
"""The method supported by this dataset.
Returns:
list[SupportedPdfParseMethod]: the supported methods
"""
return [SupportedPdfParseMethod.OCR]
def data_bits(self) -> bytes:
"""The pdf bits used to create this dataset."""
return self._data_bits
def get_page(self, page_id: int) -> PageableData:
"""The page doc object.
Args:
page_id (int): the page doc index
Returns:
PageableData: the page doc object
"""
return self._records[page_id]
class Doc(PageableData):
"""Initialized with pymudoc object."""
def __init__(self, doc: fitz.Page):
self._doc = doc
def get_image(self):
"""Return the imge info.
Returns:
dict: {
img: np.ndarray,
width: int,
height: int
}
"""
return fitz_doc_to_image(self._doc)
def get_doc(self) -> fitz.Page:
"""Get the pymudoc object.
Returns:
fitz.Page: the pymudoc object
"""
return self._doc
def get_page_info(self) -> PageInfo:
"""Get the page info of the page.
Returns:
PageInfo: the page info of this page
"""
page_w = self._doc.rect.width
page_h = self._doc.rect.height
return PageInfo(w=page_w, h=page_h)
def __getattr__(self, name):
if hasattr(self._doc, name):
return getattr(self._doc, name)
raise AttributeError(f"'Doc' object has no attribute '{name}'")
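# Usage sketch (illustrative, not part of the module above): wrapping raw pdf
# bytes in a PymuDocDataset and walking its pages. The file path is a placeholder.
with open('/path/to/doc.pdf', 'rb') as f:
    ds = PymuDocDataset(f.read())
print(len(ds))                      # number of pages
first = ds.get_page(0)              # a Doc (PageableData) instance
info = first.get_page_info()        # PageInfo with page width/height
img = first.get_image()['img']      # numpy array rendered from the page
for page in ds:                     # the dataset is iterable over pages
    _ = page.get_page_info()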
from abc import ABC, abstractmethod
class IOReader(ABC):
@abstractmethod
def read(self, path: str) -> bytes:
"""Read the file.
Args:
path (str): file path to read
Returns:
bytes: the content of the file
"""
pass
@abstractmethod
def read_at(self, path: str, offset: int = 0, limit: int = -1) -> bytes:
"""Read at offset and limit.
Args:
path (str): the path of the file; if it is a relative path, it will be joined with parent_dir.
offset (int, optional): the number of bytes to skip. Defaults to 0.
limit (int, optional): the number of bytes to read. Defaults to -1 (read to the end).
Returns:
bytes: the content of file
"""
pass
class IOWriter:
@abstractmethod
def write(self, path: str, data: bytes) -> None:
"""Write file with data.
Args:
path (str): the path of the file; if it is a relative path, it will be joined with parent_dir.
data (bytes): the data to write
"""
pass
import io
import requests
from magic_pdf.data.io.base import IOReader, IOWriter
class HttpReader(IOReader):
def read(self, url: str) -> bytes:
"""Read the file.
Args:
url (str): the url to read
Returns:
bytes: the content of the file
"""
return requests.get(url).content
def read_at(self, path: str, offset: int = 0, limit: int = -1) -> bytes:
"""Not Implemented."""
raise NotImplementedError
class HttpWriter(IOWriter):
def write(self, url: str, data: bytes) -> None:
"""Write file with data.
Args:
url (str): the url to post the data to
data (bytes): the data to write
"""
files = {'file': io.BytesIO(data)}
response = requests.post(url, files=files)
assert 200 <= response.status_code < 300
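# Usage sketch (illustrative, not part of the module above): fetching bytes over
# HTTP and posting them back. The URLs are placeholders; HttpWriter sends the
# payload as a multipart 'file' field and asserts a 2xx status code.
data = HttpReader().read('https://example.com/sample.pdf')
HttpWriter().write('https://example.com/upload', data)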
import boto3
from botocore.config import Config
from magic_pdf.data.io.base import IOReader, IOWriter
class S3Reader(IOReader):
def __init__(
self,
bucket: str,
ak: str,
sk: str,
endpoint_url: str,
addressing_style: str = 'auto',
):
"""s3 reader client.
Args:
bucket (str): bucket name
ak (str): access key
sk (str): secret key
endpoint_url (str): endpoint url of s3
addressing_style (str, optional): Defaults to 'auto'. Other valid options are 'path' and 'virtual';
see https://boto3.amazonaws.com/v1/documentation/api/1.9.42/guide/s3.html
"""
self._bucket = bucket
self._ak = ak
self._sk = sk
self._s3_client = boto3.client(
service_name='s3',
aws_access_key_id=ak,
aws_secret_access_key=sk,
endpoint_url=endpoint_url,
config=Config(
s3={'addressing_style': addressing_style},
retries={'max_attempts': 5, 'mode': 'standard'},
),
)
def read(self, key: str) -> bytes:
"""Read the file.
Args:
key (str): the s3 object key to read
Returns:
bytes: the content of the file
"""
return self.read_at(key)
def read_at(self, key: str, offset: int = 0, limit: int = -1) -> bytes:
"""Read at offset and limit.
Args:
key (str): the s3 object key to read.
offset (int, optional): the number of bytes to skip. Defaults to 0.
limit (int, optional): the number of bytes to read. Defaults to -1 (read to the end).
Returns:
bytes: the content of file
"""
if limit > -1:
range_header = f'bytes={offset}-{offset+limit-1}'
res = self._s3_client.get_object(
Bucket=self._bucket, Key=key, Range=range_header
)
else:
res = self._s3_client.get_object(
Bucket=self._bucket, Key=key, Range=f'bytes={offset}-'
)
return res['Body'].read()
class S3Writer(IOWriter):
def __init__(
self,
bucket: str,
ak: str,
sk: str,
endpoint_url: str,
addressing_style: str = 'auto',
):
"""s3 reader client.
Args:
bucket (str): bucket name
ak (str): access key
sk (str): secret key
endpoint_url (str): endpoint url of s3
addressing_style (str, optional): Defaults to 'auto'. Other valid options are 'path' and 'virtual';
see https://boto3.amazonaws.com/v1/documentation/api/1.9.42/guide/s3.html
"""
self._bucket = bucket
self._ak = ak
self._sk = sk
self._s3_client = boto3.client(
service_name='s3',
aws_access_key_id=ak,
aws_secret_access_key=sk,
endpoint_url=endpoint_url,
config=Config(
s3={'addressing_style': addressing_style},
retries={'max_attempts': 5, 'mode': 'standard'},
),
)
def write(self, key: str, data: bytes):
"""Write file with data.
Args:
key (str): the s3 object key to write.
data (bytes): the data to write
"""
self._s3_client.put_object(Bucket=self._bucket, Key=key, Body=data)
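# Usage sketch (illustrative, not part of the module above): the boto3-backed
# reader supports ranged reads via the S3 'Range' header. Bucket, credentials
# and endpoint are placeholders.
reader = S3Reader('my-bucket', '<access-key>', '<secret-key>', 'https://s3.example.com')
whole = reader.read('papers/sample.pdf')
chunk = reader.read_at('papers/sample.pdf', offset=1024, limit=2048)  # bytes 1024-3071
writer = S3Writer('my-bucket', '<access-key>', '<secret-key>', 'https://s3.example.com')
writer.write('output/sample.json', b'{}')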
import json
import os
from pathlib import Path
from magic_pdf.config.exceptions import EmptyData, InvalidParams
from magic_pdf.data.data_reader_writer import (FileBasedDataReader,
MultiBucketS3DataReader)
from magic_pdf.data.dataset import ImageDataset, PymuDocDataset
def read_jsonl(
s3_path_or_local: str, s3_client: MultiBucketS3DataReader | None = None
) -> list[PymuDocDataset]:
"""Read the jsonl file and return the list of PymuDocDataset.
Args:
s3_path_or_local (str): local file or s3 path
s3_client (MultiBucketS3DataReader | None, optional): s3 client that supports multiple buckets. Defaults to None.
Raises:
InvalidParams: if s3_path_or_local or a referenced pdf path is an s3 path but s3_client is not provided.
EmptyData: if a line of the jsonl file has no pdf file location.
Returns:
list[PymuDocDataset]: each line in the jsonl file will be converted to a PymuDocDataset
"""
bits_arr = []
if s3_path_or_local.startswith('s3://'):
if s3_client is None:
raise InvalidParams('s3_client is required when s3_path is provided')
jsonl_bits = s3_client.read(s3_path_or_local)
else:
jsonl_bits = FileBasedDataReader('').read(s3_path_or_local)
jsonl_d = [
json.loads(line) for line in jsonl_bits.decode().split('\n') if line.strip()
]
for d in jsonl_d:
pdf_path = d.get('file_location', '') or d.get('path', '')
if len(pdf_path) == 0:
raise EmptyData('pdf file location is empty')
if pdf_path.startswith('s3://'):
if s3_client is None:
raise InvalidParams('s3_client is required when s3_path is provided')
bits_arr.append(s3_client.read(pdf_path))
else:
bits_arr.append(FileBasedDataReader('').read(pdf_path))
return [PymuDocDataset(bits) for bits in bits_arr]
def read_local_pdfs(path: str) -> list[PymuDocDataset]:
"""Read pdf from path or directory.
Args:
path (str): pdf file path or directory that contains pdf files
Returns:
list[PymuDocDataset]: each pdf file will be converted to a PymuDocDataset
"""
if os.path.isdir(path):
reader = FileBasedDataReader(path)
return [
PymuDocDataset(reader.read(doc_path.name))
for doc_path in Path(path).glob('*.pdf')
]
else:
reader = FileBasedDataReader()
bits = reader.read(path)
return [PymuDocDataset(bits)]
def read_local_images(path: str, suffixes: list[str]) -> list[ImageDataset]:
"""Read images from path or directory.
Args:
path (str): image file path or directory that contains image files
suffixes (list[str]): the suffixes of the image files used to filter the files. Example: ['jpg', 'png']
Returns:
list[ImageDataset]: each image file will be converted to an ImageDataset
"""
if os.path.isdir(path):
imgs_bits = []
s_suffixes = set(suffixes)
reader = FileBasedDataReader(path)
for root, _, files in os.walk(path):
for file in files:
suffix = file.split('.')
if suffix[-1] in s_suffixes:
imgs_bits.append(reader.read(file))
return [ImageDataset(bits) for bits in imgs_bits]
else:
reader = FileBasedDataReader()
bits = reader.read(path)
return [ImageDataset(bits)]
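# Usage sketch (illustrative, not part of the module above): building datasets
# with the helpers defined here. Paths are placeholders; the jsonl file is
# expected to hold one JSON object per line with a 'file_location' (or 'path')
# field pointing at a pdf.
pdf_datasets = read_local_pdfs('/data/pdfs')                     # every *.pdf in the directory
image_datasets = read_local_images('/data/scans', ['jpg', 'png'])
jsonl_datasets = read_jsonl('/data/index.jsonl')                 # s3 paths additionally require s3_client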
from pydantic import BaseModel, Field
class S3Config(BaseModel):
bucket_name: str = Field(description='s3 bucket name', min_length=1)
access_key: str = Field(description='s3 access key', min_length=1)
secret_key: str = Field(description='s3 secret key', min_length=1)
endpoint_url: str = Field(description='s3 endpoint url', min_length=1)
addressing_style: str = Field(description='s3 addressing style', default='auto', min_length=1)
class PageInfo(BaseModel):
w: float = Field(description='the width of page')
h: float = Field(description='the height of page')
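# Usage sketch (illustrative, not part of the module above): both models validate
# their fields, and the min_length=1 constraints reject empty strings. Values are
# placeholders.
cfg = S3Config(
    bucket_name='my-bucket',
    access_key='<access-key>',
    secret_key='<secret-key>',
    endpoint_url='https://s3.example.com',
)                                   # addressing_style defaults to 'auto'
page = PageInfo(w=595.0, h=842.0)   # an A4 page in PDF points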
import fitz
import numpy as np
from magic_pdf.utils.annotations import ImportPIL
@ImportPIL
def fitz_doc_to_image(doc, dpi=200) -> dict:
"""Convert fitz.Document to image, Then convert the image to numpy array.
Args:
doc (_type_): pymudoc page
dpi (int, optional): reset the dpi of dpi. Defaults to 200.
Returns:
dict: {'img': numpy array, 'width': width, 'height': height }
"""
from PIL import Image
mat = fitz.Matrix(dpi / 72, dpi / 72)
pm = doc.get_pixmap(matrix=mat, alpha=False)
# If the width or height exceeds 9000 after scaling, do not scale further.
if pm.width > 9000 or pm.height > 9000:
pm = doc.get_pixmap(matrix=fitz.Matrix(1, 1), alpha=False)
img = Image.frombytes('RGB', (pm.width, pm.height), pm.samples)
img = np.array(img)
img_dict = {'img': img, 'width': pm.width, 'height': pm.height}
return img_dict
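# Usage sketch (illustrative, not part of the module above): rendering the first
# page of a pdf at the default 200 dpi. The file path is a placeholder.
doc = fitz.open('/path/to/doc.pdf')
result = fitz_doc_to_image(doc[0])
print(result['img'].shape)             # (height, width, 3) uint8 RGB array
print(result['width'], result['height'])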
import re
import wordninja
from loguru import logger
from magic_pdf.libs.commons import join_path
......@@ -25,37 +24,6 @@ def __is_hyphen_at_line_end(line):
return bool(re.search(r'[A-Za-z]+-\s*$', line))
def split_long_words(text):
segments = text.split(' ')
for i in range(len(segments)):
words = re.findall(r'\w+|[^\w]', segments[i], re.UNICODE)
for j in range(len(words)):
if len(words[j]) > 10:
words[j] = ' '.join(wordninja.split(words[j]))
segments[i] = ''.join(words)
return ' '.join(segments)
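# Illustrative example: wordninja only splits alphanumeric runs longer than 10
# characters, which typically come from OCR dropping spaces. The exact split
# depends on wordninja's language model.
#   split_long_words('thisisaverylongword test')
#   # -> 'this is a very long word test'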
def ocr_mk_mm_markdown_with_para(pdf_info_list: list, img_buket_path):
markdown = []
for page_info in pdf_info_list:
paras_of_layout = page_info.get('para_blocks')
page_markdown = ocr_mk_markdown_with_para_core_v2(
paras_of_layout, 'mm', img_buket_path)
markdown.extend(page_markdown)
return '\n\n'.join(markdown)
def ocr_mk_nlp_markdown_with_para(pdf_info_dict: list):
markdown = []
for page_info in pdf_info_dict:
paras_of_layout = page_info.get('para_blocks')
page_markdown = ocr_mk_markdown_with_para_core_v2(
paras_of_layout, 'nlp')
markdown.extend(page_markdown)
return '\n\n'.join(markdown)
def ocr_mk_mm_markdown_with_para_and_pagination(pdf_info_dict: list,
img_buket_path):
markdown_with_para_and_pagination = []
......@@ -76,61 +44,20 @@ def ocr_mk_mm_markdown_with_para_and_pagination(pdf_info_dict: list,
return markdown_with_para_and_pagination
def ocr_mk_markdown_with_para_core(paras_of_layout, mode, img_buket_path=''):
page_markdown = []
for paras in paras_of_layout:
for para in paras:
para_text = ''
for line in para:
for span in line['spans']:
span_type = span.get('type')
content = ''
language = ''
if span_type == ContentType.Text:
content = span['content']
language = detect_lang(content)
if (language == 'en'): # only split long English words; splitting Chinese text would lose characters
content = ocr_escape_special_markdown_char(
split_long_words(content))
else:
content = ocr_escape_special_markdown_char(content)
elif span_type == ContentType.InlineEquation:
content = f"${span['content']}$"
elif span_type == ContentType.InterlineEquation:
content = f"\n$$\n{span['content']}\n$$\n"
elif span_type in [ContentType.Image, ContentType.Table]:
if mode == 'mm':
content = f"\n![]({join_path(img_buket_path, span['image_path'])})\n"
elif mode == 'nlp':
pass
if content != '':
if language == 'en': # in an English context, contents need to be separated by spaces
para_text += content + ' '
else: # in a Chinese context, no space is needed between contents
para_text += content
if para_text.strip() == '':
continue
else:
page_markdown.append(para_text.strip() + ' ')
return page_markdown
def ocr_mk_markdown_with_para_core_v2(paras_of_layout,
mode,
img_buket_path='',
parse_type="auto",
lang=None
):
page_markdown = []
for para_block in paras_of_layout:
para_text = ''
para_type = para_block['type']
if para_type in [BlockType.Text, BlockType.List, BlockType.Index]:
para_text = merge_para_with_text(para_block, parse_type=parse_type, lang=lang)
para_text = merge_para_with_text(para_block)
elif para_type == BlockType.Title:
para_text = f'# {merge_para_with_text(para_block, parse_type=parse_type, lang=lang)}'
para_text = f'# {merge_para_with_text(para_block)}'
elif para_type == BlockType.InterlineEquation:
para_text = merge_para_with_text(para_block, parse_type=parse_type, lang=lang)
para_text = merge_para_with_text(para_block)
elif para_type == BlockType.Image:
if mode == 'nlp':
continue
......@@ -143,17 +70,17 @@ def ocr_mk_markdown_with_para_core_v2(paras_of_layout,
para_text += f"\n![]({join_path(img_buket_path, span['image_path'])}) \n"
for block in para_block['blocks']: # 2nd: concatenate image_caption
if block['type'] == BlockType.ImageCaption:
para_text += merge_para_with_text(para_block, parse_type=parse_type, lang=lang)
para_text += merge_para_with_text(block, parse_type=parse_type, lang=lang)
para_text += merge_para_with_text(block) + ' \n'
for block in para_block['blocks']: # 3rd: concatenate image_footnote
if block['type'] == BlockType.ImageFootnote:
para_text += merge_para_with_text(block, parse_type=parse_type, lang=lang)
para_text += merge_para_with_text(block) + ' \n'
elif para_type == BlockType.Table:
if mode == 'nlp':
continue
elif mode == 'mm':
for block in para_block['blocks']: # 1st: concatenate table_caption
if block['type'] == BlockType.TableCaption:
para_text += merge_para_with_text(block, parse_type=parse_type, lang=lang)
para_text += merge_para_with_text(block) + ' \n'
for block in para_block['blocks']: # 2nd: concatenate table_body
if block['type'] == BlockType.TableBody:
for line in block['lines']:
......@@ -168,7 +95,7 @@ def ocr_mk_markdown_with_para_core_v2(paras_of_layout,
para_text += f"\n![]({join_path(img_buket_path, span['image_path'])}) \n"
for block in para_block['blocks']: # 3rd: concatenate table_footnote
if block['type'] == BlockType.TableFootnote:
para_text += merge_para_with_text(block, parse_type=parse_type, lang=lang)
para_text += merge_para_with_text(block) + ' \n'
if para_text.strip() == '':
continue
......@@ -191,7 +118,7 @@ def detect_language(text):
return 'empty'
def merge_para_with_text(para_block, parse_type="auto", lang=None):
def merge_para_with_text(para_block):
para_text = ''
for i, line in enumerate(para_block['lines']):
......@@ -207,21 +134,11 @@ def merge_para_with_text(para_block, parse_type="auto", lang=None):
if line_text != '':
line_lang = detect_lang(line_text)
for span in line['spans']:
span_type = span['type']
content = ''
if span_type == ContentType.Text:
content = span['content']
# language = detect_lang(content)
language = detect_language(content)
# check whether the language is a minority (non-English) language
if lang is not None and lang != 'en':
content = ocr_escape_special_markdown_char(content)
else: # logic for non-minority languages
if language == 'en' and parse_type == 'ocr': # only split long English words; splitting Chinese text would lose characters
content = ocr_escape_special_markdown_char(
split_long_words(content))
else:
content = ocr_escape_special_markdown_char(content)
content = ocr_escape_special_markdown_char(span['content'])
elif span_type == ContentType.InlineEquation:
content = f" ${span['content']}$ "
elif span_type == ContentType.InterlineEquation:
......@@ -242,74 +159,39 @@ def merge_para_with_text(para_block, parse_type="auto", lang=None):
return para_text
def para_to_standard_format(para, img_buket_path):
para_content = {}
if len(para) == 1:
para_content = line_to_standard_format(para[0], img_buket_path)
elif len(para) > 1:
para_text = ''
inline_equation_num = 0
for line in para:
for span in line['spans']:
language = ''
span_type = span.get('type')
content = ''
if span_type == ContentType.Text:
content = span['content']
language = detect_lang(content)
if language == 'en': # only split long English words; splitting Chinese text would lose characters
content = ocr_escape_special_markdown_char(
split_long_words(content))
else:
content = ocr_escape_special_markdown_char(content)
elif span_type == ContentType.InlineEquation:
content = f"${span['content']}$"
inline_equation_num += 1
if language == 'en': # in an English context, contents need to be separated by spaces
para_text += content + ' '
else: # in a Chinese context, no space is needed between contents
para_text += content
para_content = {
'type': 'text',
'text': para_text,
'inline_equation_num': inline_equation_num,
}
return para_content
def para_to_standard_format_v2(para_block, img_buket_path, page_idx, parse_type="auto", lang=None, drop_reason=None):
def para_to_standard_format_v2(para_block, img_buket_path, page_idx, drop_reason=None):
para_type = para_block['type']
para_content = {}
if para_type == BlockType.Text:
if para_type in [BlockType.Text, BlockType.List, BlockType.Index]:
para_content = {
'type': 'text',
'text': merge_para_with_text(para_block, parse_type=parse_type, lang=lang),
'text': merge_para_with_text(para_block),
}
elif para_type == BlockType.Title:
para_content = {
'type': 'text',
'text': merge_para_with_text(para_block, parse_type=parse_type, lang=lang),
'text': merge_para_with_text(para_block),
'text_level': 1,
}
elif para_type == BlockType.InterlineEquation:
para_content = {
'type': 'equation',
'text': merge_para_with_text(para_block, parse_type=parse_type, lang=lang),
'text': merge_para_with_text(para_block),
'text_format': 'latex',
}
elif para_type == BlockType.Image:
para_content = {'type': 'image'}
para_content = {'type': 'image', 'img_caption': [], 'img_footnote': []}
for block in para_block['blocks']:
if block['type'] == BlockType.ImageBody:
para_content['img_path'] = join_path(
img_buket_path,
block['lines'][0]['spans'][0]['image_path'])
if block['type'] == BlockType.ImageCaption:
para_content['img_caption'] = merge_para_with_text(block, parse_type=parse_type, lang=lang)
para_content['img_caption'].append(merge_para_with_text(block))
if block['type'] == BlockType.ImageFootnote:
para_content['img_footnote'] = merge_para_with_text(block, parse_type=parse_type, lang=lang)
para_content['img_footnote'].append(merge_para_with_text(block))
elif para_type == BlockType.Table:
para_content = {'type': 'table'}
para_content = {'type': 'table', 'table_caption': [], 'table_footnote': []}
for block in para_block['blocks']:
if block['type'] == BlockType.TableBody:
if block["lines"][0]["spans"][0].get('latex', ''):
......@@ -318,9 +200,9 @@ def para_to_standard_format_v2(para_block, img_buket_path, page_idx, parse_type=
para_content['table_body'] = f"\n\n{block['lines'][0]['spans'][0]['html']}\n\n"
para_content['img_path'] = join_path(img_buket_path, block["lines"][0]["spans"][0]['image_path'])
if block['type'] == BlockType.TableCaption:
para_content['table_caption'] = merge_para_with_text(block, parse_type=parse_type, lang=lang)
para_content['table_caption'].append(merge_para_with_text(block))
if block['type'] == BlockType.TableFootnote:
para_content['table_footnote'] = merge_para_with_text(block, parse_type=parse_type, lang=lang)
para_content['table_footnote'].append(merge_para_with_text(block))
para_content['page_idx'] = page_idx
......@@ -330,88 +212,11 @@ def para_to_standard_format_v2(para_block, img_buket_path, page_idx, parse_type=
return para_content
def make_standard_format_with_para(pdf_info_dict: list, img_buket_path: str):
content_list = []
for page_info in pdf_info_dict:
paras_of_layout = page_info.get('para_blocks')
if not paras_of_layout:
continue
for para_block in paras_of_layout:
para_content = para_to_standard_format_v2(para_block,
img_buket_path)
content_list.append(para_content)
return content_list
def line_to_standard_format(line, img_buket_path):
line_text = ''
inline_equation_num = 0
for span in line['spans']:
if not span.get('content'):
if not span.get('image_path'):
continue
else:
if span['type'] == ContentType.Image:
content = {
'type': 'image',
'img_path': join_path(img_buket_path,
span['image_path']),
}
return content
elif span['type'] == ContentType.Table:
content = {
'type': 'table',
'img_path': join_path(img_buket_path,
span['image_path']),
}
return content
else:
if span['type'] == ContentType.InterlineEquation:
interline_equation = span['content']
content = {
'type': 'equation',
'latex': f'$$\n{interline_equation}\n$$'
}
return content
elif span['type'] == ContentType.InlineEquation:
inline_equation = span['content']
line_text += f'${inline_equation}$'
inline_equation_num += 1
elif span['type'] == ContentType.Text:
text_content = ocr_escape_special_markdown_char(
span['content']) # escape special characters
line_text += text_content
content = {
'type': 'text',
'text': line_text,
'inline_equation_num': inline_equation_num,
}
return content
def ocr_mk_mm_standard_format(pdf_info_dict: list):
"""content_list type string
image/text/table/equation(行间的单独拿出来,行内的和text合并) latex string
latex文本字段。 text string 纯文本格式的文本数据。 md string
markdown格式的文本数据。 img_path string s3://full/path/to/img.jpg."""
content_list = []
for page_info in pdf_info_dict:
blocks = page_info.get('preproc_blocks')
if not blocks:
continue
for block in blocks:
for line in block['lines']:
content = line_to_standard_format(line)
content_list.append(content)
return content_list
def union_make(pdf_info_dict: list,
make_mode: str,
drop_mode: str,
img_buket_path: str = '',
parse_type: str = "auto",
lang=None):
):
output_content = []
for page_info in pdf_info_dict:
drop_reason_flag = False
......@@ -438,20 +243,20 @@ def union_make(pdf_info_dict: list,
continue
if make_mode == MakeMode.MM_MD:
page_markdown = ocr_mk_markdown_with_para_core_v2(
paras_of_layout, 'mm', img_buket_path, parse_type=parse_type, lang=lang)
paras_of_layout, 'mm', img_buket_path)
output_content.extend(page_markdown)
elif make_mode == MakeMode.NLP_MD:
page_markdown = ocr_mk_markdown_with_para_core_v2(
paras_of_layout, 'nlp', parse_type=parse_type, lang=lang)
paras_of_layout, 'nlp')
output_content.extend(page_markdown)
elif make_mode == MakeMode.STANDARD_FORMAT:
for para_block in paras_of_layout:
if drop_reason_flag:
para_content = para_to_standard_format_v2(
para_block, img_buket_path, page_idx, parse_type=parse_type, lang=lang, drop_reason=drop_reason)
para_block, img_buket_path, page_idx)
else:
para_content = para_to_standard_format_v2(
para_block, img_buket_path, page_idx, parse_type=parse_type, lang=lang)
para_block, img_buket_path, page_idx)
output_content.append(para_content)
if make_mode in [MakeMode.MM_MD, MakeMode.NLP_MD]:
return '\n\n'.join(output_content)
......
......@@ -10,18 +10,12 @@ block维度自定义字段
# block中lines是否被删除
LINES_DELETED = "lines_deleted"
# struct eqtable
STRUCT_EQTABLE = "struct_eqtable"
# table recognition max time default value
TABLE_MAX_TIME_VALUE = 400
# pp_table_result_max_length
TABLE_MAX_LEN = 480
# pp table structure algorithm
TABLE_MASTER = "TableMaster"
# table master structure dict
TABLE_MASTER_DICT = "table_master_structure_dict.txt"
......@@ -44,3 +38,16 @@ PP_REC_DIRECTORY = ".paddleocr/whl/rec/ch/ch_PP-OCRv4_rec_infer"
PP_DET_DIRECTORY = ".paddleocr/whl/det/ch/ch_PP-OCRv4_det_infer"
class MODEL_NAME:
# pp table structure algorithm
TABLE_MASTER = "tablemaster"
# struct eqtable
STRUCT_EQTABLE = "struct_eqtable"
DocLayout_YOLO = "doclayout_yolo"
LAYOUTLMv3 = "layoutlmv3"
YOLO_V8_MFD = "yolo_v8_mfd"
UniMerNet_v2_Small = "unimernet_small"
\ No newline at end of file
......@@ -445,3 +445,38 @@ def get_overlap_area(bbox1, bbox2):
# The area of overlap area
return (x_right - x_left) * (y_bottom - y_top)
def calculate_vertical_projection_overlap_ratio(block1, block2):
"""
Calculate the proportion of the x-axis covered by the vertical projection of two blocks.
Args:
block1 (tuple): Coordinates of the first block (x0, y0, x1, y1).
block2 (tuple): Coordinates of the second block (x0, y0, x1, y1).
Returns:
float: The proportion of the x-axis covered by the vertical projection of the two blocks.
"""
x0_1, _, x1_1, _ = block1
x0_2, _, x1_2, _ = block2
# Calculate the intersection of the x-coordinates
x_left = max(x0_1, x0_2)
x_right = min(x1_1, x1_2)
if x_right < x_left:
return 0.0
# Length of the intersection
intersection_length = x_right - x_left
# Length of the x-axis projection of the first block
block1_length = x1_1 - x0_1
if block1_length == 0:
return 0.0
# Proportion of the x-axis covered by the intersection
# logger.info(f"intersection_length: {intersection_length}, block1_length: {block1_length}")
return intersection_length / block1_length
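# Illustrative example: the ratio is the x-axis overlap measured against block1's
# width, so calculate_vertical_projection_overlap_ratio((0, 0, 100, 10), (50, 20, 150, 30))
# returns 0.5 (50 units of overlap over a block1 width of 100).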
"""
根据bucket的名字返回对应的s3 AK, SK,endpoint三元组
"""
"""根据bucket的名字返回对应的s3 AK, SK,endpoint三元组."""
import json
import os
from loguru import logger
from magic_pdf.libs.Constants import MODEL_NAME
from magic_pdf.libs.commons import parse_bucket_key
# the config file name constant
CONFIG_FILE_NAME = "magic-pdf.json"
CONFIG_FILE_NAME = os.getenv('MINERU_TOOLS_CONFIG_JSON', 'magic-pdf.json')
def read_config():
home_dir = os.path.expanduser("~")
if os.path.isabs(CONFIG_FILE_NAME):
config_file = CONFIG_FILE_NAME
else:
home_dir = os.path.expanduser('~')
config_file = os.path.join(home_dir, CONFIG_FILE_NAME)
if not os.path.exists(config_file):
raise FileNotFoundError(f"{config_file} not found")
raise FileNotFoundError(f'{config_file} not found')
with open(config_file, "r", encoding="utf-8") as f:
with open(config_file, 'r', encoding='utf-8') as f:
config = json.load(f)
return config
def get_s3_config(bucket_name: str):
"""
~/magic-pdf.json 读出来
"""
"""~/magic-pdf.json 读出来."""
config = read_config()
bucket_info = config.get("bucket_info")
bucket_info = config.get('bucket_info')
if bucket_name not in bucket_info:
access_key, secret_key, storage_endpoint = bucket_info["[default]"]
access_key, secret_key, storage_endpoint = bucket_info['[default]']
else:
access_key, secret_key, storage_endpoint = bucket_info[bucket_name]
if access_key is None or secret_key is None or storage_endpoint is None:
raise Exception(f"ak, sk or endpoint not found in {CONFIG_FILE_NAME}")
raise Exception(f'ak, sk or endpoint not found in {CONFIG_FILE_NAME}')
# logger.info(f"get_s3_config: ak={access_key}, sk={secret_key}, endpoint={storage_endpoint}")
......@@ -49,7 +47,7 @@ def get_s3_config(bucket_name: str):
def get_s3_config_dict(path: str):
access_key, secret_key, storage_endpoint = get_s3_config(get_bucket_name(path))
return {"ak": access_key, "sk": secret_key, "endpoint": storage_endpoint}
return {'ak': access_key, 'sk': secret_key, 'endpoint': storage_endpoint}
def get_bucket_name(path):
......@@ -59,20 +57,20 @@ def get_bucket_name(path):
def get_local_models_dir():
config = read_config()
models_dir = config.get("models-dir")
models_dir = config.get('models-dir')
if models_dir is None:
logger.warning(f"'models-dir' not found in {CONFIG_FILE_NAME}, use '/tmp/models' as default")
return "/tmp/models"
return '/tmp/models'
else:
return models_dir
def get_local_layoutreader_model_dir():
config = read_config()
layoutreader_model_dir = config.get("layoutreader-model-dir")
layoutreader_model_dir = config.get('layoutreader-model-dir')
if layoutreader_model_dir is None or not os.path.exists(layoutreader_model_dir):
home_dir = os.path.expanduser("~")
layoutreader_at_modelscope_dir_path = os.path.join(home_dir, ".cache/modelscope/hub/ppaanngggg/layoutreader")
home_dir = os.path.expanduser('~')
layoutreader_at_modelscope_dir_path = os.path.join(home_dir, '.cache/modelscope/hub/ppaanngggg/layoutreader')
logger.warning(f"'layoutreader-model-dir' not exists, use {layoutreader_at_modelscope_dir_path} as default")
return layoutreader_at_modelscope_dir_path
else:
......@@ -81,23 +79,43 @@ def get_local_layoutreader_model_dir():
def get_device():
config = read_config()
device = config.get("device-mode")
device = config.get('device-mode')
if device is None:
logger.warning(f"'device-mode' not found in {CONFIG_FILE_NAME}, use 'cpu' as default")
return "cpu"
return 'cpu'
else:
return device
def get_table_recog_config():
config = read_config()
table_config = config.get("table-config")
table_config = config.get('table-config')
if table_config is None:
logger.warning(f"'table-config' not found in {CONFIG_FILE_NAME}, use 'False' as default")
return json.loads('{"is_table_recog_enable": false, "max_time": 400}')
return json.loads(f'{{"model": "{MODEL_NAME.TABLE_MASTER}","enable": false, "max_time": 400}}')
else:
return table_config
def get_layout_config():
config = read_config()
layout_config = config.get("layout-config")
if layout_config is None:
logger.warning(f"'layout-config' not found in {CONFIG_FILE_NAME}, use '{MODEL_NAME.LAYOUTLMv3}' as default")
return json.loads(f'{{"model": "{MODEL_NAME.LAYOUTLMv3}"}}')
else:
return layout_config
def get_formula_config():
config = read_config()
formula_config = config.get("formula-config")
if formula_config is None:
logger.warning(f"'formula-config' not found in {CONFIG_FILE_NAME}, use 'True' as default")
return json.loads(f'{{"mfd_model": "{MODEL_NAME.YOLO_V8_MFD}","mfr_model": "{MODEL_NAME.UniMerNet_v2_Small}","enable": true}}')
else:
return formula_config
if __name__ == "__main__":
ak, sk, endpoint = get_s3_config("llm-raw")
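# Illustrative config (assumed shape, inferred from the readers above, not part
# of this commit): the keys consumed by read_config() and the get_* helpers in
# ~/magic-pdf.json.
#
# {
#   "bucket_info": {
#     "[default]": ["<access-key>", "<secret-key>", "<endpoint>"],
#     "llm-raw": ["<access-key>", "<secret-key>", "<endpoint>"]
#   },
#   "models-dir": "/tmp/models",
#   "layoutreader-model-dir": "~/.cache/modelscope/hub/ppaanngggg/layoutreader",
#   "device-mode": "cpu",
#   "layout-config": {"model": "layoutlmv3"},
#   "formula-config": {"mfd_model": "yolo_v8_mfd", "mfr_model": "unimernet_small", "enable": true},
#   "table-config": {"model": "tablemaster", "enable": false, "max_time": 400}
# }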
from magic_pdf.data.dataset import PymuDocDataset
from magic_pdf.libs.commons import fitz # PyMuPDF
from magic_pdf.libs.Constants import CROSS_PAGE
from magic_pdf.libs.ocr_content_type import BlockType, CategoryId, ContentType
......@@ -62,7 +63,7 @@ def draw_bbox_with_number(i, bbox_list, page, rgb_config, fill_config, draw_bbox
overlay=True,
) # Draw the rectangle
page.insert_text(
(x1+2, y0 + 10), str(j + 1), fontsize=10, color=new_rgb
(x1 + 2, y0 + 10), str(j + 1), fontsize=10, color=new_rgb
) # Insert the index in the top left corner of the rectangle
......@@ -86,7 +87,7 @@ def draw_layout_bbox(pdf_info, pdf_bytes, out_path, filename):
texts = []
interequations = []
lists = []
indexs = []
indices = []
for dropped_bbox in page['discarded_blocks']:
page_dropped_list.append(dropped_bbox['bbox'])
......@@ -122,7 +123,7 @@ def draw_layout_bbox(pdf_info, pdf_bytes, out_path, filename):
elif block['type'] == BlockType.List:
lists.append(bbox)
elif block['type'] == BlockType.Index:
indexs.append(bbox)
indices.append(bbox)
tables_list.append(tables)
tables_body_list.append(tables_body)
......@@ -136,45 +137,61 @@ def draw_layout_bbox(pdf_info, pdf_bytes, out_path, filename):
texts_list.append(texts)
interequations_list.append(interequations)
lists_list.append(lists)
indexs_list.append(indexs)
indexs_list.append(indices)
layout_bbox_list = []
table_type_order = {
'table_caption': 1,
'table_body': 2,
'table_footnote': 3
}
for page in pdf_info:
page_block_list = []
for block in page['para_blocks']:
if block['type'] in [
BlockType.Text,
BlockType.Title,
BlockType.InterlineEquation,
BlockType.List,
BlockType.Index,
]:
bbox = block['bbox']
page_block_list.append(bbox)
elif block['type'] in [BlockType.Image]:
for sub_block in block['blocks']:
bbox = sub_block['bbox']
page_block_list.append(bbox)
elif block['type'] in [BlockType.Table]:
sorted_blocks = sorted(block['blocks'], key=lambda x: table_type_order[x['type']])
for sub_block in sorted_blocks:
bbox = sub_block['bbox']
page_block_list.append(bbox)
layout_bbox_list.append(page_block_list)
pdf_docs = fitz.open('pdf', pdf_bytes)
for i, page in enumerate(pdf_docs):
draw_bbox_without_number(i, dropped_bbox_list, page, [158, 158, 158],
True)
draw_bbox_without_number(i, tables_list, page, [153, 153, 0],
True) # color !
draw_bbox_without_number(i, tables_body_list, page, [204, 204, 0],
True)
draw_bbox_without_number(i, tables_caption_list, page, [255, 255, 102],
True)
draw_bbox_without_number(i, tables_footnote_list, page,
[229, 255, 204], True)
draw_bbox_without_number(i, imgs_list, page, [51, 102, 0], True)
draw_bbox_without_number(i, dropped_bbox_list, page, [158, 158, 158], True)
# draw_bbox_without_number(i, tables_list, page, [153, 153, 0], True) # color !
draw_bbox_without_number(i, tables_body_list, page, [204, 204, 0], True)
draw_bbox_without_number(i, tables_caption_list, page, [255, 255, 102], True)
draw_bbox_without_number(i, tables_footnote_list, page, [229, 255, 204], True)
# draw_bbox_without_number(i, imgs_list, page, [51, 102, 0], True)
draw_bbox_without_number(i, imgs_body_list, page, [153, 255, 51], True)
draw_bbox_without_number(i, imgs_caption_list, page, [102, 178, 255],
True)
draw_bbox_without_number(i, imgs_footnote_list, page, [255, 178, 102],
True),
draw_bbox_without_number(i, imgs_caption_list, page, [102, 178, 255], True)
draw_bbox_without_number(i, imgs_footnote_list, page, [255, 178, 102], True),
draw_bbox_without_number(i, titles_list, page, [102, 102, 255], True)
draw_bbox_without_number(i, texts_list, page, [153, 0, 76], True)
draw_bbox_without_number(i, interequations_list, page, [0, 255, 0],
True)
draw_bbox_without_number(i, interequations_list, page, [0, 255, 0], True)
draw_bbox_without_number(i, lists_list, page, [40, 169, 92], True)
draw_bbox_without_number(i, indexs_list, page, [40, 169, 92], True)
draw_bbox_with_number(i, layout_bbox_list, page, [255, 0, 0], False, draw_bbox=False)
draw_bbox_with_number(
i, layout_bbox_list, page, [255, 0, 0], False, draw_bbox=False
)
# Save the PDF
pdf_docs.save(f'{out_path}/{filename}_layout.pdf')
......@@ -237,6 +254,8 @@ def draw_span_bbox(pdf_info, pdf_bytes, out_path, filename):
BlockType.Text,
BlockType.Title,
BlockType.InterlineEquation,
BlockType.List,
BlockType.Index,
]:
for line in block['lines']:
for span in line['spans']:
......@@ -273,7 +292,7 @@ def draw_model_bbox(model_list: list, pdf_bytes, out_path, filename):
texts_list = []
interequations_list = []
pdf_docs = fitz.open('pdf', pdf_bytes)
magic_model = MagicModel(model_list, pdf_docs)
magic_model = MagicModel(model_list, PymuDocDataset(pdf_bytes))
for i in range(len(model_list)):
page_dropped_list = []
tables_body, tables_caption, tables_footnote = [], [], []
......@@ -299,8 +318,7 @@ def draw_model_bbox(model_list: list, pdf_bytes, out_path, filename):
imgs_body.append(bbox)
elif layout_det['category_id'] == CategoryId.ImageCaption:
imgs_caption.append(bbox)
elif layout_det[
'category_id'] == CategoryId.InterlineEquation_YOLO:
elif layout_det['category_id'] == CategoryId.InterlineEquation_YOLO:
interequations.append(bbox)
elif layout_det['category_id'] == CategoryId.Abandon:
page_dropped_list.append(bbox)
......@@ -319,18 +337,15 @@ def draw_model_bbox(model_list: list, pdf_bytes, out_path, filename):
imgs_footnote_list.append(imgs_footnote)
for i, page in enumerate(pdf_docs):
draw_bbox_with_number(i, dropped_bbox_list, page, [158, 158, 158],
True) # color !
draw_bbox_with_number(
i, dropped_bbox_list, page, [158, 158, 158], True
) # color !
draw_bbox_with_number(i, tables_body_list, page, [204, 204, 0], True)
draw_bbox_with_number(i, tables_caption_list, page, [255, 255, 102],
True)
draw_bbox_with_number(i, tables_footnote_list, page, [229, 255, 204],
True)
draw_bbox_with_number(i, tables_caption_list, page, [255, 255, 102], True)
draw_bbox_with_number(i, tables_footnote_list, page, [229, 255, 204], True)
draw_bbox_with_number(i, imgs_body_list, page, [153, 255, 51], True)
draw_bbox_with_number(i, imgs_caption_list, page, [102, 178, 255],
True)
draw_bbox_with_number(i, imgs_footnote_list, page, [255, 178, 102],
True)
draw_bbox_with_number(i, imgs_caption_list, page, [102, 178, 255], True)
draw_bbox_with_number(i, imgs_footnote_list, page, [255, 178, 102], True)
draw_bbox_with_number(i, titles_list, page, [102, 102, 255], True)
draw_bbox_with_number(i, texts_list, page, [153, 0, 76], True)
draw_bbox_with_number(i, interequations_list, page, [0, 255, 0], True)
......@@ -345,19 +360,23 @@ def draw_line_sort_bbox(pdf_info, pdf_bytes, out_path, filename):
for page in pdf_info:
page_line_list = []
for block in page['preproc_blocks']:
if block['type'] in ['text', 'title', 'interline_equation']:
if block['type'] in [BlockType.Text, BlockType.Title, BlockType.InterlineEquation]:
for line in block['lines']:
bbox = line['bbox']
index = line['index']
page_line_list.append({'index': index, 'bbox': bbox})
if block['type'] in ['table', 'image']:
bbox = block['bbox']
index = block['index']
if block['type'] in [BlockType.Image, BlockType.Table]:
for sub_block in block['blocks']:
if sub_block['type'] in [BlockType.ImageBody, BlockType.TableBody]:
for line in sub_block['virtual_lines']:
bbox = line['bbox']
index = line['index']
page_line_list.append({'index': index, 'bbox': bbox})
elif sub_block['type'] in [BlockType.ImageCaption, BlockType.TableCaption, BlockType.ImageFootnote, BlockType.TableFootnote]:
for line in sub_block['lines']:
bbox = line['bbox']
index = line['index']
page_line_list.append({'index': index, 'bbox': bbox})
# for line in block['lines']:
# bbox = line['bbox']
# index = line['index']
# page_line_list.append({'index': index, 'bbox': bbox})
sorted_bboxes = sorted(page_line_list, key=lambda x: x['index'])
layout_bbox_list.append(sorted_bbox['bbox'] for sorted_bbox in sorted_bboxes)
pdf_docs = fitz.open('pdf', pdf_bytes)
......
......@@ -5,7 +5,8 @@ import numpy as np
from loguru import logger
from magic_pdf.libs.clean_memory import clean_memory
from magic_pdf.libs.config_reader import get_local_models_dir, get_device, get_table_recog_config
from magic_pdf.libs.config_reader import get_local_models_dir, get_device, get_table_recog_config, get_layout_config, \
get_formula_config
from magic_pdf.model.model_list import MODEL
import magic_pdf.model as model_config
......@@ -68,14 +69,17 @@ class ModelSingleton:
cls._instance = super().__new__(cls)
return cls._instance
def get_model(self, ocr: bool, show_log: bool, lang=None):
key = (ocr, show_log, lang)
def get_model(self, ocr: bool, show_log: bool, lang=None, layout_model=None, formula_enable=None, table_enable=None):
key = (ocr, show_log, lang, layout_model, formula_enable, table_enable)
if key not in self._models:
self._models[key] = custom_model_init(ocr=ocr, show_log=show_log, lang=lang)
self._models[key] = custom_model_init(ocr=ocr, show_log=show_log, lang=lang, layout_model=layout_model,
formula_enable=formula_enable, table_enable=table_enable)
return self._models[key]
def custom_model_init(ocr: bool = False, show_log: bool = False, lang=None):
def custom_model_init(ocr: bool = False, show_log: bool = False, lang=None,
layout_model=None, formula_enable=None, table_enable=None):
model = None
if model_config.__model_mode__ == "lite":
......@@ -95,14 +99,30 @@ def custom_model_init(ocr: bool = False, show_log: bool = False, lang=None):
# read model-dir and device from the config file
local_models_dir = get_local_models_dir()
device = get_device()
layout_config = get_layout_config()
if layout_model is not None:
layout_config["model"] = layout_model
formula_config = get_formula_config()
if formula_enable is not None:
formula_config["enable"] = formula_enable
table_config = get_table_recog_config()
model_input = {"ocr": ocr,
if table_enable is not None:
table_config["enable"] = table_enable
model_input = {
"ocr": ocr,
"show_log": show_log,
"models_dir": local_models_dir,
"device": device,
"table_config": table_config,
"layout_config": layout_config,
"formula_config": formula_config,
"lang": lang,
}
custom_model = CustomPEKModel(**model_input)
else:
logger.error("Not allow model_name!")
......@@ -117,10 +137,14 @@ def custom_model_init(ocr: bool = False, show_log: bool = False, lang=None):
def doc_analyze(pdf_bytes: bytes, ocr: bool = False, show_log: bool = False,
start_page_id=0, end_page_id=None, lang=None):
start_page_id=0, end_page_id=None, lang=None,
layout_model=None, formula_enable=None, table_enable=None):
if lang == "":
lang = None
model_manager = ModelSingleton()
custom_model = model_manager.get_model(ocr, show_log, lang)
custom_model = model_manager.get_model(ocr, show_log, lang, layout_model, formula_enable, table_enable)
with fitz.open("pdf", pdf_bytes) as doc:
pdf_page_num = doc.page_count
......
import json
from magic_pdf.data.dataset import Dataset
from magic_pdf.libs.boxbase import (_is_in, _is_part_overlap, bbox_distance,
bbox_relative_pos, box_area, calculate_iou,
calculate_overlap_area_in_bbox1_area_ratio,
......@@ -9,6 +10,7 @@ from magic_pdf.libs.coordinate_transform import get_scale_ratio
from magic_pdf.libs.local_math import float_gt
from magic_pdf.libs.ModelBlockTypeEnum import ModelBlockTypeEnum
from magic_pdf.libs.ocr_content_type import CategoryId, ContentType
from magic_pdf.pre_proc.remove_bbox_overlap import _remove_overlap_between_bbox
from magic_pdf.rw.AbsReaderWriter import AbsReaderWriter
from magic_pdf.rw.DiskReaderWriter import DiskReaderWriter
......@@ -24,7 +26,7 @@ class MagicModel:
need_remove_list = []
page_no = model_page_info['page_info']['page_no']
horizontal_scale_ratio, vertical_scale_ratio = get_scale_ratio(
model_page_info, self.__docs[page_no]
model_page_info, self.__docs.get_page(page_no)
)
layout_dets = model_page_info['layout_dets']
for layout_det in layout_dets:
......@@ -99,7 +101,7 @@ class MagicModel:
for need_remove in need_remove_list:
layout_dets.remove(need_remove)
def __init__(self, model_list: list, docs: fitz.Document):
def __init__(self, model_list: list, docs: Dataset):
self.__model_list = model_list
self.__docs = docs
"""为所有模型数据添加bbox信息(缩放,poly->bbox)"""
......@@ -123,7 +125,7 @@ class MagicModel:
l1 = bbox1[2] - bbox1[0]
l2 = bbox2[2] - bbox2[0]
if l2 > l1 and (l2 - l1) / l1 > 0.5:
if l2 > l1 and (l2 - l1) / l1 > 0.3:
return float('inf')
return bbox_distance(bbox1, bbox2)
......@@ -213,9 +215,8 @@ class MagicModel:
Select all subjects that overlap the merged bbox and whose overlap area is larger than the object's area,
then compute the shortest distance between the selected subjects and the object.
"""
def search_overlap_between_boxes(
subject_idx, object_idx
):
def search_overlap_between_boxes(subject_idx, object_idx):
idxes = [subject_idx, object_idx]
x0s = [all_bboxes[idx]['bbox'][0] for idx in idxes]
y0s = [all_bboxes[idx]['bbox'][1] for idx in idxes]
......@@ -243,9 +244,9 @@ class MagicModel:
for other_object in other_objects:
ratio = max(
ratio,
get_overlap_area(
merged_bbox, other_object['bbox']
) * 1.0 / box_area(all_bboxes[object_idx]['bbox'])
get_overlap_area(merged_bbox, other_object['bbox'])
* 1.0
/ box_area(all_bboxes[object_idx]['bbox']),
)
if ratio >= MERGE_BOX_OVERLAP_AREA_RATIO:
break
......@@ -363,12 +364,17 @@ class MagicModel:
if all_bboxes[j]['category_id'] == subject_category_id:
subject_idx, object_idx = j, i
if search_overlap_between_boxes(subject_idx, object_idx) >= MERGE_BOX_OVERLAP_AREA_RATIO:
if (
search_overlap_between_boxes(subject_idx, object_idx)
>= MERGE_BOX_OVERLAP_AREA_RATIO
):
dis[i][j] = float('inf')
dis[j][i] = dis[i][j]
continue
dis[i][j] = self._bbox_distance(all_bboxes[subject_idx]['bbox'], all_bboxes[object_idx]['bbox'])
dis[i][j] = self._bbox_distance(
all_bboxes[subject_idx]['bbox'], all_bboxes[object_idx]['bbox']
)
dis[j][i] = dis[i][j]
used = set()
......@@ -584,6 +590,245 @@ class MagicModel:
with_caption_subject.add(j)
return ret, total_subject_object_dis
def __tie_up_category_by_distance_v2(
self, page_no, subject_category_id, object_category_id
):
AXIS_MULPLICITY = 0.5
subjects = self.__reduct_overlap(
list(
map(
lambda x: {'bbox': x['bbox'], 'score': x['score']},
filter(
lambda x: x['category_id'] == subject_category_id,
self.__model_list[page_no]['layout_dets'],
),
)
)
)
objects = self.__reduct_overlap(
list(
map(
lambda x: {'bbox': x['bbox'], 'score': x['score']},
filter(
lambda x: x['category_id'] == object_category_id,
self.__model_list[page_no]['layout_dets'],
),
)
)
)
M = len(objects)
subjects.sort(key=lambda x: x['bbox'][0] ** 2 + x['bbox'][1] ** 2)
objects.sort(key=lambda x: x['bbox'][0] ** 2 + x['bbox'][1] ** 2)
sub_obj_map_h = {i: [] for i in range(len(subjects))}
dis_by_directions = {
'top': [[-1, float('inf')]] * M,
'bottom': [[-1, float('inf')]] * M,
'left': [[-1, float('inf')]] * M,
'right': [[-1, float('inf')]] * M,
}
for i, obj in enumerate(objects):
l_x_axis, l_y_axis = (
obj['bbox'][2] - obj['bbox'][0],
obj['bbox'][3] - obj['bbox'][1],
)
axis_unit = min(l_x_axis, l_y_axis)
for j, sub in enumerate(subjects):
bbox1, bbox2, _ = _remove_overlap_between_bbox(
objects[i]['bbox'], subjects[j]['bbox']
)
left, right, bottom, top = bbox_relative_pos(bbox1, bbox2)
flags = [left, right, bottom, top]
if sum([1 if v else 0 for v in flags]) > 1:
continue
if left:
if dis_by_directions['left'][i][1] > bbox_distance(
obj['bbox'], sub['bbox']
):
dis_by_directions['left'][i] = [
j,
bbox_distance(obj['bbox'], sub['bbox']),
]
if right:
if dis_by_directions['right'][i][1] > bbox_distance(
obj['bbox'], sub['bbox']
):
dis_by_directions['right'][i] = [
j,
bbox_distance(obj['bbox'], sub['bbox']),
]
if bottom:
if dis_by_directions['bottom'][i][1] > bbox_distance(
obj['bbox'], sub['bbox']
):
dis_by_directions['bottom'][i] = [
j,
bbox_distance(obj['bbox'], sub['bbox']),
]
if top:
if dis_by_directions['top'][i][1] > bbox_distance(
obj['bbox'], sub['bbox']
):
dis_by_directions['top'][i] = [
j,
bbox_distance(obj['bbox'], sub['bbox']),
]
if dis_by_directions['left'][i][1] != float('inf') or dis_by_directions[
'right'
][i][1] != float('inf'):
if dis_by_directions['left'][i][1] != float(
'inf'
) and dis_by_directions['right'][i][1] != float('inf'):
if AXIS_MULPLICITY * axis_unit >= abs(
dis_by_directions['left'][i][1]
- dis_by_directions['right'][i][1]
):
left_sub_bbox = subjects[dis_by_directions['left'][i][0]][
'bbox'
]
right_sub_bbox = subjects[dis_by_directions['right'][i][0]][
'bbox'
]
left_sub_bbox_y_axis = left_sub_bbox[3] - left_sub_bbox[1]
right_sub_bbox_y_axis = right_sub_bbox[3] - right_sub_bbox[1]
if (
abs(left_sub_bbox_y_axis - l_y_axis)
+ dis_by_directions['left'][i][0]
> abs(right_sub_bbox_y_axis - l_y_axis)
+ dis_by_directions['right'][i][0]
):
left_or_right = dis_by_directions['right'][i]
else:
left_or_right = dis_by_directions['left'][i]
else:
left_or_right = dis_by_directions['left'][i]
if left_or_right[1] > dis_by_directions['right'][i][1]:
left_or_right = dis_by_directions['right'][i]
else:
left_or_right = dis_by_directions['left'][i]
if left_or_right[1] == float('inf'):
left_or_right = dis_by_directions['right'][i]
else:
left_or_right = [-1, float('inf')]
if dis_by_directions['top'][i][1] != float('inf') or dis_by_directions[
'bottom'
][i][1] != float('inf'):
if dis_by_directions['top'][i][1] != float('inf') and dis_by_directions[
'bottom'
][i][1] != float('inf'):
if AXIS_MULPLICITY * axis_unit >= abs(
dis_by_directions['top'][i][1]
- dis_by_directions['bottom'][i][1]
):
top_bottom = subjects[dis_by_directions['bottom'][i][0]]['bbox']
bottom_top = subjects[dis_by_directions['top'][i][0]]['bbox']
top_bottom_x_axis = top_bottom[2] - top_bottom[0]
bottom_top_x_axis = bottom_top[2] - bottom_top[0]
if abs(top_bottom_x_axis - l_x_axis) + dis_by_directions['bottom'][i][1] > abs(
bottom_top_x_axis - l_x_axis
) + dis_by_directions['top'][i][1]:
top_or_bottom = dis_by_directions['top'][i]
else:
top_or_bottom = dis_by_directions['bottom'][i]
else:
top_or_bottom = dis_by_directions['top'][i]
if top_or_bottom[1] > dis_by_directions['bottom'][i][1]:
top_or_bottom = dis_by_directions['bottom'][i]
else:
top_or_bottom = dis_by_directions['top'][i]
if top_or_bottom[1] == float('inf'):
top_or_bottom = dis_by_directions['bottom'][i]
else:
top_or_bottom = [-1, float('inf')]
if left_or_right[1] != float('inf') or top_or_bottom[1] != float('inf'):
if left_or_right[1] != float('inf') and top_or_bottom[1] != float(
'inf'
):
if AXIS_MULPLICITY * axis_unit >= abs(
left_or_right[1] - top_or_bottom[1]
):
y_axis_bbox = subjects[left_or_right[0]]['bbox']
x_axis_bbox = subjects[top_or_bottom[0]]['bbox']
if (
abs((x_axis_bbox[2] - x_axis_bbox[0]) - l_x_axis) / l_x_axis
> abs((y_axis_bbox[3] - y_axis_bbox[1]) - l_y_axis)
/ l_y_axis
):
sub_obj_map_h[left_or_right[0]].append(i)
else:
sub_obj_map_h[top_or_bottom[0]].append(i)
else:
if left_or_right[1] > top_or_bottom[1]:
sub_obj_map_h[top_or_bottom[0]].append(i)
else:
sub_obj_map_h[left_or_right[0]].append(i)
else:
if left_or_right[1] != float('inf'):
sub_obj_map_h[left_or_right[0]].append(i)
else:
sub_obj_map_h[top_or_bottom[0]].append(i)
ret = []
for i in sub_obj_map_h.keys():
ret.append(
{
'sub_bbox': {
'bbox': subjects[i]['bbox'],
'score': subjects[i]['score'],
},
'obj_bboxes': [
{'score': objects[j]['score'], 'bbox': objects[j]['bbox']}
for j in sub_obj_map_h[i]
],
'sub_idx': i,
}
)
return ret
def get_imgs_v2(self, page_no: int):
with_captions = self.__tie_up_category_by_distance_v2(page_no, 3, 4)
with_footnotes = self.__tie_up_category_by_distance_v2(
page_no, 3, CategoryId.ImageFootnote
)
ret = []
for v in with_captions:
record = {
'image_body': v['sub_bbox'],
'image_caption_list': v['obj_bboxes'],
}
filter_idx = v['sub_idx']
d = next(filter(lambda x: x['sub_idx'] == filter_idx, with_footnotes))
record['image_footnote_list'] = d['obj_bboxes']
ret.append(record)
return ret
def get_tables_v2(self, page_no: int) -> list:
with_captions = self.__tie_up_category_by_distance_v2(page_no, 5, 6)
with_footnotes = self.__tie_up_category_by_distance_v2(page_no, 5, 7)
ret = []
for v in with_captions:
record = {
'table_body': v['sub_bbox'],
'table_caption_list': v['obj_bboxes'],
}
filter_idx = v['sub_idx']
d = next(filter(lambda x: x['sub_idx'] == filter_idx, with_footnotes))
record['table_footnote_list'] = d['obj_bboxes']
ret.append(record)
return ret
def get_imgs(self, page_no: int):
with_captions, _ = self.__tie_up_category_by_distance(page_no, 3, 4)
with_footnotes, _ = self.__tie_up_category_by_distance(
......@@ -717,10 +962,10 @@ class MagicModel:
def get_page_size(self, page_no: int): # get page width and height
# get the page object of the current page
page = self.__docs[page_no]
page = self.__docs.get_page(page_no).get_page_info()
# get the width and height of the current page
page_w = page.rect.width
page_h = page.rect.height
page_w = page.w
page_h = page.h
return page_w, page_h
def __get_blocks_by_type(
......
from typing import Optional, Tuple, Union
import torch
from torch import nn
import os
from unimernet.common.config import Config
import unimernet.tasks as tasks
import argparse
from transformers.modeling_outputs import BaseModelOutputWithPastAndCrossAttentions
from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask, _prepare_4d_causal_attention_mask
class PatchedMBartLearnedPositionalEmbedding(nn.Module):
def __init__(self, origin: nn.Module):
super().__init__()
self.offset = origin.offset
self.embedding = nn.Embedding(origin.num_embeddings, origin.embedding_dim)
self.embedding.weight.data = origin.weight.data
def forward(self, input_ids: torch.Tensor, past_key_values_length: int = 0):
"""`input_ids' shape is expected to be [bsz x seqlen]."""
bsz, seq_len = input_ids.shape[:2]
positions = torch.arange(0, seq_len, dtype=torch.long, device=self.embedding.weight.device)
positions += past_key_values_length
positions = positions.expand(bsz, -1)
return self.embedding(positions + self.offset)
class PatchedMBartDecoder(nn.Module):
def __init__(self, origin: nn.Module, kvlen: torch.LongTensor):
super().__init__()
self.origin = origin
self.kvlen = kvlen
self.config = origin.config
self.embed_tokens = origin.embed_tokens
self.embed_scale = origin.embed_scale
self._use_flash_attention_2 = origin._use_flash_attention_2
self.embed_positions = origin.embed_positions
self.counting_context_weight = getattr(origin, 'counting_context_weight', None)
self.layernorm_embedding = origin.layernorm_embedding
self.layers = origin.layers
self.layer_norm = origin.layer_norm
self.patched_embed_positions = PatchedMBartLearnedPositionalEmbedding(self.embed_positions)
def forward(
self,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
count_pred: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.LongTensor] = None,
head_mask: Optional[torch.Tensor] = None,
cross_attn_head_mask: Optional[torch.Tensor] = None,
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
) -> Union[Tuple, BaseModelOutputWithPastAndCrossAttentions]:
run_origin = False
if past_key_values is None:
run_origin = True
elif past_key_values[0][0].size(-2) < attention_mask.size(-1):
run_origin = True
if run_origin:
return self.origin(
input_ids=input_ids,
attention_mask=attention_mask,
count_pred=count_pred,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
head_mask=head_mask,
cross_attn_head_mask=cross_attn_head_mask,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
output_hidden_states = (
output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
)
use_cache = use_cache if use_cache is not None else self.config.use_cache
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
# retrieve input_ids and inputs_embeds
if input_ids is not None and inputs_embeds is not None:
raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
elif input_ids is not None:
input = input_ids
input_shape = input.size()
input_ids = input_ids.view(-1, input_shape[-1])
elif inputs_embeds is not None:
input_shape = inputs_embeds.size()[:-1]
input = inputs_embeds[:, :, -1]
else:
raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
# past_key_values_length
past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
if inputs_embeds is None:
inputs_embeds = self.embed_tokens(input_ids) * self.embed_scale
if self._use_flash_attention_2:
# 2d mask is passed through the layers
attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
else:
# 4d mask is passed through the layers
attention_mask = _prepare_4d_causal_attention_mask(
attention_mask, input_shape, inputs_embeds, past_key_values_length
)
# expand encoder attention mask
if encoder_hidden_states is not None and encoder_attention_mask is not None:
if self._use_flash_attention_2:
encoder_attention_mask = encoder_attention_mask if 0 in encoder_attention_mask else None
else:
# [bsz, seq_len] -> [bsz, 1, tgt_seq_len, src_seq_len]
encoder_attention_mask = _prepare_4d_attention_mask(
encoder_attention_mask, inputs_embeds.dtype, tgt_len=input_shape[-1]
)
# embed positions
positions = self.patched_embed_positions(input, self.kvlen)
hidden_states = inputs_embeds + positions.to(inputs_embeds.device)
# TODO: add counting context weight to hidden_states
if count_pred is not None:
count_context_weight = self.counting_context_weight(count_pred)
hidden_states = hidden_states + 0.5 * count_context_weight.unsqueeze(1)
hidden_states = self.layernorm_embedding(hidden_states)
# decoder layers
all_hidden_states = () if output_hidden_states else None
all_self_attns = () if output_attentions else None
all_cross_attentions = () if (output_attentions and encoder_hidden_states is not None) else None
next_decoder_cache = () if use_cache else None
# check if head_mask/cross_attn_head_mask has a correct number of layers specified if desired
for attn_mask, mask_name in zip([head_mask, cross_attn_head_mask], ["head_mask", "cross_attn_head_mask"]):
if attn_mask is not None:
if attn_mask.size()[0] != len(self.layers):
raise ValueError(
f"The `{mask_name}` should be specified for {len(self.layers)} layers, but it is for"
f" {attn_mask.size()[0]}."
)
for idx, decoder_layer in enumerate(self.layers):
# add LayerDrop (see https://arxiv.org/abs/1909.11556 for description)
if output_hidden_states:
all_hidden_states += (hidden_states,)
past_key_value = past_key_values[idx] if past_key_values is not None else None
layer_outputs = decoder_layer(
hidden_states,
attention_mask=attention_mask,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
layer_head_mask=(head_mask[idx] if head_mask is not None else None),
cross_attn_layer_head_mask=(
cross_attn_head_mask[idx] if cross_attn_head_mask is not None else None
),
past_key_value=past_key_value,
output_attentions=output_attentions,
use_cache=use_cache,
)
hidden_states = layer_outputs[0]
if use_cache:
next_decoder_cache += (layer_outputs[3 if output_attentions else 1],)
if output_attentions:
all_self_attns += (layer_outputs[1],)
if encoder_hidden_states is not None:
all_cross_attentions += (layer_outputs[2],)
hidden_states = self.layer_norm(hidden_states)
# add hidden states from the last decoder layer
if output_hidden_states:
all_hidden_states += (hidden_states,)
next_cache = next_decoder_cache if use_cache else None
if not return_dict:
return tuple(
v
for v in [hidden_states, next_cache, all_hidden_states, all_self_attns, all_cross_attentions]
if v is not None
)
return BaseModelOutputWithPastAndCrossAttentions(
last_hidden_state=hidden_states,
past_key_values=next_cache,
hidden_states=all_hidden_states,
attentions=all_self_attns,
cross_attentions=all_cross_attentions,
)
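# PatchedMBartAttention mirrors the original attention, except that in the static-shape decode path
# the new key/value states are written in place into the cache at index `kvlen`, so the cache
# tensors keep a fixed shape between decode steps.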
class PatchedMBartAttention(nn.Module):
def __init__(self, origin: nn.Module, kvlen: torch.LongTensor):
super().__init__()
self.embed_dim = origin.embed_dim
self.num_heads = origin.num_heads
self.dropout = origin.dropout
self.head_dim = origin.head_dim
self.config = origin.config
self.scaling = origin.scaling
self.is_decoder = origin.is_decoder
self.is_causal = origin.is_causal
self.k_proj = origin.k_proj
self.v_proj = origin.v_proj
self.q_proj = origin.q_proj
self.out_proj = origin.out_proj
self.kvlen = kvlen
def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
def forward(
self,
hidden_states: torch.Tensor,
key_value_states: Optional[torch.Tensor] = None,
past_key_value: Optional[Tuple[torch.Tensor]] = None,
attention_mask: Optional[torch.Tensor] = None,
layer_head_mask: Optional[torch.Tensor] = None,
output_attentions: bool = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
is_cross_attention = key_value_states is not None
bsz, tgt_len, _ = hidden_states.size()
# get query proj
query_states = self.q_proj(hidden_states) * self.scaling
# get key, value proj
# `past_key_value[0].shape[2] == key_value_states.shape[1]`
# is checking that the `sequence_length` of the `past_key_value` is the same as
# the provided `key_value_states` to support prefix tuning
if (
is_cross_attention
and past_key_value is not None
and past_key_value[0].shape[2] == key_value_states.shape[1]
):
# reuse k,v, cross_attentions
key_states = past_key_value[0]
value_states = past_key_value[1]
elif is_cross_attention:
# cross_attentions
key_states = self._shape(self.k_proj(key_value_states), -1, bsz)
value_states = self._shape(self.v_proj(key_value_states), -1, bsz)
elif past_key_value is not None:
# reuse k, v, self_attention
key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
if past_key_value[0].size(-2) < attention_mask.size(-1):
key_states = torch.cat([past_key_value[0], key_states], dim=2)
value_states = torch.cat([past_key_value[1], value_states], dim=2)
else:
past_key_value[0][:, :, self.kvlen[None]] = key_states
past_key_value[1][:, :, self.kvlen[None]] = value_states
key_states = past_key_value[0]
value_states = past_key_value[1]
else:
# self_attention
key_states = self._shape(self.k_proj(hidden_states), -1, bsz)
value_states = self._shape(self.v_proj(hidden_states), -1, bsz)
if self.is_decoder:
past_key_value = (key_states, value_states)
proj_shape = (bsz * self.num_heads, -1, self.head_dim)
query_states = self._shape(query_states, tgt_len, bsz).view(*proj_shape)
key_states = key_states.reshape(*proj_shape)
value_states = value_states.reshape(*proj_shape)
src_len = key_states.size(1)
attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))
if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
raise ValueError(
f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is"
f" {attn_weights.size()}"
)
if attention_mask is not None:
if attention_mask.size() != (bsz, 1, tgt_len, src_len):
raise ValueError(
f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {attention_mask.size()}"
)
attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attention_mask
attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
attn_weights = nn.functional.softmax(attn_weights, dim=-1)
if layer_head_mask is not None:
if layer_head_mask.size() != (self.num_heads,):
raise ValueError(
f"Head mask for a single layer should be of size {(self.num_heads,)}, but is"
f" {layer_head_mask.size()}"
)
attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
if output_attentions:
# this operation is a bit awkward, but it's required to
# make sure that attn_weights keeps its gradient.
# In order to do so, attn_weights have to be reshaped
# twice and have to be reused in the following
attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)
else:
attn_weights_reshaped = None
attn_probs = attn_weights
attn_output = torch.bmm(attn_probs, value_states)
if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):
raise ValueError(
f"`attn_output` should be of size {(bsz * self.num_heads, tgt_len, self.head_dim)}, but is"
f" {attn_output.size()}"
)
attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim)
attn_output = attn_output.transpose(1, 2)
# Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be
# partitioned across GPUs when using tensor-parallelism.
attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim)
# attn_output = self.out_proj(attn_output)
attn_output = self.out_proj(attn_output)
return attn_output, attn_weights_reshaped, past_key_value
class PatchedMBartSqueezeAttention(nn.Module):
def __init__(self, origin: nn.Module, kvlen: torch.LongTensor):
super().__init__()
self.embed_dim = origin.embed_dim
self.num_heads = origin.num_heads
self.dropout = origin.dropout
self.head_dim = origin.head_dim
self.squeeze_head_dim = origin.squeeze_head_dim
self.config = origin.config
self.scaling = origin.scaling
self.is_decoder = origin.is_decoder
self.q_proj = origin.q_proj
self.k_proj = origin.k_proj
self.v_proj = origin.v_proj
self.out_proj = origin.out_proj
self.kvlen = kvlen
def _shape_qk(self, tensor: torch.Tensor, seq_len: int, bsz: int):
return tensor.view(bsz, seq_len, self.num_heads, self.squeeze_head_dim).transpose(1, 2).contiguous()
def _shape_v(self, tensor: torch.Tensor, seq_len: int, bsz: int):
return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
def forward(
self,
hidden_states: torch.Tensor,
key_value_states: Optional[torch.Tensor] = None,
past_key_value: Optional[Tuple[torch.Tensor]] = None,
attention_mask: Optional[torch.Tensor] = None,
layer_head_mask: Optional[torch.Tensor] = None,
output_attentions: bool = False,
) -> Tuple[torch.Tensor, Optional[torch.Tensor], Optional[Tuple[torch.Tensor]]]:
"""Input shape: Batch x Time x Channel"""
# if key_value_states are provided this layer is used as a cross-attention layer
# for the decoder
is_cross_attention = key_value_states is not None
bsz, tgt_len, _ = hidden_states.size()
# get query proj
query_states = self.q_proj(hidden_states) * self.scaling
# get key, value proj
# `past_key_value[0].shape[2] == key_value_states.shape[1]`
# is checking that the `sequence_length` of the `past_key_value` is the same as
# the provided `key_value_states` to support prefix tuning
if (
is_cross_attention
and past_key_value is not None
and past_key_value[0].shape[2] == key_value_states.shape[1]
):
# reuse k,v, cross_attentions
key_states = past_key_value[0]
value_states = past_key_value[1]
elif is_cross_attention:
# cross_attentions
key_states = self._shape_qk(self.k_proj(key_value_states), -1, bsz)
value_states = self._shape_v(self.v_proj(key_value_states), -1, bsz)
elif past_key_value is not None:
# reuse k, v, self_attention
key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz)
value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz)
if past_key_value[0].size(-2) < attention_mask.size(-1):
key_states = torch.cat([past_key_value[0], key_states], dim=2)
value_states = torch.cat([past_key_value[1], value_states], dim=2)
else:
past_key_value[0][:, :, self.kvlen[None]] = key_states
past_key_value[1][:, :, self.kvlen[None]] = value_states
key_states = past_key_value[0]
value_states = past_key_value[1]
else:
# self_attention
key_states = self._shape_qk(self.k_proj(hidden_states), -1, bsz)
value_states = self._shape_v(self.v_proj(hidden_states), -1, bsz)
if self.is_decoder:
# if cross_attention save Tuple(torch.Tensor, torch.Tensor) of all cross attention key/value_states.
# Further calls to cross_attention layer can then reuse all cross-attention
# key/value_states (first "if" case)
# if uni-directional self-attention (decoder) save Tuple(torch.Tensor, torch.Tensor) of
# all previous decoder key/value_states. Further calls to uni-directional self-attention
# can concat previous decoder key/value_states to current projected key/value_states (third "elif" case)
# if encoder bi-directional self-attention `past_key_value` is always `None`
past_key_value = (key_states, value_states)
proj_shape = (bsz * self.num_heads, -1, self.squeeze_head_dim)
value_shape = (bsz * self.num_heads, -1, self.head_dim)
query_states = self._shape_qk(query_states, tgt_len, bsz).view(*proj_shape)
key_states = key_states.reshape(*proj_shape)
value_states = value_states.reshape(*value_shape)
src_len = key_states.size(1)
attn_weights = torch.bmm(query_states, key_states.transpose(1, 2))
if attn_weights.size() != (bsz * self.num_heads, tgt_len, src_len):
raise ValueError(
f"Attention weights should be of size {(bsz * self.num_heads, tgt_len, src_len)}, but is"
f" {attn_weights.size()}"
)
if attention_mask is not None:
if attention_mask.size() != (bsz, 1, tgt_len, src_len):
raise ValueError(
f"Attention mask should be of size {(bsz, 1, tgt_len, src_len)}, but is {attention_mask.size()}"
)
attn_weights = attn_weights.view(bsz, self.num_heads, tgt_len, src_len) + attention_mask
attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
attn_weights = nn.functional.softmax(attn_weights, dim=-1)
if layer_head_mask is not None:
if layer_head_mask.size() != (self.num_heads,):
raise ValueError(
f"Head mask for a single layer should be of size {(self.num_heads,)}, but is"
f" {layer_head_mask.size()}"
)
attn_weights = layer_head_mask.view(1, -1, 1, 1) * attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
attn_weights = attn_weights.view(bsz * self.num_heads, tgt_len, src_len)
if output_attentions:
# this operation is a bit awkward, but it's required to
# make sure that attn_weights keeps its gradient.
# In order to do so, attn_weights have to be reshaped
# twice and have to be reused in the following
attn_weights_reshaped = attn_weights.view(bsz, self.num_heads, tgt_len, src_len)
attn_weights = attn_weights_reshaped.view(bsz * self.num_heads, tgt_len, src_len)
else:
attn_weights_reshaped = None
attn_probs = nn.functional.dropout(attn_weights, p=self.dropout, training=self.training)
attn_output = torch.bmm(attn_probs, value_states)
if attn_output.size() != (bsz * self.num_heads, tgt_len, self.head_dim):
raise ValueError(
f"`attn_output` should be of size {(bsz * self.num_heads, tgt_len, self.head_dim)}, but is"
f" {attn_output.size()}"
)
attn_output = attn_output.view(bsz, self.num_heads, tgt_len, self.head_dim)
attn_output = attn_output.transpose(1, 2)
# Use the `embed_dim` from the config (stored in the class) rather than `hidden_state` because `attn_output` can be
# partitioned across GPUs when using tensor-parallelism.
attn_output = attn_output.reshape(bsz, tgt_len, self.embed_dim)
attn_output = self.out_proj(attn_output)
return attn_output, attn_weights_reshaped, past_key_value
def patch_model(model: nn.Module, kvlen: torch.LongTensor):
for name, child in model.named_children():
cls_name = type(child).__name__
if cls_name == 'MBartAttention':
patched_child = PatchedMBartAttention(child, kvlen)
model.register_module(name, patched_child)
elif cls_name == 'MBartSqueezeAttention':
patched_child = PatchedMBartSqueezeAttention(child, kvlen)
model.register_module(name, patched_child)
else:
patch_model(child, kvlen)
cls_name = type(model).__name__
if cls_name == 'CustomMBartDecoder':
model = PatchedMBartDecoder(model, kvlen)
return model
def next_power_of_2(n: int):
"""Return the smallest power of 2 greater than or equal to n."""
n -= 1
n |= n >> 1
n |= n >> 2
n |= n >> 4
n |= n >> 8
n |= n >> 16
n |= n >> 32
n += 1
return n
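# e.g. next_power_of_2(5) == 8 and next_power_of_2(8) == 8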
def get_graph_key(batch_size: int, kvlens: int):
batch_size = next_power_of_2(batch_size)
kvlens = next_power_of_2(kvlens)
batch_size = max(8, batch_size)
kvlens = max(32, kvlens)
return batch_size, kvlens
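# get_graph_key buckets (batch_size, kvlens) into power-of-two sizes with floors of 8 and 32, so
# only a bounded number of CUDA graphs needs to be captured; e.g. get_graph_key(3, 100) == (8, 128).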
class GraphRunnerImpl:
def __init__(self, model: nn.Module, graph: torch.cuda.CUDAGraph, input_buffers: dict, output_buffers: dict):
self.model = model
self.graph = graph
self.input_buffers = input_buffers
self.output_buffers = output_buffers
@staticmethod
def extract_input_buffers(input_buffers: dict, batch_size: int, kvlens: int):
input_ids = input_buffers['input_ids'][:batch_size]
attention_mask = input_buffers['attention_mask'][:batch_size, :kvlens]
encoder_hidden_states = input_buffers['encoder_hidden_states'][:batch_size]
kvlen = input_buffers['kvlen']
past_key_values = []
for past_key_value in input_buffers['past_key_values']:
k0 = past_key_value[0][:batch_size, :, :kvlens]
v0 = past_key_value[1][:batch_size, :, :kvlens]
k1 = past_key_value[2][:batch_size]
v1 = past_key_value[3][:batch_size]
past_key_values.append((k0, v0, k1, v1))
input_buffers = dict(
input_ids=input_ids,
attention_mask=attention_mask,
encoder_hidden_states=encoder_hidden_states,
past_key_values=past_key_values,
kvlen=kvlen,
)
return input_buffers
@staticmethod
def fill_input_buffers(
input_buffer: dict,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
):
batch_size = input_ids.size(0)
kvlens = attention_mask.size(1)
input_buffer['input_ids'][:batch_size] = input_ids
if input_buffer['attention_mask'].data_ptr() != attention_mask.data_ptr():
input_buffer['attention_mask'].fill_(0)
input_buffer['attention_mask'][:batch_size, :kvlens] = attention_mask
input_buffer['encoder_hidden_states'][:batch_size] = encoder_hidden_states
if past_key_values is not None:
for buf_kv, kv in zip(input_buffer['past_key_values'], past_key_values):
idx = 0
if buf_kv[idx].data_ptr() != kv[idx].data_ptr():
buf_kv[idx].fill_(0)
buf_kv[idx][:batch_size, :, :kvlens-1] = kv[idx]
idx = 1
if buf_kv[idx].data_ptr() != kv[idx].data_ptr():
buf_kv[idx].fill_(0)
buf_kv[idx][:batch_size, :, :kvlens-1] = kv[idx]
idx = 2
if buf_kv[idx].data_ptr() != kv[idx].data_ptr():
buf_kv[idx].fill_(0)
buf_kv[idx][:batch_size] = kv[idx]
idx = 3
if buf_kv[idx].data_ptr() != kv[idx].data_ptr():
buf_kv[idx].fill_(0)
buf_kv[idx][:batch_size] = kv[idx]
input_buffer['kvlen'].fill_(kvlens - 1)
@classmethod
@torch.inference_mode()
def capture(cls,
model: nn.Module,
input_buffers: dict,
pool,
warmup: bool = False,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
count_pred: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.LongTensor] = None,
head_mask: Optional[torch.Tensor] = None,
cross_attn_head_mask: Optional[torch.Tensor] = None,
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,):
batch_size = input_ids.size(0)
kvlens = attention_mask.size(1)
graph_key = get_graph_key(batch_size, kvlens)
batch_size = graph_key[0]
kvlens = graph_key[1]
input_buffers = cls.extract_input_buffers(input_buffers,
batch_size=batch_size,
kvlens=kvlens)
cls.fill_input_buffers(input_buffers,
input_ids,
attention_mask,
encoder_hidden_states,
past_key_values)
input_ids = input_buffers['input_ids']
attention_mask = input_buffers['attention_mask']
encoder_hidden_states = input_buffers['encoder_hidden_states']
past_key_values = input_buffers['past_key_values']
if warmup:
# warmup
model(
input_ids=input_ids,
attention_mask=attention_mask,
count_pred=count_pred,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
head_mask=head_mask,
cross_attn_head_mask=cross_attn_head_mask,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict)
graph = torch.cuda.CUDAGraph()
with torch.cuda.graph(graph,
pool=pool):
outputs = model(
input_ids=input_ids,
attention_mask=attention_mask,
count_pred=count_pred,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
head_mask=head_mask,
cross_attn_head_mask=cross_attn_head_mask,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict)
output_buffers = dict(
last_hidden_state=outputs['last_hidden_state'],
past_key_values=outputs['past_key_values'],
)
return GraphRunnerImpl(model, graph, input_buffers, output_buffers)
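# Replay path: copy the live inputs into the captured buffers, replay the recorded graph, then
# slice the output buffers back down to the actual batch size and cache length.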
def __call__(self,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
count_pred: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.LongTensor] = None,
head_mask: Optional[torch.Tensor] = None,
cross_attn_head_mask: Optional[torch.Tensor] = None,
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
):
batch_size = input_ids.size(0)
kvlens = attention_mask.size(1)
self.fill_input_buffers(self.input_buffers,
input_ids,
attention_mask,
encoder_hidden_states,
past_key_values)
self.graph.replay()
last_hidden_state = self.output_buffers['last_hidden_state'][:batch_size]
past_key_values = []
for past_key_value in self.output_buffers['past_key_values']:
k0 = past_key_value[0][:batch_size, :, :kvlens]
v0 = past_key_value[1][:batch_size, :, :kvlens]
k1 = past_key_value[2][:batch_size]
v1 = past_key_value[3][:batch_size]
past_key_values.append((k0, v0, k1, v1))
outputs = BaseModelOutputWithPastAndCrossAttentions(
last_hidden_state=last_hidden_state,
past_key_values=past_key_values,
)
return outputs
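# GraphRunner wraps the patched decoder: it lazily allocates static input buffers, captures one CUDA
# graph per (batch, kvlen) bucket, and falls back to eager execution for prefill steps or inputs that
# exceed max_batchs / max_kvlens.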
class GraphRunner(nn.Module):
def __init__(self, model: nn.Module, max_batchs: int, max_kvlens: int, dtype: torch.dtype = torch.float16, device: torch.device = 'cuda'):
super().__init__()
self.kvlen = torch.tensor(0, dtype=torch.long, device=device)
model = patch_model(model.to(dtype), self.kvlen)
self.model = model
self.max_batchs = max_batchs
self.max_kvlens = max_kvlens
self.device = device
self.input_buffers = None
self.impl_map = dict()
self.graph_pool_handle = torch.cuda.graph_pool_handle()
self.warmuped = False
def create_buffers(self, encoder_kvlens: int, dtype: torch.dtype):
max_batchs = self.max_batchs
max_kvlens = self.max_kvlens
device = self.device
config = self.model.config
d_model = config.d_model
decoder_layers = config.decoder_layers
num_heads = config.decoder_attention_heads
head_dim = d_model // num_heads
self_attn = self.model.layers[0].self_attn
qk_head_dim = getattr(self_attn, 'squeeze_head_dim', head_dim)
input_ids = torch.ones((max_batchs, 1), dtype=torch.int64, device=device)
attention_mask = torch.zeros((max_batchs, max_kvlens), dtype=torch.int64, device=device)
encoder_hidden_states = torch.zeros((max_batchs, encoder_kvlens, d_model), dtype=dtype, device=device)
past_key_values = []
for _ in range(decoder_layers):
k0 = torch.zeros((max_batchs, num_heads, max_kvlens, qk_head_dim), dtype=dtype, device=device)
v0 = torch.zeros((max_batchs, num_heads, max_kvlens, head_dim), dtype=dtype, device=device)
k1 = torch.zeros((max_batchs, num_heads, encoder_kvlens, qk_head_dim), dtype=dtype, device=device)
v1 = torch.zeros((max_batchs, num_heads, encoder_kvlens, head_dim), dtype=dtype, device=device)
past_key_values.append((k0, v0, k1, v1))
self.input_buffers = dict(
input_ids=input_ids,
attention_mask=attention_mask,
encoder_hidden_states=encoder_hidden_states,
past_key_values=past_key_values,
kvlen=self.kvlen
)
@torch.inference_mode()
def forward(self,
input_ids: torch.LongTensor = None,
attention_mask: Optional[torch.Tensor] = None,
count_pred: Optional[torch.FloatTensor] = None,
encoder_hidden_states: Optional[torch.FloatTensor] = None,
encoder_attention_mask: Optional[torch.LongTensor] = None,
head_mask: Optional[torch.Tensor] = None,
cross_attn_head_mask: Optional[torch.Tensor] = None,
past_key_values: Optional[Tuple[Tuple[torch.FloatTensor]]] = None,
inputs_embeds: Optional[torch.FloatTensor] = None,
use_cache: Optional[bool] = None,
output_attentions: Optional[bool] = None,
output_hidden_states: Optional[bool] = None,
return_dict: Optional[bool] = None,
):
batch_size, qlens = input_ids.size()
kvlens = attention_mask.size(1)
eager_mode = False
if qlens != 1:
eager_mode = True
if past_key_values is None:
eager_mode = True
else:
for past_key_value in past_key_values:
if past_key_value is None:
eager_mode = True
break
if batch_size >= self.max_batchs or kvlens >= self.max_kvlens:
eager_mode = True
if eager_mode:
return self.model(
input_ids=input_ids,
attention_mask=attention_mask,
count_pred=count_pred,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
head_mask=head_mask,
cross_attn_head_mask=cross_attn_head_mask,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,)
# create buffer if not exists.
if self.input_buffers is None:
encoder_kvlens = encoder_hidden_states.size(1)
self.create_buffers(encoder_kvlens=encoder_kvlens, dtype=encoder_hidden_states.dtype)
graph_key = get_graph_key(batch_size, kvlens)
if graph_key not in self.impl_map:
warmup = False
if not self.warmuped:
warmup = True
self.warmuped = True
impl = GraphRunnerImpl.capture(
self.model,
self.input_buffers,
self.graph_pool_handle,
warmup=warmup,
input_ids=input_ids,
attention_mask=attention_mask,
count_pred=count_pred,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
head_mask=head_mask,
cross_attn_head_mask=cross_attn_head_mask,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
self.impl_map[graph_key] = impl
impl = self.impl_map[graph_key]
ret = impl(
input_ids=input_ids,
attention_mask=attention_mask,
count_pred=count_pred,
encoder_hidden_states=encoder_hidden_states,
encoder_attention_mask=encoder_attention_mask,
head_mask=head_mask,
cross_attn_head_mask=cross_attn_head_mask,
past_key_values=past_key_values,
inputs_embeds=inputs_embeds,
use_cache=use_cache,
output_attentions=output_attentions,
output_hidden_states=output_hidden_states,
return_dict=return_dict,
)
return ret
\ No newline at end of file
......@@ -6,6 +6,7 @@ import shutil
from magic_pdf.libs.Constants import *
from magic_pdf.libs.clean_memory import clean_memory
from magic_pdf.model.model_list import AtomicModel
from .mfr_cudagraph import GraphRunner
os.environ['NO_ALBUMENTATIONS_UPDATE'] = '1'  # disable the albumentations update check
os.environ['YOLO_VERBOSE'] = 'False' # disable yolo logger
......@@ -26,6 +27,7 @@ try:
from unimernet.common.config import Config
import unimernet.tasks as tasks
from unimernet.processors import load_processor
from doclayout_yolo import YOLOv10
except ImportError as e:
logger.exception(e)
......@@ -42,7 +44,7 @@ from magic_pdf.model.ppTableModel import ppTableModel
def table_model_init(table_model_type, model_path, max_time, _device_='cpu'):
if table_model_type == STRUCT_EQTABLE:
if table_model_type == MODEL_NAME.STRUCT_EQTABLE:
table_model = StructTableModel(model_path, max_time=max_time, device=_device_)
else:
config = {
......@@ -68,6 +70,11 @@ def mfr_model_init(weight_dir, cfg_path, _device_='cpu'):
model = task.build_model(cfg)
model.to(_device_)
model.eval()
model = model.to(_device_)
if 'cuda' in _device_:
decoder_runner = GraphRunner(model.model.model.decoder.model.decoder, max_batchs=128, max_kvlens=256,
device=_device_)
model.model.model.decoder.model.decoder = decoder_runner
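# the decoder submodule is swapped for the CUDA-graph runner so that formula-recognition decoding
# replays captured graphs instead of re-launching kernels on every step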
vis_processor = load_processor('formula_image_eval', cfg.config.datasets.formula_rec_eval.vis_processor.eval)
mfr_transform = transforms.Compose([vis_processor, ])
return [model, mfr_transform]
......@@ -78,11 +85,16 @@ def layout_model_init(weight, config_file, device):
return model
def ocr_model_init(show_log: bool = False, det_db_box_thresh=0.3, lang=None):
def doclayout_yolo_model_init(weight):
model = YOLOv10(weight)
return model
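# YOLOv10 here is the DocLayout-YOLO predictor; its .predict() results are converted into
# layout_res dicts (category_id / poly / score) in CustomPEKModel further below.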
def ocr_model_init(show_log: bool = False, det_db_box_thresh=0.3, lang=None, use_dilation=True, det_db_unclip_ratio=1.8):
if lang is not None:
model = ModifiedPaddleOCR(show_log=show_log, det_db_box_thresh=det_db_box_thresh, lang=lang)
model = ModifiedPaddleOCR(show_log=show_log, det_db_box_thresh=det_db_box_thresh, lang=lang, use_dilation=use_dilation, det_db_unclip_ratio=det_db_unclip_ratio)
else:
model = ModifiedPaddleOCR(show_log=show_log, det_db_box_thresh=det_db_box_thresh)
model = ModifiedPaddleOCR(show_log=show_log, det_db_box_thresh=det_db_box_thresh, use_dilation=use_dilation, det_db_unclip_ratio=det_db_unclip_ratio)
return model
......@@ -115,19 +127,27 @@ class AtomModelSingleton:
return cls._instance
def get_atom_model(self, atom_model_name: str, **kwargs):
if atom_model_name not in self._models:
self._models[atom_model_name] = atom_model_init(model_name=atom_model_name, **kwargs)
return self._models[atom_model_name]
lang = kwargs.get("lang", None)
layout_model_name = kwargs.get("layout_model_name", None)
key = (atom_model_name, layout_model_name, lang)
if key not in self._models:
self._models[key] = atom_model_init(model_name=atom_model_name, **kwargs)
return self._models[key]
def atom_model_init(model_name: str, **kwargs):
if model_name == AtomicModel.Layout:
if kwargs.get("layout_model_name") == MODEL_NAME.LAYOUTLMv3:
atom_model = layout_model_init(
kwargs.get("layout_weights"),
kwargs.get("layout_config_file"),
kwargs.get("device")
)
elif kwargs.get("layout_model_name") == MODEL_NAME.DocLayout_YOLO:
atom_model = doclayout_yolo_model_init(
kwargs.get("doclayout_yolo_weights"),
)
elif model_name == AtomicModel.MFD:
atom_model = mfd_model_init(
kwargs.get("mfd_weights")
......@@ -146,7 +166,7 @@ def atom_model_init(model_name: str, **kwargs):
)
elif model_name == AtomicModel.Table:
atom_model = table_model_init(
kwargs.get("table_model_type"),
kwargs.get("table_model_name"),
kwargs.get("table_model_path"),
kwargs.get("table_max_time"),
kwargs.get("device")
......@@ -194,23 +214,35 @@ class CustomPEKModel:
with open(config_path, "r", encoding='utf-8') as f:
self.configs = yaml.load(f, Loader=yaml.FullLoader)
# initialize parsing configuration
self.apply_layout = kwargs.get("apply_layout", self.configs["config"]["layout"])
self.apply_formula = kwargs.get("apply_formula", self.configs["config"]["formula"])
# layout config
self.layout_config = kwargs.get("layout_config")
self.layout_model_name = self.layout_config.get("model", MODEL_NAME.DocLayout_YOLO)
# formula config
self.formula_config = kwargs.get("formula_config")
self.mfd_model_name = self.formula_config.get("mfd_model", MODEL_NAME.YOLO_V8_MFD)
self.mfr_model_name = self.formula_config.get("mfr_model", MODEL_NAME.UniMerNet_v2_Small)
self.apply_formula = self.formula_config.get("enable", True)
# table config
self.table_config = kwargs.get("table_config", self.configs["config"]["table_config"])
self.apply_table = self.table_config.get("is_table_recog_enable", False)
self.table_config = kwargs.get("table_config")
self.apply_table = self.table_config.get("enable", False)
self.table_max_time = self.table_config.get("max_time", TABLE_MAX_TIME_VALUE)
self.table_model_type = self.table_config.get("model", TABLE_MASTER)
self.table_model_name = self.table_config.get("model", MODEL_NAME.TABLE_MASTER)
# ocr config
self.apply_ocr = ocr
self.lang = kwargs.get("lang", None)
logger.info(
"DocAnalysis init, this may take some time. apply_layout: {}, apply_formula: {}, apply_ocr: {}, apply_table: {}, lang: {}".format(
self.apply_layout, self.apply_formula, self.apply_ocr, self.apply_table, self.lang
"DocAnalysis init, this may take some time, layout_model: {}, apply_formula: {}, apply_ocr: {}, "
"apply_table: {}, table_model: {}, lang: {}".format(
self.layout_model_name, self.apply_formula, self.apply_ocr, self.apply_table, self.table_model_name, self.lang
)
)
assert self.apply_layout, "DocAnalysis must contain layout model."
# initialize the parsing scheme
self.device = kwargs.get("device", self.configs["config"]["device"])
self.device = kwargs.get("device", "cpu")
logger.info("using device: {}".format(self.device))
models_dir = kwargs.get("models_dir", os.path.join(root_dir, "resources", "models"))
logger.info("using models_dir: {}".format(models_dir))
......@@ -219,17 +251,16 @@ class CustomPEKModel:
# initialize formula recognition
if self.apply_formula:
# initialize the formula detection (MFD) model
# self.mfd_model = mfd_model_init(str(os.path.join(models_dir, self.configs["weights"]["mfd"])))
self.mfd_model = atom_model_manager.get_atom_model(
atom_model_name=AtomicModel.MFD,
mfd_weights=str(os.path.join(models_dir, self.configs["weights"]["mfd"]))
mfd_weights=str(os.path.join(models_dir, self.configs["weights"][self.mfd_model_name]))
)
# initialize the formula recognition (MFR) model
mfr_weight_dir = str(os.path.join(models_dir, self.configs["weights"]["mfr"]))
mfr_weight_dir = str(os.path.join(models_dir, self.configs["weights"][self.mfr_model_name]))
mfr_cfg_path = str(os.path.join(model_config_dir, "UniMERNet", "demo.yaml"))
# self.mfr_model, mfr_vis_processors = mfr_model_init(mfr_weight_dir, mfr_cfg_path, _device_=self.device)
# self.mfr_transform = transforms.Compose([mfr_vis_processors, ])
self.mfr_model, self.mfr_transform = atom_model_manager.get_atom_model(
atom_model_name=AtomicModel.MFR,
mfr_weight_dir=mfr_weight_dir,
......@@ -238,17 +269,20 @@ class CustomPEKModel:
)
# initialize the layout model
# self.layout_model = Layoutlmv3_Predictor(
# str(os.path.join(models_dir, self.configs['weights']['layout'])),
# str(os.path.join(model_config_dir, "layoutlmv3", "layoutlmv3_base_inference.yaml")),
# device=self.device
# )
if self.layout_model_name == MODEL_NAME.LAYOUTLMv3:
self.layout_model = atom_model_manager.get_atom_model(
atom_model_name=AtomicModel.Layout,
layout_weights=str(os.path.join(models_dir, self.configs['weights']['layout'])),
layout_model_name=MODEL_NAME.LAYOUTLMv3,
layout_weights=str(os.path.join(models_dir, self.configs['weights'][self.layout_model_name])),
layout_config_file=str(os.path.join(model_config_dir, "layoutlmv3", "layoutlmv3_base_inference.yaml")),
device=self.device
)
elif self.layout_model_name == MODEL_NAME.DocLayout_YOLO:
self.layout_model = atom_model_manager.get_atom_model(
atom_model_name=AtomicModel.Layout,
layout_model_name=MODEL_NAME.DocLayout_YOLO,
doclayout_yolo_weights=str(os.path.join(models_dir, self.configs['weights'][self.layout_model_name]))
)
# initialize OCR
if self.apply_ocr:
......@@ -261,12 +295,10 @@ class CustomPEKModel:
)
# init table model
if self.apply_table:
table_model_dir = self.configs["weights"][self.table_model_type]
# self.table_model = table_model_init(self.table_model_type, str(os.path.join(models_dir, table_model_dir)),
# max_time=self.table_max_time, _device_=self.device)
table_model_dir = self.configs["weights"][self.table_model_name]
self.table_model = atom_model_manager.get_atom_model(
atom_model_name=AtomicModel.Table,
table_model_type=self.table_model_type,
table_model_name=self.table_model_name,
table_model_path=str(os.path.join(models_dir, table_model_dir)),
table_max_time=self.table_max_time,
device=self.device
......@@ -294,7 +326,21 @@ class CustomPEKModel:
# layout detection
layout_start = time.time()
if self.layout_model_name == MODEL_NAME.LAYOUTLMv3:
# layoutlmv3
layout_res = self.layout_model(image, ignore_catids=[])
elif self.layout_model_name == MODEL_NAME.DocLayout_YOLO:
# doclayout_yolo
layout_res = []
doclayout_yolo_res = self.layout_model.predict(image, imgsz=1024, conf=0.25, iou=0.45, verbose=True, device=self.device)[0]
for xyxy, conf, cla in zip(doclayout_yolo_res.boxes.xyxy.cpu(), doclayout_yolo_res.boxes.conf.cpu(), doclayout_yolo_res.boxes.cls.cpu()):
xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
new_item = {
'category_id': int(cla.item()),
'poly': [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax],
'score': round(float(conf.item()), 3),
}
layout_res.append(new_item)
layout_cost = round(time.time() - layout_start, 2)
logger.info(f"layout detection time: {layout_cost}")
......@@ -303,7 +349,7 @@ class CustomPEKModel:
if self.apply_formula:
# formula detection
mfd_start = time.time()
mfd_res = self.mfd_model.predict(image, imgsz=1888, conf=0.25, iou=0.45, verbose=True)[0]
mfd_res = self.mfd_model.predict(image, imgsz=1888, conf=0.25, iou=0.45, verbose=True, device=self.device)[0]
logger.info(f"mfd time: {round(time.time() - mfd_start, 2)}")
for xyxy, conf, cla in zip(mfd_res.boxes.xyxy.cpu(), mfd_res.boxes.conf.cpu(), mfd_res.boxes.cls.cpu()):
xmin, ymin, xmax, ymax = [int(p.item()) for p in xyxy]
......@@ -315,7 +361,6 @@ class CustomPEKModel:
}
layout_res.append(new_item)
latex_filling_list.append(new_item)
# bbox_img = get_croped_image(pil_img, [xmin, ymin, xmax, ymax])
bbox_img = pil_img.crop((xmin, ymin, xmax, ymax))
mf_image_list.append(bbox_img)
......@@ -417,7 +462,7 @@ class CustomPEKModel:
# logger.info("------------------table recognition processing begins-----------------")
latex_code = None
html_code = None
if self.table_model_type == STRUCT_EQTABLE:
if self.table_model_name == MODEL_NAME.STRUCT_EQTABLE:
with torch.no_grad():
latex_code = self.table_model.image2latex(new_image)[0]
else:
......
......@@ -52,11 +52,11 @@ class ppTableModel(object):
rec_model_dir = os.path.join(model_dir, REC_MODEL_DIR)
rec_char_dict_path = os.path.join(model_dir, REC_CHAR_DICT)
device = kwargs.get("device", "cpu")
use_gpu = True if device == "cuda" else False
use_gpu = True if device.startswith("cuda") else False
config = {
"use_gpu": use_gpu,
"table_max_len": kwargs.get("table_max_len", TABLE_MAX_LEN),
"table_algorithm": TABLE_MASTER,
"table_algorithm": "TableMaster",
"table_model_dir": table_model_dir,
"table_char_dict_path": table_char_dict_path,
"det_model_dir": det_model_dir,
......
......@@ -15,6 +15,9 @@ class ListLineTag:
def __process_blocks(blocks):
# preprocess all blocks
# 1. group blocks by title and interline_equation
# 2. reset bbox boundaries according to line info
result = []
current_group = []
......@@ -47,12 +50,16 @@ def __process_blocks(blocks):
return result
def __is_list_block(block):
def __is_list_or_index_block(block):
# a block is a list block if it satisfies all of the following:
# 1. the block has multiple lines  2. multiple lines are flush with the left edge  3. multiple lines do not reach the right edge (ragged)
# 1. the block has multiple lines  2. multiple lines are flush with the left edge  3. multiple lines end with an end flag
# 1. the block has multiple lines  2. multiple lines are flush with the left edge  3. multiple lines are not flush with the left edge
if len(block['lines']) >= 3:
# an index block is a special kind of list block
# a block is an index block if it satisfies all of the following:
# 1. the block has multiple lines  2. multiple lines are flush with both edges  3. lines start or end with digits
if len(block['lines']) >= 2:
first_line = block['lines'][0]
line_height = first_line['bbox'][3] - first_line['bbox'][1]
block_weight = block['bbox_fs'][2] - block['bbox_fs'][0]
......@@ -60,7 +67,19 @@ def __is_list_block(block):
left_close_num = 0
left_not_close_num = 0
right_not_close_num = 0
right_close_num = 0
lines_text_list = []
multiple_para_flag = False
last_line = block['lines'][-1]
# if the first line is not flush left but is flush right, and the last line is flush left but not flush right (the first line may be allowed not to be flush right)
if (first_line['bbox'][0] - block['bbox_fs'][0] > line_height / 2 and
# block['bbox_fs'][2] - first_line['bbox'][2] < line_height and
abs(last_line['bbox'][0] - block['bbox_fs'][0]) < line_height / 2 and
block['bbox_fs'][2] - last_line['bbox'][2] > line_height
):
multiple_para_flag = True
for line in block['lines']:
line_text = ""
......@@ -73,13 +92,17 @@ def __is_list_block(block):
lines_text_list.append(line_text)
# count whether more than 2 lines are flush with the left edge; "flush" means abs(block['bbox_fs'][0] - line['bbox'][0]) < line_height/2
if abs(block['bbox_fs'][0] - line['bbox'][0]) < line_height/2:
if abs(block['bbox_fs'][0] - line['bbox'][0]) < line_height / 2:
left_close_num += 1
elif line['bbox'][0] - block['bbox_fs'][0] > line_height:
# logger.info(f"{line_text}, {block['bbox_fs']}, {line['bbox']}")
left_not_close_num += 1
# check whether the right side is not flush; a rule-of-thumb threshold of 0.3 * block width is used
# check whether the right side is flush
if abs(block['bbox_fs'][2] - line['bbox'][2]) < line_height:
right_close_num += 1
else:
# when the right side is not flush, check whether the gap is large enough; a rule-of-thumb threshold of 0.3 * block width is used
closed_area = 0.3 * block_weight
# closed_area = 5 * line_height
if block['bbox_fs'][2] - line['bbox'][2] > closed_area:
......@@ -87,96 +110,100 @@ def __is_list_block(block):
# check whether more than 80% of the entries in lines_text_list end with LIST_END_FLAG
line_end_flag = False
if len(lines_text_list) > 0:
# check whether more than 80% of the entries in lines_text_list start with a digit or end with a digit
line_num_flag = False
num_start_count = 0
num_end_count = 0
flag_end_count = 0
if len(lines_text_list) > 0:
for line_text in lines_text_list:
if len(line_text) > 0:
if line_text[-1] in LIST_END_FLAG:
flag_end_count += 1
if line_text[0].isdigit():
num_start_count += 1
if line_text[-1].isdigit():
num_end_count += 1
if num_end_count / len(lines_text_list) >= 0.8:
if flag_end_count / len(lines_text_list) >= 0.8:
line_end_flag = True
if left_close_num >= 2 and (right_not_close_num >= 2 or line_end_flag or left_not_close_num >= 2):
if num_start_count / len(lines_text_list) >= 0.8 or num_end_count / len(lines_text_list) >= 0.8:
line_num_flag = True
# some tables of contents are not flush on the right; for now, if either the left or the right side is fully flush and the digit rule holds, treat the block as an index
if ((left_close_num/len(block['lines']) >= 0.8 or right_close_num/len(block['lines']) >= 0.8)
and line_num_flag
):
for line in block['lines']:
line[ListLineTag.IS_LIST_START_LINE] = True
return BlockType.Index
elif left_close_num >= 2 and (
right_not_close_num >= 2 or line_end_flag or left_not_close_num >= 2) and not multiple_para_flag:
# handle a special kind of list without indentation: every line is flush left, and the gap on the right decides whether a line ends an item
if left_close_num / len(block['lines']) > 0.9:
# this is a list of short items where each item is a single line and every line is flush left
if flag_end_count == 0 and right_close_num / len(block['lines']) < 0.5:
for line in block['lines']:
if abs(block['bbox_fs'][0] - line['bbox'][0]) < line_height / 2:
line[ListLineTag.IS_LIST_START_LINE] = True
if abs(block['bbox_fs'][2] - line['bbox'][2]) > line_height:
# this is the case where most line items carry an end flag; items are separated by the end flag
elif line_end_flag:
for i, line in enumerate(block['lines']):
if lines_text_list[i][-1] in LIST_END_FLAG:
line[ListLineTag.IS_LIST_END_LINE] = True
return True
if i + 1 < len(block['lines']):
block['lines'][i+1][ListLineTag.IS_LIST_START_LINE] = True
# line items have almost no end flags and no indentation either; use the gap on the right to decide which lines end an item
else:
return False
line_start_flag = False
for i, line in enumerate(block['lines']):
if line_start_flag:
line[ListLineTag.IS_LIST_START_LINE] = True
line_start_flag = False
elif abs(block['bbox_fs'][2] - line['bbox'][2]) > line_height:
line[ListLineTag.IS_LIST_END_LINE] = True
line_start_flag = True
# a special indented ordered list: start lines are not flush left and begin with a digit; end lines finish with IS_LIST_END_LINE and their count matches the start lines
elif num_start_count >= 2 and num_start_count == flag_end_count:  # keep it simple for now and ignore the not-flush-left case
for i, line in enumerate(block['lines']):
if lines_text_list[i][0].isdigit():
line[ListLineTag.IS_LIST_START_LINE] = True
if lines_text_list[i][-1] in LIST_END_FLAG:
line[ListLineTag.IS_LIST_END_LINE] = True
else:
return False
def __is_index_block(block):
# 一个block如果是index block 应该同时满足以下特征
# 1.block内有多个line 2.block 内有多个line两侧均顶格写 3.line的开头或者结尾均为数字
if len(block['lines']) >= 3:
first_line = block['lines'][0]
line_height = first_line['bbox'][3] - first_line['bbox'][1]
left_close_num = 0
right_close_num = 0
lines_text_list = []
# normal handling of indented lists
for line in block['lines']:
# count whether more than 2 lines are flush with the left edge; "flush" means abs(block['bbox_fs'][0] - line['bbox'][0]) < line_height/2
if abs(block['bbox_fs'][0] - line['bbox'][0]) < line_height / 2:
left_close_num += 1
# check whether the right side is not flush
if abs(block['bbox_fs'][2] - line['bbox'][2]) < line_height / 2:
right_close_num += 1
line_text = ""
for span in line['spans']:
span_type = span['type']
if span_type == ContentType.Text:
line_text += span['content'].strip()
lines_text_list.append(line_text)
# check whether more than 80% of the entries in lines_text_list start with a digit or end with a digit
line_num_flag = False
if len(lines_text_list) > 0:
num_start_count = 0
num_end_count = 0
for line_text in lines_text_list:
if len(line_text) > 0:
if line_text[0].isdigit():
num_start_count += 1
if line_text[-1].isdigit():
num_end_count += 1
if num_start_count / len(lines_text_list) >= 0.8 or num_end_count / len(lines_text_list) >= 0.8:
line_num_flag = True
if left_close_num >= 2 and right_close_num >= 2 and line_num_flag:
for line in block['lines']:
line[ListLineTag.IS_LIST_START_LINE] = True
if abs(block['bbox_fs'][2] - line['bbox'][2]) > line_height:
line[ListLineTag.IS_LIST_END_LINE] = True
return True
return BlockType.List
else:
return False
return BlockType.Text
else:
return False
return BlockType.Text
def __merge_2_text_blocks(block1, block2):
if len(block1['lines']) > 0:
first_line = block1['lines'][0]
line_height = first_line['bbox'][3] - first_line['bbox'][1]
if abs(block1['bbox_fs'][0] - first_line['bbox'][0]) < line_height/2:
block1_weight = block1['bbox'][2] - block1['bbox'][0]
block2_weight = block2['bbox'][2] - block2['bbox'][0]
min_block_weight = min(block1_weight, block2_weight)
if abs(block1['bbox_fs'][0] - first_line['bbox'][0]) < line_height / 2:
last_line = block2['lines'][-1]
if len(last_line['spans']) > 0:
last_span = last_line['spans'][-1]
line_height = last_line['bbox'][3] - last_line['bbox'][1]
if abs(block2['bbox_fs'][2] - last_line['bbox'][2]) < line_height and not last_span['content'].endswith(LINE_STOP_FLAG):
if (abs(block2['bbox_fs'][2] - last_line['bbox'][2]) < line_height and
not last_span['content'].endswith(LINE_STOP_FLAG) and
# do not merge if the two blocks' widths differ by more than a factor of 2
abs(block1_weight - block2_weight) < min_block_weight
):
if block1['page_num'] != block2['page_num']:
for line in block1['lines']:
for span in line['spans']:
......@@ -189,7 +216,6 @@ def __merge_2_text_blocks(block1, block2):
def __merge_2_list_blocks(block1, block2):
if block1['page_num'] != block2['page_num']:
for line in block1['lines']:
for span in line['spans']:
......@@ -201,33 +227,47 @@ def __merge_2_list_blocks(block1, block2):
return block1, block2
def __is_list_group(text_blocks_group):
# a list group is a group in which every block satisfies the following conditions:
# 1. each block has no more than 3 lines  2. the left edges of all blocks are close to each other (rule not applied yet, to keep the logic simple)
for block in text_blocks_group:
if len(block['lines']) > 3:
return False
return True
def __para_merge_page(blocks):
page_text_blocks_groups = __process_blocks(blocks)
for text_blocks_group in page_text_blocks_groups:
if len(text_blocks_group) > 0:
# before merging, first decide for every block whether it is a list block
# before merging, first decide for every block whether it is a list or index block
for block in text_blocks_group:
if __is_list_block(block):
block['type'] = BlockType.List
elif __is_index_block(block):
block['type'] = BlockType.Index
block_type = __is_list_or_index_block(block)
block['type'] = block_type
# logger.info(f"{block['type']}:{block}")
if len(text_blocks_group) > 1:
# before merging, decide whether this group is a list group
is_list_group = __is_list_group(text_blocks_group)
# iterate in reverse order
for i in range(len(text_blocks_group)-1, -1, -1):
for i in range(len(text_blocks_group) - 1, -1, -1):
current_block = text_blocks_group[i]
# check whether there is a previous block
if i - 1 >= 0:
prev_block = text_blocks_group[i - 1]
if current_block['type'] == 'text' and prev_block['type'] == 'text':
if current_block['type'] == 'text' and prev_block['type'] == 'text' and not is_list_group:
__merge_2_text_blocks(current_block, prev_block)
if current_block['type'] == BlockType.List and prev_block['type'] == BlockType.List:
__merge_2_list_blocks(current_block, prev_block)
if current_block['type'] == BlockType.Index and prev_block['type'] == BlockType.Index:
elif (
(current_block['type'] == BlockType.List and prev_block['type'] == BlockType.List) or
(current_block['type'] == BlockType.Index and prev_block['type'] == BlockType.Index)
):
__merge_2_list_blocks(current_block, prev_block)
else:
continue
......@@ -249,7 +289,7 @@ def para_split(pdf_info_dict, debug_mode=False):
if __name__ == '__main__':
input_blocks = [{'type': 'text', 'bbox': [19, 79, 285, 95], 'lines': [{'bbox': [21.360000610351562, 81.50750732421875, 287.69000244140625, 93.62750244140625], 'spans': [{'bbox': [21.360000610351562, 81.62750244140625, 170.3000030517578, 93.62750244140625], 'content': '嘉和美康(688246)/计算机', 'type': 'text', 'score': 1.0}, {'bbox': [170.3000030517578, 81.62750244140625, 176.3000030517578, 93.62750244140625], 'content': ' ', 'type': 'text', 'score': 1.0}, {'bbox': [181.22000122070312, 81.50750732421875, 281.8052062988281, 93.50750732421875], 'content': '证券研究报告/公司点评', 'type': 'text', 'score': 1.0}, {'bbox': [281.69000244140625, 81.50750732421875, 287.69000244140625, 93.50750732421875], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 0}], 'index': 0, 'page_num': 'page_0', 'bbox_fs': [21.360000610351562, 81.50750732421875, 287.69000244140625, 93.62750244140625]}, {'type': 'title', 'bbox': [18, 109, 124, 123], 'lines': [{'bbox': [21.360000610351562, 101.70799255371094, 98.47967529296875, 116.21743774414062], 'spans': [{'bbox': [21.360000610351562, 101.70799255371094, 98.47967529296875, 116.21743774414062], 'content': '[Table_Industry] ', 'type': 'text', 'score': 1.0}], 'index': 1}, {'bbox': [21.1200008392334, 110.3074951171875, 129.5640106201172, 122.3074951171875], 'spans': [{'bbox': [21.1200008392334, 110.3074951171875, 129.5640106201172, 122.3074951171875], 'content': '评级:买入(维持)', 'type': 'text', 'score': 1.0}], 'index': 2}], 'index': 1.5, 'page_num': 'page_0'}, {'type': 'text', 'bbox': [20, 126, 117, 137], 'lines': [{'bbox': [21.1200008392334, 127.40557861328125, 116.18000030517578, 136.40557861328125], 'spans': [{'bbox': [21.1200008392334, 127.40557861328125, 116.18000030517578, 136.40557861328125], 'content': '市场价格:16.62 元/股', 'type': 'text', 'score': 1.0}], 'index': 3}], 'index': 3, 'page_num': 'page_0', 'bbox_fs': [21.1200008392334, 127.40557861328125, 116.18000030517578, 136.40557861328125]}, {'type': 'text', 'bbox': [19, 144, 158, 172], 'lines': [{'bbox': [21.1200008392334, 144.1099853515625, 86.88600158691406, 156.50299072265625], 'spans': [{'bbox': [21.1200008392334, 146.005615234375, 84.33599853515625, 155.005615234375], 'content': '分析师:闻学臣', 'type': 'text', 'score': 1.0}, {'bbox': [84.38400268554688, 144.1099853515625, 86.88600158691406, 156.50299072265625], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 4}, {'bbox': [21.1200008392334, 159.7099609375, 157.9219970703125, 172.10296630859375], 'spans': [{'bbox': [21.1200008392334, 161.6055908203125, 84.33599853515625, 170.6055908203125], 'content': '执业证书编号:', 'type': 'text', 'score': 1.0}, {'bbox': [84.50399780273438, 159.7099609375, 155.45095825195312, 172.10296630859375], 'content': 'S0740519090007', 'type': 'text', 'score': 1.0}, {'bbox': [155.4199981689453, 159.7099609375, 157.9219970703125, 172.10296630859375], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 5}], 'index': 4.5, 'page_num': 'page_0', 'bbox_fs': [21.1200008392334, 144.1099853515625, 157.9219970703125, 172.10296630859375]}, {'type': 'text', 'bbox': [18, 194, 157, 241], 'lines': [{'bbox': [21.1200008392334, 193.86497497558594, 86.88600158691406, 206.23097229003906], 'spans': [{'bbox': [21.1200008392334, 195.80560302734375, 84.33599853515625, 204.80560302734375], 'content': '分析师:何柄谕', 'type': 'text', 'score': 1.0}, {'bbox': [84.38400268554688, 193.86497497558594, 86.88600158691406, 206.23097229003906], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 6}, {'bbox': [21.1200008392334, 211.07000732421875, 157.9219970703125, 
223.4630126953125], 'spans': [{'bbox': [21.1200008392334, 212.96563720703125, 84.33599853515625, 221.96563720703125], 'content': '执业证书编号:', 'type': 'text', 'score': 1.0}, {'bbox': [84.50399780273438, 211.07000732421875, 155.44796752929688, 223.4630126953125], 'content': 'S0740519090003', 'type': 'text', 'score': 1.0}, {'bbox': [155.4199981689453, 211.07000732421875, 157.9219970703125, 223.4630126953125], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 7}, {'bbox': [21.1200008392334, 228.0649871826172, 126.84199523925781, 240.4309844970703], 'spans': [{'bbox': [21.1200008392334, 228.0649871826172, 43.73700714111328, 240.4309844970703], 'content': 'Email', 'type': 'text', 'score': 1.0}, {'bbox': [43.79999923706055, 230.005615234375, 52.79999923706055, 239.005615234375], 'content': ':', 'type': 'text', 'score': 1.0}, {'bbox': [52.68000030517578, 228.0649871826172, 124.41200256347656, 240.4309844970703], 'content': 'heby@zts.com.cn', 'type': 'text', 'score': 1.0}, {'bbox': [124.33999633789062, 228.0649871826172, 126.84199523925781, 240.4309844970703], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 8}], 'index': 7, 'page_num': 'page_0', 'bbox_fs': [21.1200008392334, 193.86497497558594, 157.9219970703125, 240.4309844970703]}, {'type': 'table', 'bbox': [18, 338, 169, 418], 'blocks': [{'bbox': [18, 356, 169, 418], 'type': 'table_body', 'lines': [{'bbox': [18, 356, 169, 418], 'spans': [{'bbox': [18, 356, 169, 418], 'score': 0.8198961019515991, 'type': 'table', 'image_path': '4123619a2e8de87ebe695a4e7703d09d957670491c939b1050c96bbf4104210e.jpg'}]}]}, {'bbox': [19, 338, 70, 352], 'type': 'table_caption', 'lines': [{'bbox': [21.1200008392334, 335.9779968261719, 85.39967346191406, 350.4874267578125], 'spans': [{'bbox': [21.1200008392334, 335.9779968261719, 85.39967346191406, 350.4874267578125], 'content': '[Table_Profit] ', 'type': 'text', 'score': 1.0}]}]}], 'index': 9.5, 'page_num': 'page_0'}, {'type': 'image', 'bbox': [19, 426, 163, 558], 'blocks': [{'bbox': [21, 452, 163, 558], 'type': 'image_body', 'lines': [{'bbox': [21, 452, 163, 558], 'spans': [{'bbox': [21, 452, 163, 558], 'score': 0.9999651312828064, 'type': 'image', 'image_path': '0e63ab24cdc2ac4cb0c46bf1ff7b9f094c092b9c5707810cbc2b7e30964cf8a1.jpg'}]}]}, {'bbox': [19, 426, 160, 440], 'type': 'image_caption', 'lines': [{'bbox': [21.1200008392334, 427.8774719238281, 165.74000549316406, 439.8774719238281], 'spans': [{'bbox': [21.1200008392334, 427.8774719238281, 165.74000549316406, 439.8774719238281], 'content': '股价与行业-市场走势对比 ', 'type': 'text', 'score': 1.0}]}]}], 'index': 11.5, 'page_num': 'page_0'}, {'type': 'title', 'bbox': [20, 569, 70, 583], 'lines': [{'bbox': [21.1200008392334, 570.70751953125, 75.38400268554688, 582.70751953125], 'spans': [{'bbox': [21.1200008392334, 570.70751953125, 75.38400268554688, 582.70751953125], 'content': '相关报告 ', 'type': 'text', 'score': 1.0}], 'index': 13}], 'index': 13, 'page_num': 'page_0'}, {'type': 'text', 'bbox': [20, 586, 168, 629], 'lines': [{'bbox': [21.1200008392334, 585.9849853515625, 166.1840057373047, 598.3509521484375], 'spans': [{'bbox': [21.1200008392334, 585.9849853515625, 28.661998748779297, 598.3509521484375], 'content': '1 ', 'type': 'text', 'score': 1.0}, {'bbox': [30.239999771118164, 587.9255981445312, 83.76300048828125, 596.9255981445312], 'content': '《嘉和美康(', 'type': 'text', 'score': 1.0}, {'bbox': [83.78399658203125, 585.9849853515625, 113.72698211669922, 598.3509521484375], 'content': '688246', 'type': 'text', 'score': 1.0}, {'bbox': [113.77999877929688, 
587.9255981445312, 131.3000030517578, 596.9255981445312], 'content': '):', 'type': 'text', 'score': 1.0}, {'bbox': [130.82000732421875, 585.9849853515625, 140.74400329589844, 598.3509521484375], 'content': '24', 'type': 'text', 'score': 1.0}, {'bbox': [140.74400329589844, 587.9255981445312, 151.94000244140625, 596.9255981445312], 'content': ' 年', 'type': 'text', 'score': 1.0}, {'bbox': [154.22000122070312, 585.9849853515625, 166.1840057373047, 598.3509521484375], 'content': 'Q1', 'type': 'text', 'score': 1.0}], 'index': 14}, {'bbox': [21.1200008392334, 603.525634765625, 165.1199951171875, 612.525634765625], 'spans': [{'bbox': [21.1200008392334, 603.525634765625, 165.1199951171875, 612.525634765625], 'content': '收入显著改善,医疗大模型产品落地', 'type': 'text', 'score': 1.0}], 'index': 15}, {'bbox': [21.1200008392334, 617.1849975585938, 50.62199783325195, 629.5509643554688], 'spans': [{'bbox': [21.1200008392334, 619.1256103515625, 48.119998931884766, 628.1256103515625], 'content': '良好》', 'type': 'text', 'score': 1.0}, {'bbox': [48.119998931884766, 617.1849975585938, 50.62199783325195, 629.5509643554688], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 16}], 'index': 15, 'page_num': 'page_0', 'bbox_fs': [21.1200008392334, 585.9849853515625, 166.1840057373047, 629.5509643554688]}, {'type': 'text', 'bbox': [19, 648, 167, 677], 'lines': [{'bbox': [21.1200008392334, 648.385009765625, 166.21701049804688, 660.7509765625], 'spans': [{'bbox': [21.1200008392334, 648.385009765625, 28.662002563476562, 660.7509765625], 'content': '2 ', 'type': 'text', 'score': 1.0}, {'bbox': [30.1200008392334, 650.3256225585938, 83.51700592041016, 659.3256225585938], 'content': '《嘉和美康(', 'type': 'text', 'score': 1.0}, {'bbox': [83.54399871826172, 648.385009765625, 113.48698425292969, 660.7509765625], 'content': '688246', 'type': 'text', 'score': 1.0}, {'bbox': [113.54000091552734, 650.3256225585938, 166.21701049804688, 659.3256225585938], 'content': '):收入逐季', 'type': 'text', 'score': 1.0}], 'index': 17}, {'bbox': [21.1200008392334, 663.9849853515625, 153.6020050048828, 676.3509521484375], 'spans': [{'bbox': [21.1200008392334, 665.9255981445312, 111.12000274658203, 674.9255981445312], 'content': '度加速,继续加大医疗', 'type': 'text', 'score': 1.0}, {'bbox': [113.41999816894531, 663.9849853515625, 121.9219970703125, 676.3509521484375], 'content': 'AI', 'type': 'text', 'score': 1.0}, {'bbox': [121.9219970703125, 665.9255981445312, 151.10299682617188, 674.9255981445312], 'content': ' 投入》', 'type': 'text', 'score': 1.0}, {'bbox': [151.10000610351562, 663.9849853515625, 153.6020050048828, 676.3509521484375], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 18}], 'index': 17.5, 'page_num': 'page_0', 'bbox_fs': [21.1200008392334, 648.385009765625, 166.21701049804688, 676.3509521484375]}, {'type': 'text', 'bbox': [19, 695, 167, 738], 'lines': [{'bbox': [21.1200008392334, 695.1849975585938, 166.21701049804688, 707.5509643554688], 'spans': [{'bbox': [21.1200008392334, 695.1849975585938, 28.661998748779297, 707.5509643554688], 'content': '3 ', 'type': 'text', 'score': 1.0}, {'bbox': [30.1200008392334, 697.1256103515625, 83.51700592041016, 706.1256103515625], 'content': '《嘉和美康(', 'type': 'text', 'score': 1.0}, {'bbox': [83.54399871826172, 695.1849975585938, 113.48698425292969, 707.5509643554688], 'content': '688246', 'type': 'text', 'score': 1.0}, {'bbox': [113.54000091552734, 697.1256103515625, 166.21701049804688, 706.1256103515625], 'content': '):回购彰显', 'type': 'text', 'score': 1.0}], 'index': 19}, {'bbox': [21.1200008392334, 
710.7849731445312, 160.22000122070312, 723.1509399414062], 'spans': [{'bbox': [21.1200008392334, 712.7255859375, 138.1199951171875, 721.7255859375], 'content': '公司发展信心,公司加大医疗', 'type': 'text', 'score': 1.0}, {'bbox': [140.4199981689453, 710.7849731445312, 148.9219970703125, 723.1509399414062], 'content': 'AI', 'type': 'text', 'score': 1.0}, {'bbox': [148.9219970703125, 712.7255859375, 160.22000122070312, 721.7255859375], 'content': ' 投', 'type': 'text', 'score': 1.0}], 'index': 20}, {'bbox': [21.1200008392334, 726.4049682617188, 41.62199783325195, 738.7709350585938], 'spans': [{'bbox': [21.1200008392334, 728.3455810546875, 39.12000274658203, 737.3455810546875], 'content': '入》', 'type': 'text', 'score': 1.0}, {'bbox': [39.119998931884766, 726.4049682617188, 41.62199783325195, 738.7709350585938], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 21}], 'index': 20, 'page_num': 'page_0', 'bbox_fs': [21.1200008392334, 695.1849975585938, 166.21701049804688, 738.7709350585938]}, {'type': 'text', 'bbox': [427, 80, 506, 94], 'lines': [{'bbox': [429.54998779296875, 81.50750732421875, 509.739990234375, 93.50750732421875], 'spans': [{'bbox': [429.54998779296875, 81.50750732421875, 503.8600158691406, 93.50750732421875], 'content': '2024 年8 月28 日', 'type': 'text', 'score': 1.0}, {'bbox': [503.739990234375, 81.50750732421875, 509.739990234375, 93.50750732421875], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 22}], 'index': 22, 'page_num': 'page_0', 'bbox_fs': [429.54998779296875, 81.50750732421875, 509.739990234375, 93.50750732421875]}, {'type': 'table', 'bbox': [184, 108, 568, 273], 'blocks': [{'bbox': [184, 124, 568, 249], 'type': 'table_body', 'lines': [{'bbox': [184, 124, 568, 249], 'spans': [{'bbox': [184, 124, 568, 249], 'score': 0.9999539852142334, 'type': 'table', 'image_path': 'feabef6394c4fd70ba64aece3701cd1fc49a0b7deb4ea0693dd63131f182fb9c.jpg'}]}]}, {'bbox': [184, 108, 295, 122], 'type': 'table_caption', 'lines': [{'bbox': [186.5, 110.3074951171875, 294.9320068359375, 122.3074951171875], 'spans': [{'bbox': [186.5, 110.3074951171875, 294.9320068359375, 122.3074951171875], 'content': '公司盈利预测及估值', 'type': 'text', 'score': 1.0}]}]}, {'bbox': [184, 262, 344, 273], 'type': 'table_footnote', 'lines': [{'bbox': [186.5, 262.17498779296875, 343.1300048828125, 274.5409851074219], 'spans': [{'bbox': [186.5, 264.1156005859375, 213.5, 273.1156005859375], 'content': '备注:', 'type': 'text', 'score': 1.0}, {'bbox': [213.52999877929688, 264.1156005859375, 240.52999877929688, 273.1156005859375], 'content': '股价为', 'type': 'text', 'score': 1.0}, {'bbox': [242.80999755859375, 262.17498779296875, 262.8139953613281, 274.5409851074219], 'content': '2024', 'type': 'text', 'score': 1.0}, {'bbox': [262.8139953613281, 264.1156005859375, 274.1300048828125, 273.1156005859375], 'content': ' 年', 'type': 'text', 'score': 1.0}, {'bbox': [276.4100036621094, 262.17498779296875, 281.41400146484375, 274.5409851074219], 'content': '8', 'type': 'text', 'score': 1.0}, {'bbox': [281.41400146484375, 264.1156005859375, 292.6099853515625, 273.1156005859375], 'content': ' 月', 'type': 'text', 'score': 1.0}, {'bbox': [294.8900146484375, 262.17498779296875, 304.93402099609375, 274.5409851074219], 'content': '27', 'type': 'text', 'score': 1.0}, {'bbox': [304.93402099609375, 264.1156005859375, 343.1300048828125, 273.1156005859375], 'content': ' 日收盘价', 'type': 'text', 'score': 1.0}]}]}], 'index': 24, 'page_num': 'page_0'}, {'type': 'title', 'bbox': [180, 285, 230, 300], 'lines': [{'bbox': [186.5, 277.7750244140625, 
189.0019989013672, 290.1410217285156], 'spans': [{'bbox': [186.5, 277.7750244140625, 189.0019989013672, 290.1410217285156], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 26}, {'bbox': [180.86000061035156, 280.41796875, 183.79568481445312, 294.9273986816406], 'spans': [{'bbox': [180.86000061035156, 280.41796875, 183.79568481445312, 294.9273986816406], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 27}, {'bbox': [180.86000061035156, 287.09747314453125, 235.1300048828125, 299.09747314453125], 'spans': [{'bbox': [180.86000061035156, 287.09747314453125, 235.1300048828125, 299.09747314453125], 'content': '投资要点 ', 'type': 'text', 'score': 1.0}], 'index': 28}], 'index': 27, 'page_num': 'page_0'}, {'type': 'text', 'bbox': [198, 302, 578, 331], 'lines': [{'bbox': [201.88999938964844, 302.3030090332031, 575.02001953125, 315.988037109375], 'spans': [{'bbox': [201.88999938964844, 304.45062255859375, 292.0099792480469, 314.41064453125], 'content': '投资事件:公司发布', 'type': 'text', 'score': 1.0}, {'bbox': [294.6499938964844, 302.3030090332031, 316.8507995605469, 315.988037109375], 'content': '2024', 'type': 'text', 'score': 1.0}, {'bbox': [316.8507995605469, 304.45062255859375, 429.3785705566406, 314.41064453125], 'content': ' 年中报:营业收入规模达', 'type': 'text', 'score': 1.0}, {'bbox': [432.07000732421875, 302.3030090332031, 451.5318298339844, 315.988037109375], 'content': '3.00', 'type': 'text', 'score': 1.0}, {'bbox': [451.5318298339844, 304.45062255859375, 524.1190795898438, 314.41064453125], 'content': ' 亿元,同比增长', 'type': 'text', 'score': 1.0}, {'bbox': [525, 303, 556, 314], 'score': 0.82, 'content': '2.92\\%', 'type': 'inline_equation'}, {'bbox': [555.0999755859375, 304.45062255859375, 575.02001953125, 314.41064453125], 'content': ',归', 'type': 'text', 'score': 1.0}], 'index': 29}, {'bbox': [201.88999938964844, 317.9029846191406, 329.118896484375, 331.6676940917969], 'spans': [{'bbox': [201.88999938964844, 320.05059814453125, 271.7195739746094, 330.0106201171875], 'content': '母净利润为亏损', 'type': 'text', 'score': 1.0}, {'bbox': [274.3699951171875, 317.9029846191406, 293.69873046875, 331.5880126953125], 'content': '0.27', 'type': 'text', 'score': 1.0}, {'bbox': [293.69873046875, 320.05059814453125, 326.31951904296875, 330.0106201171875], 'content': ' 亿元。', 'type': 'text', 'score': 1.0}, {'bbox': [326.3500061035156, 317.9527893066406, 329.118896484375, 331.6676940917969], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 30}], 'index': 29.5, 'page_num': 'page_0', 'bbox_fs': [201.88999938964844, 302.3030090332031, 575.02001953125, 331.6676940917969]}, {'type': 'text', 'bbox': [199, 349, 576, 425], 'lines': [{'bbox': [201.88999938964844, 351.2506103515625, 574.9908447265625, 361.21063232421875], 'spans': [{'bbox': [201.88999938964844, 351.2506103515625, 574.9908447265625, 361.21063232421875], 'content': '收入小幅增长,毛利率改善。报告期内,公司医疗临床业务、医疗数据业务等业务板', 'type': 'text', 'score': 1.0}], 'index': 31}, {'bbox': [201.88999938964844, 364.7029724121094, 577.1592407226562, 378.38800048828125], 'spans': [{'bbox': [201.88999938964844, 366.8505859375, 331.8081970214844, 376.81060791015625], 'content': '块平稳发展,整体收入规模达', 'type': 'text', 'score': 1.0}, {'bbox': [334.3900146484375, 364.7029724121094, 353.71875, 378.38800048828125], 'content': '3.00', 'type': 'text', 'score': 1.0}, {'bbox': [353.71875, 366.8505859375, 426.17950439453125, 376.81060791015625], 'content': ' 亿元,同比增长', 'type': 'text', 'score': 1.0}, {'bbox': [427, 365, 457, 377], 'score': 0.92, 'content': '2.92\\%', 'type': 'inline_equation'}, {'bbox': 
[457.17999267578125, 366.8505859375, 577.1592407226562, 376.81060791015625], 'content': ',整体收入实现平稳增长。', 'type': 'text', 'score': 1.0}], 'index': 32}, {'bbox': [201.88999938964844, 382.4505920410156, 580.0416259765625, 392.4106140136719], 'spans': [{'bbox': [201.88999938964844, 382.4505920410156, 580.0416259765625, 392.4106140136719], 'content': '由于公司优化产品结构,改进实施交付管理,公司业务毛利空间有所提升。报告期内,', 'type': 'text', 'score': 1.0}], 'index': 33}, {'bbox': [201.88999938964844, 395.9229736328125, 574.8645629882812, 409.6080017089844], 'spans': [{'bbox': [201.88999938964844, 398.0705871582031, 291.7491149902344, 408.0306091308594], 'content': '公司综合毛利率达到', 'type': 'text', 'score': 1.0}, {'bbox': [293, 397, 328, 409], 'score': 0.89, 'content': '48.03\\%', 'type': 'inline_equation'}, {'bbox': [328.2699890136719, 398.0705871582031, 386.6952819824219, 408.0306091308594], 'content': ',去年同期为', 'type': 'text', 'score': 1.0}, {'bbox': [388, 397, 423, 409], 'score': 0.89, 'content': '45.52\\%', 'type': 'inline_equation'}, {'bbox': [423.30999755859375, 398.0705871582031, 471.7752990722656, 408.0306091308594], 'content': ',同比提升', 'type': 'text', 'score': 1.0}, {'bbox': [474.3399963378906, 395.9229736328125, 493.80181884765625, 409.6080017089844], 'content': '2.51', 'type': 'text', 'score': 1.0}, {'bbox': [493.80181884765625, 398.0705871582031, 574.8645629882812, 408.0306091308594], 'content': ' 个百分点,公司毛', 'type': 'text', 'score': 1.0}], 'index': 34}, {'bbox': [201.88999938964844, 411.5229797363281, 279.6589050292969, 425.2080078125], 'spans': [{'bbox': [201.88999938964844, 413.67059326171875, 271.7195739746094, 423.630615234375], 'content': '利率明显改善。', 'type': 'text', 'score': 1.0}, {'bbox': [271.7300109863281, 411.5229797363281, 279.6589050292969, 425.2080078125], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 35}], 'index': 33, 'page_num': 'page_0'}, {'type': 'text', 'bbox': [199, 427, 577, 503], 'lines': [{'bbox': [201.88999938964844, 429.2705993652344, 574.9743041992188, 439.2306213378906], 'spans': [{'bbox': [201.88999938964844, 429.2705993652344, 574.9743041992188, 439.2306213378906], 'content': '降本增效成效显著,管理、销售费用率下降。报告期内,公司注重内控管理、人员能', 'type': 'text', 'score': 1.0}], 'index': 36}, {'bbox': [201.88999938964844, 442.7229919433594, 575.1400146484375, 456.40802001953125], 'spans': [{'bbox': [201.88999938964844, 444.87060546875, 530.7092895507812, 454.83062744140625], 'content': '效提升,加强管理方式优化及费用控制,公司运营管理方面降本增效明显。', 'type': 'text', 'score': 1.0}, {'bbox': [530.3800048828125, 442.7229919433594, 552.600830078125, 456.40802001953125], 'content': '2024', 'type': 'text', 'score': 1.0}, {'bbox': [552.600830078125, 444.87060546875, 575.1400146484375, 454.83062744140625], 'content': ' 年上', 'type': 'text', 'score': 1.0}], 'index': 37}, {'bbox': [201.88999938964844, 458.3229675292969, 575.1334838867188, 472.00799560546875], 'spans': [{'bbox': [201.88999938964844, 460.4705810546875, 310.71295166015625, 470.43060302734375], 'content': '半年,公司销售费用率为', 'type': 'text', 'score': 1.0}, {'bbox': [312, 459, 348, 471], 'score': 0.91, 'content': '16.39\\%', 'type': 'inline_equation'}, {'bbox': [347.3500061035156, 460.4705810546875, 406.1438293457031, 470.43060302734375], 'content': ',去年同期为', 'type': 'text', 'score': 1.0}, {'bbox': [407, 459, 443, 471], 'score': 0.9, 'content': '17.57\\%', 'type': 'inline_equation'}, {'bbox': [442.75, 460.4705810546875, 501.5438232421875, 470.43060302734375], 'content': ',同比下降个', 'type': 'text', 'score': 1.0}, {'bbox': [504.2200012207031, 458.3229675292969, 523.5487670898438, 472.00799560546875], 
'content': '1.18', 'type': 'text', 'score': 1.0}, {'bbox': [523.5487670898438, 460.4705810546875, 575.1334838867188, 470.43060302734375], 'content': ' 百分点;管', 'type': 'text', 'score': 1.0}], 'index': 38}, {'bbox': [201.88999938964844, 473.9229736328125, 575.0936279296875, 487.6080017089844], 'spans': [{'bbox': [201.88999938964844, 476.0705871582031, 251.79959106445312, 486.0306091308594], 'content': '理费用率为', 'type': 'text', 'score': 1.0}, {'bbox': [253, 474, 288, 487], 'score': 0.89, 'content': '16.21\\%', 'type': 'inline_equation'}, {'bbox': [288.2900085449219, 476.0705871582031, 346.8248596191406, 486.0306091308594], 'content': ',去年同期为', 'type': 'text', 'score': 1.0}, {'bbox': [348, 474, 384, 487], 'score': 0.89, 'content': '17.79\\%', 'type': 'inline_equation'}, {'bbox': [383.3500061035156, 476.0705871582031, 431.9348449707031, 486.0306091308594], 'content': ',同比下降', 'type': 'text', 'score': 1.0}, {'bbox': [434.5899963378906, 473.9229736328125, 453.9187316894531, 487.6080017089844], 'content': '1.58', 'type': 'text', 'score': 1.0}, {'bbox': [453.9187316894531, 476.0705871582031, 575.0936279296875, 486.0306091308594], 'content': ' 个百分点。公司管理费用率', 'type': 'text', 'score': 1.0}], 'index': 39}, {'bbox': [201.88999938964844, 489.5727844238281, 434.7189025878906, 503.2876892089844], 'spans': [{'bbox': [201.88999938964844, 491.67059326171875, 431.7367858886719, 501.630615234375], 'content': '及销售费用率均实现下降,公司运营效率明显提升。', 'type': 'text', 'score': 1.0}, {'bbox': [431.95001220703125, 489.5727844238281, 434.7189025878906, 503.2876892089844], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 40}], 'index': 38, 'page_num': 'page_0'}, {'type': 'text', 'bbox': [199, 505, 577, 628], 'lines': [{'bbox': [201.88999938964844, 505.1727600097656, 575.0682983398438, 518.8876953125], 'spans': [{'bbox': [201.88999938964844, 507.27056884765625, 241.9491424560547, 517.2305908203125], 'content': '公司加大', 'type': 'text', 'score': 1.0}, {'bbox': [245.2100067138672, 505.1727600097656, 255.1788787841797, 518.8876953125], 'content': 'AI', 'type': 'text', 'score': 1.0}, {'bbox': [255.1788787841797, 507.27056884765625, 328.44818115234375, 517.2305908203125], 'content': ' 投入力度,医疗', 'type': 'text', 'score': 1.0}, {'bbox': [331.75, 505.1727600097656, 341.7189025878906, 518.8876953125], 'content': 'AI', 'type': 'text', 'score': 1.0}, {'bbox': [341.7189025878906, 507.27056884765625, 575.0682983398438, 517.2305908203125], 'content': ' 产品落地情况良好。公司继续加大研发投入力度,尤', 'type': 'text', 'score': 1.0}], 'index': 41}, {'bbox': [201.88999938964844, 520.7230224609375, 575.057861328125, 534.4080200195312], 'spans': [{'bbox': [201.88999938964844, 522.87060546875, 241.83958435058594, 532.8306274414062], 'content': '其是医疗', 'type': 'text', 'score': 1.0}, {'bbox': [244.97000122070312, 520.7230224609375, 254.45889282226562, 534.4080200195312], 'content': 'AI', 'type': 'text', 'score': 1.0}, {'bbox': [254.45889282226562, 522.87060546875, 407.5176696777344, 532.8306274414062], 'content': ' 投入力度。报告期内,公司新申请', 'type': 'text', 'score': 1.0}, {'bbox': [410.8299865722656, 520.7230224609375, 421.8876953125, 534.4080200195312], 'content': '26', 'type': 'text', 'score': 1.0}, {'bbox': [421.8876953125, 522.87060546875, 575.057861328125, 532.8306274414062], 'content': ' 项发明专利,主要集中在医疗数据', 'type': 'text', 'score': 1.0}], 'index': 42}, {'bbox': [201.88999938964844, 536.322998046875, 574.8896484375, 550.0079956054688], 'spans': [{'bbox': [201.88999938964844, 538.4705810546875, 231.77001953125, 548.4306030273438], 'content': '利用和', 'type': 'text', 'score': 1.0}, 
{'bbox': [234.77000427246094, 536.322998046875, 244.13890075683594, 550.0079956054688], 'content': 'AI', 'type': 'text', 'score': 1.0}, {'bbox': [244.13890075683594, 538.4705810546875, 306.98907470703125, 548.4306030273438], 'content': ' 领域,并获得', 'type': 'text', 'score': 1.0}, {'bbox': [309.8900146484375, 536.322998046875, 315.4277648925781, 550.0079956054688], 'content': '1', 'type': 'text', 'score': 1.0}, {'bbox': [315.4277648925781, 538.4705810546875, 368.31951904296875, 548.4306030273438], 'content': ' 项核心技术', 'type': 'text', 'score': 1.0}, {'bbox': [368.3500061035156, 538.013427734375, 371.6667785644531, 549.140625], 'content': '“', 'type': 'text', 'score': 1.0}, {'bbox': [371.7099914550781, 538.4705810546875, 521.548095703125, 548.4306030273438], 'content': '大模型辅助电子病历自动生成技术', 'type': 'text', 'score': 1.0}, {'bbox': [521.6199951171875, 538.013427734375, 524.936767578125, 549.140625], 'content': '”', 'type': 'text', 'score': 1.0}, {'bbox': [524.97998046875, 538.4705810546875, 574.8896484375, 548.4306030273438], 'content': '。依托公司', 'type': 'text', 'score': 1.0}], 'index': 43}, {'bbox': [201.88999938964844, 551.9229736328125, 574.925048828125, 565.6080322265625], 'spans': [{'bbox': [201.88999938964844, 551.9229736328125, 211.25889587402344, 565.6080322265625], 'content': 'AI', 'type': 'text', 'score': 1.0}, {'bbox': [211.25889587402344, 554.0706176757812, 332.65252685546875, 564.0306396484375], 'content': ' 技术的积累,公司推出医疗', 'type': 'text', 'score': 1.0}, {'bbox': [335.2300109863281, 551.9229736328125, 344.5989074707031, 565.6080322265625], 'content': 'AI', 'type': 'text', 'score': 1.0}, {'bbox': [344.5989074707031, 554.0706176757812, 574.925048828125, 564.0306396484375], 'content': ' 应用开发平台,打造全院智慧化服务接入底座,实现', 'type': 'text', 'score': 1.0}], 'index': 44}, {'bbox': [201.88999938964844, 567.552978515625, 574.8896484375, 581.238037109375], 'spans': [{'bbox': [201.88999938964844, 569.7006225585938, 291.7491149902344, 579.66064453125], 'content': '多技术框架、多业务', 'type': 'text', 'score': 1.0}, {'bbox': [294.4100036621094, 567.552978515625, 303.7789001464844, 581.238037109375], 'content': 'AI', 'type': 'text', 'score': 1.0}, {'bbox': [303.7789001464844, 569.7006225585938, 356.19952392578125, 579.66064453125], 'content': ' 应用接入。', 'type': 'text', 'score': 1.0}, {'bbox': [356.3500061035156, 567.552978515625, 378.5508117675781, 581.238037109375], 'content': '2024', 'type': 'text', 'score': 1.0}, {'bbox': [378.5508117675781, 569.7006225585938, 391.0299987792969, 579.66064453125], 'content': ' 年', 'type': 'text', 'score': 1.0}, {'bbox': [393.54998779296875, 567.552978515625, 399.0877380371094, 581.238037109375], 'content': '7', 'type': 'text', 'score': 1.0}, {'bbox': [399.0877380371094, 569.7006225585938, 531.5186157226562, 579.66064453125], 'content': ' 月,公司与北医三院联合发布', 'type': 'text', 'score': 1.0}, {'bbox': [531.5800170898438, 569.2434692382812, 534.8967895507812, 580.3706665039062], 'content': '“', 'type': 'text', 'score': 1.0}, {'bbox': [534.9400024414062, 569.7006225585938, 574.8896484375, 579.66064453125], 'content': '三生大模', 'type': 'text', 'score': 1.0}], 'index': 45}, {'bbox': [201.88999938964844, 583.1529541015625, 575.1400146484375, 596.8380126953125], 'spans': [{'bbox': [201.88999938964844, 585.3005981445312, 211.85000610351562, 595.2606201171875], 'content': '型', 'type': 'text', 'score': 1.0}, {'bbox': [211.85000610351562, 584.8434448242188, 215.16676330566406, 595.9706420898438], 'content': '”', 'type': 'text', 'score': 1.0}, {'bbox': [215.2100067138672, 585.3005981445312, 540.5800170898438, 
595.2606201171875], 'content': ',以大模型为底座的多业务场景得到落地验证并且应用效果良好,比如新型', 'type': 'text', 'score': 1.0}, {'bbox': [543.219970703125, 583.1529541015625, 552.5888671875, 596.8380126953125], 'content': 'AI', 'type': 'text', 'score': 1.0}, {'bbox': [552.5888671875, 585.3005981445312, 575.1400146484375, 595.2606201171875], 'content': ' 产品', 'type': 'text', 'score': 1.0}], 'index': 46}, {'bbox': [201.88999938964844, 600.9005737304688, 575.1082763671875, 610.860595703125], 'spans': [{'bbox': [201.88999938964844, 600.9005737304688, 575.1082763671875, 610.860595703125], 'content': '可以将医务人员曾经数小时的病历书写工作缩减至半小时内完成,大幅提升书写内容', 'type': 'text', 'score': 1.0}], 'index': 47}, {'bbox': [201.88999938964844, 614.4027709960938, 304.618896484375, 628.1177368164062], 'spans': [{'bbox': [201.88999938964844, 616.5006103515625, 301.7195129394531, 626.4606323242188], 'content': '的准确率及工作效率。', 'type': 'text', 'score': 1.0}, {'bbox': [301.8500061035156, 614.4027709960938, 304.618896484375, 628.1177368164062], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 48}], 'index': 44.5, 'page_num': 'page_0'}, {'type': 'text', 'bbox': [200, 646, 577, 690], 'lines': [{'bbox': [201.88999938964844, 645.552978515625, 574.8973999023438, 659.238037109375], 'spans': [{'bbox': [201.88999938964844, 647.7006225585938, 310.00994873046875, 657.66064453125], 'content': '投资建议:我们预计公司', 'type': 'text', 'score': 1.0}, {'bbox': [312.5299987792969, 645.552978515625, 384.72003173828125, 659.238037109375], 'content': '2024/2025/2026', 'type': 'text', 'score': 1.0}, {'bbox': [384.72003173828125, 647.7006225585938, 447.2890625, 657.66064453125], 'content': ' 年收入分别为', 'type': 'text', 'score': 1.0}, {'bbox': [449.8299865722656, 645.552978515625, 526.9088745117188, 659.238037109375], 'content': '9.03/11.48/14.47 ', 'type': 'text', 'score': 1.0}, {'bbox': [526.9000244140625, 647.7006225585938, 574.8973999023438, 657.66064453125], 'content': '亿元,净利', 'type': 'text', 'score': 1.0}], 'index': 49}, {'bbox': [201.88999938964844, 661.1529541015625, 574.98876953125, 674.8380126953125], 'spans': [{'bbox': [201.88999938964844, 663.3005981445312, 241.7300262451172, 673.2606201171875], 'content': '润分别为', 'type': 'text', 'score': 1.0}, {'bbox': [241.85000610351562, 661.1529541015625, 311.3388977050781, 674.8380126953125], 'content': ' 0.95/1.20/1.60 ', 'type': 'text', 'score': 1.0}, {'bbox': [311.3299865722656, 663.3005981445312, 361.239501953125, 673.2606201171875], 'content': '亿元,对应', 'type': 'text', 'score': 1.0}, {'bbox': [364.989990234375, 661.1529541015625, 378.35333251953125, 674.8380126953125], 'content': 'PE', 'type': 'text', 'score': 1.0}, {'bbox': [378.35333251953125, 663.3005981445312, 411.8995361328125, 673.2606201171875], 'content': ' 分别为', 'type': 'text', 'score': 1.0}, {'bbox': [411.9100036621094, 661.1529541015625, 481.42193603515625, 674.8380126953125], 'content': ' 24.1/19.0/14.3', 'type': 'text', 'score': 1.0}, {'bbox': [481.42193603515625, 663.3005981445312, 574.98876953125, 673.2606201171875], 'content': ' 倍。考虑公司业绩高', 'type': 'text', 'score': 1.0}], 'index': 50}, {'bbox': [201.88999938964844, 676.802734375, 431.35888671875, 690.5177001953125], 'spans': [{'bbox': [201.88999938964844, 678.9005737304688, 371.7577209472656, 688.860595703125], 'content': '增长以及估值处于较低水平,给予公司', 'type': 'text', 'score': 1.0}, {'bbox': [371.8299865722656, 678.4434204101562, 375.1467590332031, 689.5706176757812], 'content': '“', 'type': 'text', 'score': 1.0}, {'bbox': [375.19000244140625, 678.9005737304688, 395.1099853515625, 688.860595703125], 'content': '买入', 'type': 
'text', 'score': 1.0}, {'bbox': [395.1099853515625, 678.4434204101562, 398.4267578125, 689.5706176757812], 'content': '”', 'type': 'text', 'score': 1.0}, {'bbox': [398.4700012207031, 678.9005737304688, 428.45953369140625, 688.860595703125], 'content': '评级。', 'type': 'text', 'score': 1.0}, {'bbox': [428.5899963378906, 676.802734375, 431.35888671875, 690.5177001953125], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 51}], 'index': 50, 'page_num': 'page_0'}, {'type': 'text', 'bbox': [200, 708, 404, 721], 'lines': [{'bbox': [201.88999938964844, 708.0027465820312, 404.9588928222656, 721.7177124023438], 'spans': [{'bbox': [201.88999938964844, 710.1005859375, 402.00811767578125, 720.0606079101562], 'content': '风险提示:业务发展不及预期,政策推进缓慢', 'type': 'text', 'score': 1.0}, {'bbox': [402.19000244140625, 708.0027465820312, 404.9588928222656, 721.7177124023438], 'content': ' ', 'type': 'text', 'score': 1.0}], 'index': 52}], 'index': 52, 'page_num': 'page_0'}]
input_blocks = []
# Call the function
groups = __process_blocks(input_blocks)
for group_index, group in enumerate(groups):
    ...  # handle each group here
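
# Illustrative sketch only (an assumption, not part of the pipeline): supposing each
# group keeps the block structure shown in the sample above (text/title blocks carry
# 'lines' directly, while table/image blocks nest their 'lines' one level deeper under
# 'blocks', and every line holds 'spans' with a 'content' string), the plain text of a
# group could be collected roughly like this. The helper name `collect_group_text`
# is hypothetical.
def collect_group_text(group):
    texts = []
    for block in group:
        # table/image blocks wrap their lines in nested sub-blocks; text/title blocks
        # expose 'lines' directly, so fall back to the block itself
        sub_blocks = block.get('blocks', [block])
        for sub in sub_blocks:
            for line in sub.get('lines', []):
                for span in line.get('spans', []):
                    content = span.get('content')
                    if isinstance(content, str):
                        texts.append(content)
    return ''.join(texts)

# Possible usage with the loop above (hypothetical):
# for group_index, group in enumerate(groups):
#     print(group_index, collect_group_text(group))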