Merge pull request #1272 from myhloli/add-llm-aided

perf(layout): optimize layout detection for PDF extraction

Merge pull request #1272 from myhloli/add-llm-aided
perf(layout): optimize layout detection for PDF extraction
5bbd07a1 · Xiaomeng Zhao · GitHub · 56b0e18b · 6a75d7dc · 5bbd07a1
Commit 5bbd07a1 (unverified), authored Dec 12, 2024 by Xiaomeng Zhao; committed by GitHub on Dec 12, 2024
Show whitespace changes
Inline Side-by-side

Showing 2 changed files with 23 additions and 20 deletions

magic_pdf/model/pdf_extract_kit.py (+22 -20)

magic_pdf/post_proc/__init__.py (+1 -0)

No files found.
--- a/magic_pdf/model/pdf_extract_kit.py
+++ b/magic_pdf/model/pdf_extract_kit.py
@@ -171,6 +171,10 @@ class CustomPEKModel:
    def __call__(self, image):
+        pil_img = Image.fromarray(image)
+        width, height = pil_img.size
+        # logger.info(f'width: {width}, height: {height}')
        # layout检测
        layout_start = time.time()
        layout_res = []
@@ -179,11 +183,9 @@ class CustomPEKModel:
            layout_res = self.layout_model(image, ignore_catids=[])
        elif self.layout_model_name == MODEL_NAME.DocLayout_YOLO:
            # doclayout_yolo
-            img_pil = Image.fromarray(image)
+            if height > width:
-            width, height = img_pil.size
-            # logger.info(f'width: {width}, height: {height}')
                input_res = {"poly":[0,0,width,0,width,height,0,height]}
-            new_image, useful_list = crop_img(input_res, img_pil, crop_paste_x=width//2, crop_paste_y=0)
+                new_image, useful_list = crop_img(input_res, pil_img, crop_paste_x=width//2, crop_paste_y=0)
                paste_x, paste_y, xmin, ymin, xmax, ymax, new_width, new_height = useful_list
                layout_res = self.layout_model.predict(new_image)
                for res in layout_res:
@@ -197,12 +199,12 @@ class CustomPEKModel:
                    p7 = p7 - paste_x + xmin
                    p8 = p8 - paste_y + ymin
                    res['poly'] = [p1, p2, p3, p4, p5, p6, p7, p8]
+            else:
+                layout_res = self.layout_model.predict(image)
        layout_cost = round(time.time() - layout_start, 2)
        logger.info(f'layout detection time: {layout_cost}')
-        pil_img = Image.fromarray(image)
        if self.apply_formula:
            # 公式检测
            mfd_start = time.time()

--- a/magic_pdf/post_proc/__init__.py
+++ b/magic_pdf/post_proc/__init__.py
+# Copyright (c) Opendatalab. All rights reserved.