Merge pull request #1272 from myhloli/add-llm-aided

perf(layout): optimize layout detection for PDF extraction

Merge pull request #1272 from myhloli/add-llm-aided
perf(layout): optimize layout detection for PDF extraction
5bbd07a1 · Xiaomeng Zhao · GitHub · 56b0e18b · 6a75d7dc · 5bbd07a1
Unverified commit 5bbd07a1, authored Dec 12, 2024 by Xiaomeng Zhao; committed by GitHub on Dec 12, 2024
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 23 additions and 20 deletions

magic_pdf/model/pdf_extract_kit.py +22 -20

magic_pdf/post_proc/__init__.py +1 -0

No files found.
--- a/magic_pdf/model/pdf_extract_kit.py
+++ b/magic_pdf/model/pdf_extract_kit.py
@@ -171,6 +171,10 @@ class CustomPEKModel:
    def __call__(self, image):
+        pil_img = Image.fromarray(image)
+        width, height = pil_img.size
+        # logger.info(f'width: {width}, height: {height}')
        # layout检测
        layout_start = time.time()
        layout_res = []
@@ -179,30 +183,28 @@ class CustomPEKModel:
            layout_res = self.layout_model(image, ignore_catids=[])
        elif self.layout_model_name == MODEL_NAME.DocLayout_YOLO:
            # doclayout_yolo
-            img_pil = Image.fromarray(image)
+            if height > width:
-            width, height = img_pil.size
+                input_res = {"poly":[0,0,width,0,width,height,0,height]}
-            # logger.info(f'width: {width}, height: {height}')
+                new_image, useful_list = crop_img(input_res, pil_img, crop_paste_x=width//2, crop_paste_y=0)
-            input_res = {"poly":[0,0,width,0,width,height,0,height]}
+                paste_x, paste_y, xmin, ymin, xmax, ymax, new_width, new_height = useful_list
-            new_image, useful_list = crop_img(input_res, img_pil, crop_paste_x=width//2, crop_paste_y=0)
+                layout_res = self.layout_model.predict(new_image)
-            paste_x, paste_y, xmin, ymin, xmax, ymax, new_width, new_height = useful_list
+                for res in layout_res:
-            layout_res = self.layout_model.predict(new_image)
+                    p1, p2, p3, p4, p5, p6, p7, p8 = res['poly']
-            for res in layout_res:
+                    p1 = p1 - paste_x + xmin
-                p1, p2, p3, p4, p5, p6, p7, p8 = res['poly']
+                    p2 = p2 - paste_y + ymin
-                p1 = p1 - paste_x + xmin
+                    p3 = p3 - paste_x + xmin
-                p2 = p2 - paste_y + ymin
+                    p4 = p4 - paste_y + ymin
-                p3 = p3 - paste_x + xmin
+                    p5 = p5 - paste_x + xmin
-                p4 = p4 - paste_y + ymin
+                    p6 = p6 - paste_y + ymin
-                p5 = p5 - paste_x + xmin
+                    p7 = p7 - paste_x + xmin
-                p6 = p6 - paste_y + ymin
+                    p8 = p8 - paste_y + ymin
-                p7 = p7 - paste_x + xmin
+                    res['poly'] = [p1, p2, p3, p4, p5, p6, p7, p8]
-                p8 = p8 - paste_y + ymin
+            else:
-                res['poly'] = [p1, p2, p3, p4, p5, p6, p7, p8]
+                layout_res = self.layout_model.predict(image)
        layout_cost = round(time.time() - layout_start, 2)
        logger.info(f'layout detection time: {layout_cost}')
-        pil_img = Image.fromarray(image)
        if self.apply_formula:
            # 公式检测
            mfd_start = time.time()

--- a/magic_pdf/post_proc/__init__.py
+++ b/magic_pdf/post_proc/__init__.py
+# Copyright (c) Opendatalab. All rights reserved.