Commit 6a75d7dc authored by myhloli
Browse files

perf(layout): optimize layout detection for PDF extraction

- Add initial setup for layout detection
- Implement conditional cropping for tall images
- Skip cropping for wide images to improve performance
- Reuse Image object across layout detection steps
parent 56b0e18b
...@@ -171,6 +171,10 @@ class CustomPEKModel: ...@@ -171,6 +171,10 @@ class CustomPEKModel:
def __call__(self, image): def __call__(self, image):
pil_img = Image.fromarray(image)
width, height = pil_img.size
# logger.info(f'width: {width}, height: {height}')
# layout检测 # layout检测
layout_start = time.time() layout_start = time.time()
layout_res = [] layout_res = []
...@@ -179,30 +183,28 @@ class CustomPEKModel: ...@@ -179,30 +183,28 @@ class CustomPEKModel:
layout_res = self.layout_model(image, ignore_catids=[]) layout_res = self.layout_model(image, ignore_catids=[])
elif self.layout_model_name == MODEL_NAME.DocLayout_YOLO: elif self.layout_model_name == MODEL_NAME.DocLayout_YOLO:
# doclayout_yolo # doclayout_yolo
img_pil = Image.fromarray(image) if height > width:
width, height = img_pil.size input_res = {"poly":[0,0,width,0,width,height,0,height]}
# logger.info(f'width: {width}, height: {height}') new_image, useful_list = crop_img(input_res, pil_img, crop_paste_x=width//2, crop_paste_y=0)
input_res = {"poly":[0,0,width,0,width,height,0,height]} paste_x, paste_y, xmin, ymin, xmax, ymax, new_width, new_height = useful_list
new_image, useful_list = crop_img(input_res, img_pil, crop_paste_x=width//2, crop_paste_y=0) layout_res = self.layout_model.predict(new_image)
paste_x, paste_y, xmin, ymin, xmax, ymax, new_width, new_height = useful_list for res in layout_res:
layout_res = self.layout_model.predict(new_image) p1, p2, p3, p4, p5, p6, p7, p8 = res['poly']
for res in layout_res: p1 = p1 - paste_x + xmin
p1, p2, p3, p4, p5, p6, p7, p8 = res['poly'] p2 = p2 - paste_y + ymin
p1 = p1 - paste_x + xmin p3 = p3 - paste_x + xmin
p2 = p2 - paste_y + ymin p4 = p4 - paste_y + ymin
p3 = p3 - paste_x + xmin p5 = p5 - paste_x + xmin
p4 = p4 - paste_y + ymin p6 = p6 - paste_y + ymin
p5 = p5 - paste_x + xmin p7 = p7 - paste_x + xmin
p6 = p6 - paste_y + ymin p8 = p8 - paste_y + ymin
p7 = p7 - paste_x + xmin res['poly'] = [p1, p2, p3, p4, p5, p6, p7, p8]
p8 = p8 - paste_y + ymin else:
res['poly'] = [p1, p2, p3, p4, p5, p6, p7, p8] layout_res = self.layout_model.predict(image)
layout_cost = round(time.time() - layout_start, 2) layout_cost = round(time.time() - layout_start, 2)
logger.info(f'layout detection time: {layout_cost}') logger.info(f'layout detection time: {layout_cost}')
pil_img = Image.fromarray(image)
if self.apply_formula: if self.apply_formula:
# 公式检测 # 公式检测
mfd_start = time.time() mfd_start = time.time()
......
# Copyright (c) Opendatalab. All rights reserved.
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment