Unverified commit 0440ee87, authored by Xiaomeng Zhao, committed by GitHub
Browse files

Merge pull request #1260 from opendatalab/dev

fix: duplicate PDF type classification & improve layout detection for the DocLayout_YOLO model
parents fb468671 327fdf90
...@@ -179,7 +179,25 @@ class CustomPEKModel: ...@@ -179,7 +179,25 @@ class CustomPEKModel:
layout_res = self.layout_model(image, ignore_catids=[]) layout_res = self.layout_model(image, ignore_catids=[])
elif self.layout_model_name == MODEL_NAME.DocLayout_YOLO: elif self.layout_model_name == MODEL_NAME.DocLayout_YOLO:
# doclayout_yolo # doclayout_yolo
layout_res = self.layout_model.predict(image) img_pil = Image.fromarray(image)
width, height = img_pil.size
# logger.info(f'width: {width}, height: {height}')
input_res = {"poly":[0,0,width,0,width,height,0,height]}
new_image, useful_list = crop_img(input_res, img_pil, crop_paste_x=width//2, crop_paste_y=0)
paste_x, paste_y, xmin, ymin, xmax, ymax, new_width, new_height = useful_list
layout_res = self.layout_model.predict(new_image)
for res in layout_res:
p1, p2, p3, p4, p5, p6, p7, p8 = res['poly']
p1 = p1 - paste_x + xmin
p2 = p2 - paste_y + ymin
p3 = p3 - paste_x + xmin
p4 = p4 - paste_y + ymin
p5 = p5 - paste_x + xmin
p6 = p6 - paste_y + ymin
p7 = p7 - paste_x + xmin
p8 = p8 - paste_y + ymin
res['poly'] = [p1, p2, p3, p4, p5, p6, p7, p8]
layout_cost = round(time.time() - layout_start, 2) layout_cost = round(time.time() - layout_start, 2)
logger.info(f'layout detection time: {layout_cost}') logger.info(f'layout detection time: {layout_cost}')
......
...@@ -123,6 +123,9 @@ def do_parse( ...@@ -123,6 +123,9 @@ def do_parse(
formula_enable=formula_enable, formula_enable=formula_enable,
table_enable=table_enable, table_enable=table_enable,
) )
pipe_result = infer_result.pipe_txt_mode(
image_writer, debug_mode=True, lang=lang
)
else: else:
infer_result = ds.apply( infer_result = ds.apply(
doc_analyze, doc_analyze,
...@@ -132,7 +135,7 @@ def do_parse( ...@@ -132,7 +135,7 @@ def do_parse(
formula_enable=formula_enable, formula_enable=formula_enable,
table_enable=table_enable, table_enable=table_enable,
) )
pipe_result = infer_result.pipe_auto_mode( pipe_result = infer_result.pipe_ocr_mode(
image_writer, debug_mode=True, lang=lang image_writer, debug_mode=True, lang=lang
) )
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment