"test/git@developer.sourcefind.cn:OpenDAS/vision.git" did not exist on "afda28accbc79035384952c0359f0e4de8454cb3"
Unverified commit 0d83fb77, authored by Xiaomeng Zhao and committed by GitHub
Browse files

Merge pull request #743 from myhloli/para-split-v3

refactor(para_split_v3): merge list and index block detection
parents 702b6ac9 244b8684
...@@ -237,6 +237,8 @@ def draw_span_bbox(pdf_info, pdf_bytes, out_path, filename): ...@@ -237,6 +237,8 @@ def draw_span_bbox(pdf_info, pdf_bytes, out_path, filename):
BlockType.Text, BlockType.Text,
BlockType.Title, BlockType.Title,
BlockType.InterlineEquation, BlockType.InterlineEquation,
BlockType.List,
BlockType.Index,
]: ]:
for line in block['lines']: for line in block['lines']:
for span in line['spans']: for span in line['spans']:
......
This diff is collapsed.
...@@ -360,7 +360,7 @@ def parse_page_core(pdf_docs, magic_model, page_id, pdf_bytes_md5, imageWriter, ...@@ -360,7 +360,7 @@ def parse_page_core(pdf_docs, magic_model, page_id, pdf_bytes_md5, imageWriter,
need_drop, drop_reason) need_drop, drop_reason)
'''将span填入blocks中''' '''将span填入blocks中'''
block_with_spans, spans = fill_spans_in_blocks(all_bboxes, spans, 0.3) block_with_spans, spans = fill_spans_in_blocks(all_bboxes, spans, 0.5)
'''对block进行fix操作''' '''对block进行fix操作'''
fix_blocks = fix_block_spans(block_with_spans, img_blocks, table_blocks) fix_blocks = fix_block_spans(block_with_spans, img_blocks, table_blocks)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment