Unverified commit bd3a7b37, authored by Xiaomeng Zhao, committed by GitHub
Browse files

Merge pull request #1757 from myhloli/dev

feat(pre_proc): add block type compatibility check for span allocation
parents 9fd10b66 19916856
......@@ -60,6 +60,19 @@ def merge_spans_to_line(spans, threshold=0.6):
return lines
def span_block_type_compatible(span_type, block_type):
    """Tell whether a span of ``span_type`` is allowed inside ``block_type``.

    The span type selects the set of block types it may be attached to:

    * text and inline-equation spans -> text, title, image caption/footnote
      and table caption/footnote blocks;
    * interline-equation spans -> interline-equation blocks;
    * image spans -> image-body blocks;
    * table spans -> table-body blocks.

    Any other span type is compatible with no block at all.

    Args:
        span_type: A ``ContentType`` value describing the span.
        block_type: A ``BlockType`` value describing the candidate block.

    Returns:
        ``True`` when ``block_type`` belongs to the set permitted for
        ``span_type``, otherwise ``False``.
    """
    if span_type in (ContentType.Text, ContentType.InlineEquation):
        permitted_blocks = (
            BlockType.Text,
            BlockType.Title,
            BlockType.ImageCaption,
            BlockType.ImageFootnote,
            BlockType.TableCaption,
            BlockType.TableFootnote,
        )
    elif span_type == ContentType.InterlineEquation:
        permitted_blocks = (BlockType.InterlineEquation,)
    elif span_type == ContentType.Image:
        permitted_blocks = (BlockType.ImageBody,)
    elif span_type == ContentType.Table:
        permitted_blocks = (BlockType.TableBody,)
    else:
        # Unknown span types match nothing; an empty tuple makes the
        # membership test below evaluate to False.
        permitted_blocks = ()

    return block_type in permitted_blocks
def fill_spans_in_blocks(blocks, spans, radio):
"""将allspans中的span按位置关系,放入blocks中."""
block_with_spans = []
......@@ -78,8 +91,7 @@ def fill_spans_in_blocks(blocks, spans, radio):
block_spans = []
for span in spans:
span_bbox = span['bbox']
if calculate_overlap_area_in_bbox1_area_ratio(
span_bbox, block_bbox) > radio:
if calculate_overlap_area_in_bbox1_area_ratio(span_bbox, block_bbox) > radio and span_block_type_compatible(span['type'], block_type):
block_spans.append(span)
block_dict['spans'] = block_spans
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment