Unverified commit c7a3a683, authored by Xiaomeng Zhao and committed by GitHub
Browse files

Merge pull request #1601 from myhloli/dev

refactor(pdf_parse): uncomment char bbox validation logic
parents 98c05682 c38060d5
...@@ -185,10 +185,13 @@ def calculate_iou(bbox1, bbox2): ...@@ -185,10 +185,13 @@ def calculate_iou(bbox1, bbox2):
bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1]) bbox1_area = (bbox1[2] - bbox1[0]) * (bbox1[3] - bbox1[1])
bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1]) bbox2_area = (bbox2[2] - bbox2[0]) * (bbox2[3] - bbox2[1])
if any([bbox1_area == 0, bbox2_area == 0]):
return 0
# Compute the intersection over union by taking the intersection area # Compute the intersection over union by taking the intersection area
# and dividing it by the sum of both areas minus the intersection area # and dividing it by the sum of both areas minus the intersection area
iou = intersection_area / float(bbox1_area + bbox2_area - iou = intersection_area / float(bbox1_area + bbox2_area - intersection_area)
intersection_area)
return iou return iou
......
...@@ -118,9 +118,10 @@ def fill_char_in_spans(spans, all_chars): ...@@ -118,9 +118,10 @@ def fill_char_in_spans(spans, all_chars):
for char in all_chars: for char in all_chars:
# 跳过非法bbox的char # 跳过非法bbox的char
x1, y1, x2, y2 = char['bbox'] # x1, y1, x2, y2 = char['bbox']
if abs(x1 - x2) <= 0.01 or abs(y1 - y2) <= 0.01: # if abs(x1 - x2) <= 0.01 or abs(y1 - y2) <= 0.01:
continue # continue
for span in spans: for span in spans:
if calculate_char_in_span(char['bbox'], span['bbox'], char['c']): if calculate_char_in_span(char['bbox'], span['bbox'], char['c']):
span['chars'].append(char) span['chars'].append(char)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment