"tools/vscode:/vscode.git/clone" did not exist on "24deecb3e37737957ff69d2895ca387c94ceda61"
Commit 97bcc8b2 authored by myhloli's avatar myhloli
Browse files

refactor(pdf_parse): improve code readability and maintainability

parent 034c59a8
......@@ -179,16 +179,15 @@ def txt_spans_extract_v2(pdf_page, spans, all_bboxes, all_discarded_blocks, lang
useful_spans = []
unuseful_spans = []
for span in spans:
for block in all_bboxes + all_discarded_blocks:
if block[7] in [BlockType.ImageBody, BlockType.TableBody, BlockType.InterlineEquation]:
continue
overlap_ratio = calculate_overlap_area_in_bbox1_area_ratio(span['bbox'], block[0:4])
if overlap_ratio > 0.5:
if block in all_bboxes:
useful_spans.append(span)
else:
unuseful_spans.append(span)
break
for block in all_bboxes + all_discarded_blocks:
if block[7] in [BlockType.ImageBody, BlockType.TableBody, BlockType.InterlineEquation]:
continue
if calculate_overlap_area_in_bbox1_area_ratio(span['bbox'], block[0:4]) > 0.5:
if block in all_bboxes:
useful_spans.append(span)
else:
unuseful_spans.append(span)
break
new_spans = []
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment