Unverified commit e9203f91, authored by Xiaomeng Zhao and committed by GitHub
Browse files

Merge pull request #1689 from myhloli/dev

fix(pdf_parse): Fixed the issue where some headings were missing in certain complex layouts.
parents b983238d 30bd3a83
...@@ -436,10 +436,11 @@ def cal_block_index(fix_blocks, sorted_bboxes): ...@@ -436,10 +436,11 @@ def cal_block_index(fix_blocks, sorted_bboxes):
block_bboxes.append(block['bbox']) block_bboxes.append(block['bbox'])
# 删除图表body block中的虚拟line信息, 并用real_lines信息回填 # 删除图表body block中的虚拟line信息, 并用real_lines信息回填
if block['type'] in [BlockType.ImageBody, BlockType.TableBody]: if block['type'] in [BlockType.ImageBody, BlockType.TableBody, BlockType.Title, BlockType.InterlineEquation]:
block['virtual_lines'] = copy.deepcopy(block['lines']) if 'real_lines' in block:
block['lines'] = copy.deepcopy(block['real_lines']) block['virtual_lines'] = copy.deepcopy(block['lines'])
del block['real_lines'] block['lines'] = copy.deepcopy(block['real_lines'])
del block['real_lines']
import numpy as np import numpy as np
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment