Commit 30bd3a83 authored by myhloli
Browse files

fix(pdf_parse): Fixed the issue where some headings were missing in certain complex layouts.

parent 619aabd2
...@@ -436,7 +436,8 @@ def cal_block_index(fix_blocks, sorted_bboxes): ...@@ -436,7 +436,8 @@ def cal_block_index(fix_blocks, sorted_bboxes):
block_bboxes.append(block['bbox']) block_bboxes.append(block['bbox'])
# 删除图表body block中的虚拟line信息, 并用real_lines信息回填 # 删除图表body block中的虚拟line信息, 并用real_lines信息回填
if block['type'] in [BlockType.ImageBody, BlockType.TableBody]: if block['type'] in [BlockType.ImageBody, BlockType.TableBody, BlockType.Title, BlockType.InterlineEquation]:
if 'real_lines' in block:
block['virtual_lines'] = copy.deepcopy(block['lines']) block['virtual_lines'] = copy.deepcopy(block['lines'])
block['lines'] = copy.deepcopy(block['real_lines']) block['lines'] = copy.deepcopy(block['real_lines'])
del block['real_lines'] del block['real_lines']
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment