Commit 6b55fcfd authored by myhloli
Browse files

fix(pdf_parse): ensure block bounding boxes do not have negative values

- Add logic to set any negative values in block['bbox'] to 0
- This prevents potential errors when processing PDF blocks
parent b65b8f12
...@@ -373,6 +373,8 @@ def cal_block_index(fix_blocks, sorted_bboxes): ...@@ -373,6 +373,8 @@ def cal_block_index(fix_blocks, sorted_bboxes):
# 使用xycut排序 # 使用xycut排序
block_bboxes = [] block_bboxes = []
for block in fix_blocks: for block in fix_blocks:
# 如果block['bbox']任意值小于0,将其置为0
block['bbox'] = [max(0, x) for x in block['bbox']]
block_bboxes.append(block['bbox']) block_bboxes.append(block['bbox'])
# 删除图表body block中的虚拟line信息, 并用real_lines信息回填 # 删除图表body block中的虚拟line信息, 并用real_lines信息回填
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment