"docs/zh/git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "107624b2c5e923db5f447aa5737403f2df4f5ed8"
Unverified commit 0508df99, authored by Xiaomeng Zhao, committed by GitHub
Browse files

Update mineru/utils/span_block_fix.py


Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
parent 18691cfd
......@@ -73,9 +73,10 @@ def fix_text_block(block):
span['type'] = ContentType.INLINE_EQUATION
# 假设block中的span超过80%的数量高度是宽度的两倍以上,则认为是纵向文本块
VERTICAL_TEXT_RATIO_THRESHOLD = 2 # Threshold for determining vertical text blocks
vertical_span_count = sum(
1 for span in block['spans']
if (span['bbox'][3] - span['bbox'][1]) / (span['bbox'][2] - span['bbox'][0]) > 2
if (span['bbox'][3] - span['bbox'][1]) / (span['bbox'][2] - span['bbox'][0]) > VERTICAL_TEXT_RATIO_THRESHOLD
)
total_span_count = len(block['spans'])
if total_span_count == 0:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment