"vscode:/vscode.git/clone" did not exist on "b1deeb539a8b55aa75da69e6934b4f4a15e63d49"
Commit 8caf59f7 authored by myhloli
Browse files

refactor(footnote_detection): adjust footnote detection threshold

- Narrow the footnote detection region from the bottom 50% of the page to the bottom 30% (the y0 cutoff moves from page_h / 2 to page_h * 0.7)
- Improve accuracy of footnote identification in PDF processing
parent 4df8523a
......@@ -99,11 +99,11 @@ def ocr_prepare_bboxes_for_layout_split_v2(
all_discarded_blocks = []
add_bboxes(discarded_blocks, BlockType.Discarded, all_discarded_blocks)
"""footnote识别:宽度超过1/3页面宽度的,高度超过10的,处于页面下半50%区域的"""
"""footnote识别:宽度超过1/3页面宽度的,高度超过10的,处于页面下半30%区域的"""
footnote_blocks = []
for discarded in discarded_blocks:
x0, y0, x1, y1 = discarded['bbox']
if (x1 - x0) > (page_w / 3) and (y1 - y0) > 10 and y0 > (page_h / 2):
if (x1 - x0) > (page_w / 3) and (y1 - y0) > 10 and y0 > (page_h * 0.7):
footnote_blocks.append([x0, y0, x1, y1])
"""移除在footnote下面的任何框"""
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment