Commit ae3b0a1e authored by myhloli
Browse files

fix(pdf_parse): improve line stop flag detection accuracy

- Add an additional condition to the line stop flag check
- Ensure the character's center is to the right of the span's left boundary
- This change helps reduce false positives in line stop detection
parent 9e4d6a45
...@@ -151,6 +151,7 @@ def calculate_char_in_span(char_bbox, span_bbox, char_is_line_stop_flag): ...@@ -151,6 +151,7 @@ def calculate_char_in_span(char_bbox, span_bbox, char_is_line_stop_flag):
if char_is_line_stop_flag: if char_is_line_stop_flag:
if ( if (
(span_bbox[2] - span_height) < char_bbox[0] < span_bbox[2] (span_bbox[2] - span_height) < char_bbox[0] < span_bbox[2]
and char_center_x > span_bbox[0]
and span_bbox[1] < char_center_y < span_bbox[3] and span_bbox[1] < char_center_y < span_bbox[3]
and abs(char_center_y - span_center_y) < span_height / 4 and abs(char_center_y - span_center_y) < span_height / 4
): ):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment