Commit b9f78c9b authored by myhloli
Browse files

refactor(ocr_dict_merge): add threshold parameter for line merging

- Add threshold parameter to merge_spans_to_line function
- Make threshold configurable for y-axis overlap check
- Improve flexibility and accuracy of line merging algorithm
parent 20ed0cd5
...@@ -24,7 +24,7 @@ def line_sort_spans_by_left_to_right(lines): ...@@ -24,7 +24,7 @@ def line_sort_spans_by_left_to_right(lines):
return line_objects return line_objects
def merge_spans_to_line(spans): def merge_spans_to_line(spans, threshold=0.6):
if len(spans) == 0: if len(spans) == 0:
return [] return []
else: else:
...@@ -49,7 +49,7 @@ def merge_spans_to_line(spans): ...@@ -49,7 +49,7 @@ def merge_spans_to_line(spans):
continue continue
# 如果当前的span与当前行的最后一个span在y轴上重叠,则添加到当前行 # 如果当前的span与当前行的最后一个span在y轴上重叠,则添加到当前行
if __is_overlaps_y_exceeds_threshold(span['bbox'], current_line[-1]['bbox'], 0.5): if __is_overlaps_y_exceeds_threshold(span['bbox'], current_line[-1]['bbox'], threshold):
current_line.append(span) current_line.append(span)
else: else:
# 否则,开始新行 # 否则,开始新行
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment