Commit f3b09a0b authored by zhougaofeng
Browse files

Update para_split_v2.py

parent c0a9d1c7
......@@ -763,16 +763,16 @@ def para_split(pdf_info_dict, debug_mode, lang="en"):
is_conn = __connect_para_inter_page(pre_page_paras, next_page_paras, pre_page_layout_bbox,
next_page_layout_bbox, page_num, lang)
if debug_able:
if is_conn:
logger.info(f"连接了第{page_num - 1}页和第{page_num}页的段落")
# if debug_able:
# if is_conn:
# logger.info(f"连接了第{page_num - 1}页和第{page_num}页的段落")
is_list_conn = __connect_list_inter_page(pre_page_paras, next_page_paras, pre_page_layout_bbox,
next_page_layout_bbox, all_page_list_info[page_num - 1],
all_page_list_info[page_num], page_num, lang)
if debug_able:
if is_list_conn:
logger.info(f"连接了第{page_num - 1}页和第{page_num}页的列表段落")
# if debug_able:
# if is_list_conn:
# logger.info(f"连接了第{page_num - 1}页和第{page_num}页的列表段落")
"""接下来可能会漏掉一些特别的一些可以合并的内容,对他们进行段落连接
1. 正文中有时出现一个行顶格,接下来几行缩进的情况。
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment