Commit 05c6f0b1 authored by myhloli's avatar myhloli
Browse files

fix: improve block filtering and cleanup in para_split.py

parent fefe2d36
......@@ -365,12 +365,12 @@ def para_split(page_info_list):
for page_info in page_info_list:
page_info['para_blocks'] = []
for block in all_blocks:
if block['page_num'] == page_info['page_idx']:
page_info['para_blocks'].append(block)
# 从block中删除不需要的page_num和page_size字段
del block['page_num']
del block['page_size']
if 'page_num' in block:
if block['page_num'] == page_info['page_idx']:
page_info['para_blocks'].append(block)
# 从block中删除不需要的page_num和page_size字段
del block['page_num']
del block['page_size']
if __name__ == '__main__':
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment