Commit e1c7a886 authored by zhougaofeng
Browse files

Update pdf_parse_union_core_v2.py

parent 44e0b598
...@@ -382,7 +382,7 @@ def revert_group_blocks(blocks): ...@@ -382,7 +382,7 @@ def revert_group_blocks(blocks):
return new_blocks return new_blocks
def parse_page_core(config_path,local_image_dir, def parse_page_core(ocr_status,config_path,local_image_dir,
page_doc: PageableData, magic_model, page_id, pdf_bytes_md5, imageWriter, parse_mode page_doc: PageableData, magic_model, page_id, pdf_bytes_md5, imageWriter, parse_mode
): ):
need_drop = False need_drop = False
...@@ -429,7 +429,7 @@ def parse_page_core(config_path,local_image_dir, ...@@ -429,7 +429,7 @@ def parse_page_core(config_path,local_image_dir,
"""删除重叠spans中较小的那些""" """删除重叠spans中较小的那些"""
spans, dropped_spans_by_span_overlap = remove_overlaps_min_spans(spans) spans, dropped_spans_by_span_overlap = remove_overlaps_min_spans(spans)
"""对image和table截图""" """对image和table截图"""
spans = ocr_cut_image_and_table(config_path,local_image_dir, spans = ocr_cut_image_and_table(ocr_status,config_path,local_image_dir,
spans, page_doc, page_id, pdf_bytes_md5, imageWriter spans, page_doc, page_id, pdf_bytes_md5, imageWriter
) )
...@@ -581,7 +581,7 @@ def pdf_parse_union(config_path,local_image_dir, ...@@ -581,7 +581,7 @@ def pdf_parse_union(config_path,local_image_dir,
"""解析pdf中的每一页""" """解析pdf中的每一页"""
if start_page_id <= page_id <= end_page_id: if start_page_id <= page_id <= end_page_id:
page_info = parse_page_core( page_info = parse_page_core(ocr_status,
config_path,local_image_dir,page, magic_model, page_id, pdf_bytes_md5, imageWriter, parse_mode config_path,local_image_dir,page, magic_model, page_id, pdf_bytes_md5, imageWriter, parse_mode
) )
else: else:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment