Commit e1c7a886 authored by zhougaofeng
Browse files

Update pdf_parse_union_core_v2.py

parent 44e0b598
......@@ -382,7 +382,7 @@ def revert_group_blocks(blocks):
return new_blocks
def parse_page_core(config_path,local_image_dir,
def parse_page_core(ocr_status,config_path,local_image_dir,
page_doc: PageableData, magic_model, page_id, pdf_bytes_md5, imageWriter, parse_mode
):
need_drop = False
......@@ -429,7 +429,7 @@ def parse_page_core(config_path,local_image_dir,
"""删除重叠spans中较小的那些"""
spans, dropped_spans_by_span_overlap = remove_overlaps_min_spans(spans)
"""对image和table截图"""
spans = ocr_cut_image_and_table(config_path,local_image_dir,
spans = ocr_cut_image_and_table(ocr_status,config_path,local_image_dir,
spans, page_doc, page_id, pdf_bytes_md5, imageWriter
)
......@@ -581,7 +581,7 @@ def pdf_parse_union(config_path,local_image_dir,
"""解析pdf中的每一页"""
if start_page_id <= page_id <= end_page_id:
page_info = parse_page_core(
page_info = parse_page_core(ocr_status,
config_path,local_image_dir,page, magic_model, page_id, pdf_bytes_md5, imageWriter, parse_mode
)
else:
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment