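Update pdf_parse_union_core_v2.py: remove the ocr_status parameter from parse_page_core and pdf_parse_union, and from the calls to ocr_cut_image_and_table and parse_page_core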

b9336031 · zhougaofeng · 28e8f8b8 · b9336031
Commit b9336031 authored Dec 31, 2024 by zhougaofeng
Show whitespace changes
Inline Side-by-side

Showing with 4 additions and 4 deletions

magic_pdf/pdf_parse_union_core_v2.py +4 -4

No files found.
--- a/magic_pdf/pdf_parse_union_core_v2.py
+++ b/magic_pdf/pdf_parse_union_core_v2.py
@@ -381,7 +381,7 @@ def revert_group_blocks(blocks):
    return new_blocks
-def parse_page_core(ocr_status,config_path,local_image_dir,
+def parse_page_core(config_path,local_image_dir,
    page_doc: PageableData, magic_model, page_id, pdf_bytes_md5, imageWriter, parse_mode
 ):
    need_drop = False
@@ -428,7 +428,7 @@ def parse_page_core(ocr_status,config_path,local_image_dir,
    """删除重叠spans中较小的那些"""
    spans, dropped_spans_by_span_overlap = remove_overlaps_min_spans(spans)
    """对image和table截图"""
-    spans = ocr_cut_image_and_table(ocr_status,config_path,local_image_dir,
+    spans = ocr_cut_image_and_table(config_path,local_image_dir,
        spans, page_doc, page_id, pdf_bytes_md5, imageWriter
    )
@@ -524,7 +524,7 @@ def parse_page_core(ocr_status,config_path,local_image_dir,
    return page_info
-def pdf_parse_union(ocr_status,config_path,local_image_dir,
+def pdf_parse_union(config_path,local_image_dir,
    dataset: Dataset,
    model_list,
    imageWriter,
@@ -568,7 +568,7 @@ def pdf_parse_union(ocr_status,config_path,local_image_dir,
        """解析pdf中的每一页"""
        if start_page_id <= page_id <= end_page_id:
            page_info = parse_page_core(
-                ocr_status,config_path,local_image_dir,page, magic_model, page_id, pdf_bytes_md5, imageWriter, parse_mode
+                config_path,local_image_dir,page, magic_model, page_id, pdf_bytes_md5, imageWriter, parse_mode
            )
        else:
            page_info = page.get_page_info()