pdf_parse_by_ocr.py 641 Bytes
Newer Older
1
from magic_pdf.pdf_parse_union_core_v2 import pdf_parse_union
赵小蒙's avatar
赵小蒙 committed
2

赵小蒙's avatar
赵小蒙 committed
3

赵小蒙's avatar
赵小蒙 committed
4
5
6
7
8
9
10
def parse_pdf_by_ocr(pdf_bytes,
                     model_list,
                     imageWriter,
                     start_page_id=0,
                     end_page_id=None,
                     debug_mode=False,
                     ):
    """Run the shared PDF parsing entry point with the "ocr" mode argument.

    This is a thin wrapper: every argument is handed to ``pdf_parse_union``
    unchanged, with the literal string ``"ocr"`` supplied as its fourth
    positional argument.

    Args:
        pdf_bytes: Forwarded as the first positional argument (presumably
            the raw bytes of the PDF -- confirm against ``pdf_parse_union``).
        model_list: Forwarded as the second positional argument (presumably
            model output for the document -- confirm against
            ``pdf_parse_union``).
        imageWriter: Forwarded as the third positional argument (presumably
            a writer object for extracted images -- confirm against
            ``pdf_parse_union``).
        start_page_id: Forwarded as the ``start_page_id`` keyword; defaults
            to 0.
        end_page_id: Forwarded as the ``end_page_id`` keyword; defaults to
            None.
        debug_mode: Forwarded as the ``debug_mode`` keyword; defaults to
            False.

    Returns:
        Whatever ``pdf_parse_union`` returns for these arguments.
    """
    # The only thing this wrapper adds is the parse-mode selector.
    parse_mode = "ocr"
    forwarded_options = {
        "start_page_id": start_page_id,
        "end_page_id": end_page_id,
        "debug_mode": debug_mode,
    }
    return pdf_parse_union(
        pdf_bytes, model_list, imageWriter, parse_mode, **forwarded_options
    )