demo.py 926 Bytes
Newer Older
赵小蒙's avatar
赵小蒙 committed
1
2
import os

3
from loguru import logger
赵小蒙's avatar
赵小蒙 committed
4

5
6
from magic_pdf.data.data_reader_writer import FileBasedDataWriter
from magic_pdf.pipe.UNIPipe import UNIPipe
Xiaomeng Zhao's avatar
Xiaomeng Zhao committed
7

8
9
# Demo: run the magic_pdf UNIPipe pipeline on the PDF that sits next to this
# script and write the resulting Markdown to '<demo_name>.md'.
#
# Any exception raised along the way is logged (with traceback) through
# loguru instead of propagating, so the script itself never raises.
try:
    # Resolve inputs relative to this file rather than the working directory.
    current_script_dir = os.path.dirname(os.path.abspath(__file__))
    demo_name = 'demo1'
    pdf_path = os.path.join(current_script_dir, f'{demo_name}.pdf')

    # Read through a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open(pdf_path, 'rb') as pdf_file:
        pdf_bytes = pdf_file.read()

    # Handed to UNIPipe with an empty '_pdf_type' and an empty 'model_list'.
    # NOTE(review): presumably this makes the pipeline classify the PDF and
    # run its own model analysis -- confirm against the UNIPipe documentation.
    jso_useful_key = {'_pdf_type': '', 'model_list': []}

    # Images are written under '<script dir>/images'; only the directory's
    # base name ('images') is passed to the Markdown step below.
    local_image_dir = os.path.join(current_script_dir, 'images')
    image_dir = os.path.basename(local_image_dir)
    image_writer = FileBasedDataWriter(local_image_dir)

    # Pipeline stages, called in this fixed order: classify, analyze, parse.
    pipe = UNIPipe(pdf_bytes, jso_useful_key, image_writer)
    pipe.pipe_classify()
    pipe.pipe_analyze()
    pipe.pipe_parse()

    md_content = pipe.pipe_mk_markdown(image_dir, drop_mode='none')

    # NOTE(review): the output path is relative to the current working
    # directory, while the images live next to this script; relative image
    # references presumably resolve only when the script is run from its own
    # directory -- confirm before relying on it.
    with open(f'{demo_name}.md', 'w', encoding='utf-8') as f:
        f.write(md_content)
except Exception as e:
    # Top-level boundary of a demo script: record the failure and traceback.
    logger.exception(e)