"vscode:/vscode.git/clone" did not exist on "a5db5f6682de0b659ee5300f5f047278f5ec3cfa"
Unverified commit 3e8d8a3a, authored by Xiaomeng Zhao and committed by GitHub
Browse files

Update demo.py

parent 1afff715
...@@ -5,6 +5,7 @@ from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedData ...@@ -5,6 +5,7 @@ from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedData
from magic_pdf.data.dataset import PymuDocDataset from magic_pdf.data.dataset import PymuDocDataset
from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze from magic_pdf.model.doc_analyze_by_custom_model import doc_analyze
from magic_pdf.config.enums import SupportedPdfParseMethod from magic_pdf.config.enums import SupportedPdfParseMethod
from magic_pdf.config.make_content_config import DropMode, MakeMode
# args # args
pdf_file_name = "demo1.pdf" # replace with the real pdf path pdf_file_name = "demo1.pdf" # replace with the real pdf path
...@@ -19,7 +20,6 @@ os.makedirs(local_image_dir, exist_ok=True) ...@@ -19,7 +20,6 @@ os.makedirs(local_image_dir, exist_ok=True)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter( image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(
local_md_dir local_md_dir
) )
image_dir = str(os.path.basename(local_image_dir))
# read bytes # read bytes
reader1 = FileBasedDataReader("") reader1 = FileBasedDataReader("")
...@@ -45,6 +45,9 @@ else: ...@@ -45,6 +45,9 @@ else:
### draw model result on each page ### draw model result on each page
infer_result.draw_model(os.path.join(local_md_dir, f"{name_without_suff}_model.pdf")) infer_result.draw_model(os.path.join(local_md_dir, f"{name_without_suff}_model.pdf"))
### get model inference result
model_inference_result = infer_result.get_infer_res()
### draw layout result on each page ### draw layout result on each page
pipe_result.draw_layout(os.path.join(local_md_dir, f"{name_without_suff}_layout.pdf")) pipe_result.draw_layout(os.path.join(local_md_dir, f"{name_without_suff}_layout.pdf"))
...@@ -56,3 +59,12 @@ pipe_result.dump_md(md_writer, f"{name_without_suff}.md", image_dir) ...@@ -56,3 +59,12 @@ pipe_result.dump_md(md_writer, f"{name_without_suff}.md", image_dir)
### dump content list ### dump content list
pipe_result.dump_content_list(md_writer, f"{name_without_suff}_content_list.json", image_dir) pipe_result.dump_content_list(md_writer, f"{name_without_suff}_content_list.json", image_dir)
### get markdown content
md_content = pipe_result.get_markdown(image_dir, drop_mode=DropMode.WHOLE_PDF, md_make_mode=MakeMode.MM_MD)
### get content list content
content_list_content = pipe_result.get_content_list(image_dir, drop_mode=DropMode.NONE, md_make_mode=MakeMode.STANDARD_FORMAT)
### get middle json
middle_json_content = pipe_result.get_middle_json()
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment