Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
3e8d8a3a
"vscode:/vscode.git/clone" did not exist on "a5db5f6682de0b659ee5300f5f047278f5ec3cfa"
Unverified
Commit
3e8d8a3a
authored
Jan 07, 2025
by
Xiaomeng Zhao
Committed by
GitHub
Jan 07, 2025
Browse files
Update demo.py
parent
1afff715
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
2 deletions
+14
-2
demo/demo.py
demo/demo.py
+14
-2
No files found.
demo/demo.py
View file @
3e8d8a3a
...
@@ -5,6 +5,7 @@ from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedData
...
@@ -5,6 +5,7 @@ from magic_pdf.data.data_reader_writer import FileBasedDataWriter, FileBasedData
from
magic_pdf.data.dataset
import
PymuDocDataset
from
magic_pdf.data.dataset
import
PymuDocDataset
from
magic_pdf.model.doc_analyze_by_custom_model
import
doc_analyze
from
magic_pdf.model.doc_analyze_by_custom_model
import
doc_analyze
from
magic_pdf.config.enums
import
SupportedPdfParseMethod
from
magic_pdf.config.enums
import
SupportedPdfParseMethod
from
magic_pdf.config.make_content_config
import
DropMode
,
MakeMode
# args
# args
pdf_file_name
=
"demo1.pdf"
# replace with the real pdf path
pdf_file_name
=
"demo1.pdf"
# replace with the real pdf path
...
@@ -19,7 +20,6 @@ os.makedirs(local_image_dir, exist_ok=True)
...
@@ -19,7 +20,6 @@ os.makedirs(local_image_dir, exist_ok=True)
image_writer
,
md_writer
=
FileBasedDataWriter
(
local_image_dir
),
FileBasedDataWriter
(
image_writer
,
md_writer
=
FileBasedDataWriter
(
local_image_dir
),
FileBasedDataWriter
(
local_md_dir
local_md_dir
)
)
image_dir
=
str
(
os
.
path
.
basename
(
local_image_dir
))
# read bytes
# read bytes
reader1
=
FileBasedDataReader
(
""
)
reader1
=
FileBasedDataReader
(
""
)
...
@@ -45,6 +45,9 @@ else:
...
@@ -45,6 +45,9 @@ else:
### draw model result on each page
### draw model result on each page
infer_result
.
draw_model
(
os
.
path
.
join
(
local_md_dir
,
f
"
{
name_without_suff
}
_model.pdf"
))
infer_result
.
draw_model
(
os
.
path
.
join
(
local_md_dir
,
f
"
{
name_without_suff
}
_model.pdf"
))
### get model inference result
model_inference_result
=
infer_result
.
get_infer_res
()
### draw layout result on each page
### draw layout result on each page
pipe_result
.
draw_layout
(
os
.
path
.
join
(
local_md_dir
,
f
"
{
name_without_suff
}
_layout.pdf"
))
pipe_result
.
draw_layout
(
os
.
path
.
join
(
local_md_dir
,
f
"
{
name_without_suff
}
_layout.pdf"
))
...
@@ -56,3 +59,12 @@ pipe_result.dump_md(md_writer, f"{name_without_suff}.md", image_dir)
...
@@ -56,3 +59,12 @@ pipe_result.dump_md(md_writer, f"{name_without_suff}.md", image_dir)
### dump content list
### dump content list
pipe_result
.
dump_content_list
(
md_writer
,
f
"
{
name_without_suff
}
_content_list.json"
,
image_dir
)
pipe_result
.
dump_content_list
(
md_writer
,
f
"
{
name_without_suff
}
_content_list.json"
,
image_dir
)
### get markdown content
md_content
=
pipe_result
.
get_markdown
(
image_dir
,
drop_mode
=
DropMode
.
WHOLE_PDF
,
md_make_mode
=
MakeMode
.
MM_MD
)
### get content list content
content_list_content
=
pipe_result
.
get_content_list
(
image_dir
,
drop_mode
=
DropMode
.
NONE
,
md_make_mode
=
MakeMode
.
STANDARD_FORMAT
)
### get middle json
middle_json_content
=
pipe_result
.
get_middle_json
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment