Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
ca8788fb
Commit
ca8788fb
authored
Jun 25, 2024
by
赵小蒙
Browse files
update cli
parent
6e8e81c9
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
24 additions
and
15 deletions
+24
-15
magic_pdf/cli/magicpdf.py
magic_pdf/cli/magicpdf.py
+24
-15
No files found.
magic_pdf/cli/magicpdf.py
View file @
ca8788fb
...
...
@@ -48,6 +48,7 @@ import csv
import
copy
parse_pdf_methods
=
click
.
Choice
([
"ocr"
,
"txt"
,
"auto"
])
use_inside_model
=
False
def
prepare_env
(
pdf_file_name
,
method
):
...
...
@@ -96,14 +97,18 @@ def do_parse(
elif
parse_method
==
"ocr"
:
pipe
=
OCRPipe
(
pdf_bytes
,
model_list
,
image_writer
,
is_debug
=
True
)
else
:
print
(
"unknown parse method"
)
logger
.
error
(
"unknown parse method"
)
sys
.
exit
(
1
)
pipe
.
pipe_classify
()
"""如果没有传入有效的模型数据,则使用内置
paddle
解析"""
"""如果没有传入有效的模型数据,则使用内置
model
解析"""
if
len
(
model_list
)
==
0
:
if
use_inside_model
:
pipe
.
pipe_analyze
()
else
:
logger
.
error
(
"need model list input"
)
sys
.
exit
(
1
)
pipe
.
pipe_parse
()
pdf_info
=
pipe
.
pdf_mid_data
[
"pdf_info"
]
...
...
@@ -267,7 +272,11 @@ def local_json_command(local_json, method):
help
=
"指定解析方法。txt: 文本型 pdf 解析方法, ocr: 光学识别解析 pdf, auto: 程序智能选择解析方法"
,
default
=
"auto"
,
)
def
pdf_command
(
pdf
,
model
,
method
):
@
click
.
option
(
"--inside_model"
,
type
=
click
.
BOOL
,
default
=
False
,
help
=
"使用内置模型测试"
)
def
pdf_command
(
pdf
,
model
,
method
,
inside_model
):
global
use_inside_model
use_inside_model
=
inside_model
def
read_fn
(
path
):
disk_rw
=
DiskReaderWriter
(
os
.
path
.
dirname
(
path
))
return
disk_rw
.
read
(
os
.
path
.
basename
(
path
),
AbsReaderWriter
.
MODE_BIN
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment