Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
b71993a9
Unverified
Commit
b71993a9
authored
Dec 18, 2024
by
Xiaomeng Zhao
Committed by
GitHub
Dec 18, 2024
Browse files
Merge pull request #1323 from myhloli/dev
feat(gradio-app): improve PDF conversion and UI functionalities
parents
e1d69928
bf2ff5a2
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
8 additions
and
8 deletions
+8
-8
magic_pdf/tools/cli.py
magic_pdf/tools/cli.py
+1
-1
projects/gradio_app/app.py
projects/gradio_app/app.py
+7
-7
No files found.
magic_pdf/tools/cli.py
View file @
b71993a9
...
@@ -14,7 +14,7 @@ from magic_pdf.utils.office_to_pdf import convert_file_to_pdf
...
@@ -14,7 +14,7 @@ from magic_pdf.utils.office_to_pdf import convert_file_to_pdf
pdf_suffixes
=
[
'.pdf'
]
pdf_suffixes
=
[
'.pdf'
]
ms_office_suffixes
=
[
'.ppt'
,
'.pptx'
,
'.doc'
,
'.docx'
]
ms_office_suffixes
=
[
'.ppt'
,
'.pptx'
,
'.doc'
,
'.docx'
]
image_suffixes
=
[
'.png'
,
'.jpg'
]
image_suffixes
=
[
'.png'
,
'.jpeg'
,
'.jpg'
]
@
click
.
command
()
@
click
.
command
()
...
...
projects/gradio_app/app.py
View file @
b71993a9
...
@@ -97,6 +97,7 @@ def replace_image_with_base64(markdown_text, image_dir_path):
...
@@ -97,6 +97,7 @@ def replace_image_with_base64(markdown_text, image_dir_path):
def
to_markdown
(
file_path
,
end_pages
,
is_ocr
,
layout_mode
,
formula_enable
,
table_enable
,
language
):
def
to_markdown
(
file_path
,
end_pages
,
is_ocr
,
layout_mode
,
formula_enable
,
table_enable
,
language
):
file_path
=
to_pdf
(
file_path
)
# 获取识别的md文件以及压缩包文件路径
# 获取识别的md文件以及压缩包文件路径
local_md_dir
,
file_name
=
parse_pdf
(
file_path
,
'./output'
,
end_pages
-
1
,
is_ocr
,
local_md_dir
,
file_name
=
parse_pdf
(
file_path
,
'./output'
,
end_pages
-
1
,
is_ocr
,
layout_mode
,
formula_enable
,
table_enable
,
language
)
layout_mode
,
formula_enable
,
table_enable
,
language
)
...
@@ -182,14 +183,13 @@ def to_pdf(file_path):
...
@@ -182,14 +183,13 @@ def to_pdf(file_path):
return
tmp_file_path
return
tmp_file_path
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
with
gr
.
Blocks
()
as
demo
:
with
gr
.
Blocks
()
as
demo
:
gr
.
HTML
(
header
)
gr
.
HTML
(
header
)
with
gr
.
Row
():
with
gr
.
Row
():
with
gr
.
Column
(
variant
=
'panel'
,
scale
=
5
):
with
gr
.
Column
(
variant
=
'panel'
,
scale
=
5
):
file
=
gr
.
File
(
label
=
'Please upload a PDF or image'
,
file_types
=
[
'.pdf'
,
'.png'
,
'.jpeg'
,
'.jpg'
])
file
=
gr
.
File
(
label
=
'Please upload a PDF or image'
,
file_types
=
[
'.pdf'
,
'.png'
,
'.jpeg'
,
'.jpg'
])
max_pages
=
gr
.
Slider
(
1
,
1
0
,
5
,
step
=
1
,
label
=
'Max convert pages'
)
max_pages
=
gr
.
Slider
(
1
,
2
0
,
10
,
step
=
1
,
label
=
'Max convert pages'
)
with
gr
.
Row
():
with
gr
.
Row
():
layout_mode
=
gr
.
Dropdown
([
'layoutlmv3'
,
'doclayout_yolo'
],
label
=
'Layout model'
,
value
=
'layoutlmv3'
)
layout_mode
=
gr
.
Dropdown
([
'layoutlmv3'
,
'doclayout_yolo'
],
label
=
'Layout model'
,
value
=
'layoutlmv3'
)
language
=
gr
.
Dropdown
(
all_lang
,
label
=
'Language'
,
value
=
''
)
language
=
gr
.
Dropdown
(
all_lang
,
label
=
'Language'
,
value
=
''
)
...
@@ -200,25 +200,25 @@ if __name__ == '__main__':
...
@@ -200,25 +200,25 @@ if __name__ == '__main__':
with
gr
.
Row
():
with
gr
.
Row
():
change_bu
=
gr
.
Button
(
'Convert'
)
change_bu
=
gr
.
Button
(
'Convert'
)
clear_bu
=
gr
.
ClearButton
(
value
=
'Clear'
)
clear_bu
=
gr
.
ClearButton
(
value
=
'Clear'
)
pdf_show
=
PDF
(
label
=
'PDF preview'
,
interactive
=
True
,
height
=
800
)
pdf_show
=
PDF
(
label
=
'PDF preview'
,
interactive
=
False
,
visible
=
True
,
height
=
800
)
with
gr
.
Accordion
(
'Examples:'
):
with
gr
.
Accordion
(
'Examples:'
):
example_root
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'examples'
)
example_root
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
'examples'
)
gr
.
Examples
(
gr
.
Examples
(
examples
=
[
os
.
path
.
join
(
example_root
,
_
)
for
_
in
os
.
listdir
(
example_root
)
if
examples
=
[
os
.
path
.
join
(
example_root
,
_
)
for
_
in
os
.
listdir
(
example_root
)
if
_
.
endswith
(
'pdf'
)],
_
.
endswith
(
'pdf'
)],
inputs
=
pdf_show
inputs
=
file
)
)
with
gr
.
Column
(
variant
=
'panel'
,
scale
=
5
):
with
gr
.
Column
(
variant
=
'panel'
,
scale
=
5
):
output_file
=
gr
.
File
(
label
=
'convert result'
,
interactive
=
False
)
output_file
=
gr
.
File
(
label
=
'convert result'
,
interactive
=
False
)
with
gr
.
Tabs
():
with
gr
.
Tabs
():
with
gr
.
Tab
(
'Markdown rendering'
):
with
gr
.
Tab
(
'Markdown rendering'
):
md
=
gr
.
Markdown
(
label
=
'Markdown rendering'
,
height
=
9
00
,
show_copy_button
=
True
,
md
=
gr
.
Markdown
(
label
=
'Markdown rendering'
,
height
=
11
00
,
show_copy_button
=
True
,
latex_delimiters
=
latex_delimiters
,
line_breaks
=
True
)
latex_delimiters
=
latex_delimiters
,
line_breaks
=
True
)
with
gr
.
Tab
(
'Markdown text'
):
with
gr
.
Tab
(
'Markdown text'
):
md_text
=
gr
.
TextArea
(
lines
=
45
,
show_copy_button
=
True
)
md_text
=
gr
.
TextArea
(
lines
=
45
,
show_copy_button
=
True
)
file
.
upload
(
fn
=
to_pdf
,
inputs
=
file
,
outputs
=
pdf_show
)
file
.
change
(
fn
=
to_pdf
,
inputs
=
file
,
outputs
=
pdf_show
)
change_bu
.
click
(
fn
=
to_markdown
,
inputs
=
[
pdf_show
,
max_pages
,
is_ocr
,
layout_mode
,
formula_enable
,
table_enable
,
language
],
change_bu
.
click
(
fn
=
to_markdown
,
inputs
=
[
file
,
max_pages
,
is_ocr
,
layout_mode
,
formula_enable
,
table_enable
,
language
],
outputs
=
[
md
,
md_text
,
output_file
,
pdf_show
])
outputs
=
[
md
,
md_text
,
output_file
,
pdf_show
])
clear_bu
.
add
([
file
,
md
,
pdf_show
,
md_text
,
output_file
,
is_ocr
,
table_enable
,
language
])
clear_bu
.
add
([
file
,
md
,
pdf_show
,
md_text
,
output_file
,
is_ocr
,
table_enable
,
language
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment