Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
c29327fd
Commit
c29327fd
authored
Jun 30, 2025
by
myhloli
Browse files
feat: add support for additional image formats in file upload
parent
f41fc406
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
10 additions
and
9 deletions
+10
-9
mineru/cli/common.py
mineru/cli/common.py
+1
-1
mineru/cli/gradio_app.py
mineru/cli/gradio_app.py
+9
-8
No files found.
mineru/cli/common.py
View file @
c29327fd
...
...
@@ -17,7 +17,7 @@ from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
from
mineru.backend.vlm.vlm_analyze
import
aio_doc_analyze
as
aio_vlm_doc_analyze
pdf_suffixes
=
[
".pdf"
]
image_suffixes
=
[
".png"
,
".jpeg"
,
".jpg"
]
image_suffixes
=
[
".png"
,
".jpeg"
,
".jpg"
,
".webp"
,
".gif"
]
def
read_fn
(
path
):
...
...
mineru/cli/gradio_app.py
View file @
c29327fd
...
...
@@ -11,7 +11,7 @@ import gradio as gr
from
gradio_pdf
import
PDF
from
loguru
import
logger
from
mineru.cli.common
import
prepare_env
,
read_fn
,
aio_do_parse
from
mineru.cli.common
import
prepare_env
,
read_fn
,
aio_do_parse
,
pdf_suffixes
,
image_suffixes
from
mineru.utils.hash_utils
import
str_sha256
...
...
@@ -121,8 +121,8 @@ latex_delimiters = [
]
header_path
=
os
.
path
.
join
(
os
.
path
.
dirname
(
os
.
path
.
dirname
(
__file__
)),
'resources'
,
'header.html'
)
with
open
(
header_path
,
'r'
)
as
file
:
header
=
file
.
read
()
with
open
(
header_path
,
'r'
)
as
header_
file
:
header
=
header_
file
.
read
()
latin_lang
=
[
...
...
@@ -199,7 +199,8 @@ def main():
with
gr
.
Row
():
with
gr
.
Column
(
variant
=
'panel'
,
scale
=
5
):
with
gr
.
Row
():
file
=
gr
.
File
(
label
=
'Please upload a PDF or image'
,
file_types
=
[
'.pdf'
,
'.png'
,
'.jpeg'
,
'.jpg'
])
suffixes
=
pdf_suffixes
+
image_suffixes
input_file
=
gr
.
File
(
label
=
'Please upload a PDF or image'
,
file_types
=
suffixes
)
with
gr
.
Row
():
max_pages
=
gr
.
Slider
(
1
,
20
,
10
,
step
=
1
,
label
=
'Max convert pages'
)
with
gr
.
Row
():
...
...
@@ -223,7 +224,7 @@ def main():
gr
.
Examples
(
examples
=
[
os
.
path
.
join
(
example_root
,
_
)
for
_
in
os
.
listdir
(
example_root
)
if
_
.
endswith
(
'pdf'
)],
inputs
=
file
inputs
=
input_
file
)
with
gr
.
Column
(
variant
=
'panel'
,
scale
=
5
):
...
...
@@ -256,10 +257,10 @@ def main():
outputs
=
[
client_options
,
ocr_options
,
pipeline_options
]
)
file
.
change
(
fn
=
to_pdf
,
inputs
=
file
,
outputs
=
pdf_show
)
change_bu
.
click
(
fn
=
to_markdown
,
inputs
=
[
file
,
max_pages
,
is_ocr
,
formula_enable
,
table_enable
,
language
,
backend
,
url
],
input_
file
.
change
(
fn
=
to_pdf
,
inputs
=
input_
file
,
outputs
=
pdf_show
)
change_bu
.
click
(
fn
=
to_markdown
,
inputs
=
[
input_
file
,
max_pages
,
is_ocr
,
formula_enable
,
table_enable
,
language
,
backend
,
url
],
outputs
=
[
md
,
md_text
,
output_file
,
pdf_show
])
clear_bu
.
add
([
file
,
md
,
pdf_show
,
md_text
,
output_file
,
is_ocr
])
clear_bu
.
add
([
input_
file
,
md
,
pdf_show
,
md_text
,
output_file
,
is_ocr
])
demo
.
launch
(
server_name
=
'localhost'
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment