Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wangsen
MinerU
Commits
e8535410
Commit
e8535410
authored
Jun 26, 2025
by
myhloli
Browse files
feat: update Gradio app to improve Markdown conversion and enhance HTML header
parent
34e5d2ff
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
9 additions
and
5 deletions
+9
-5
mineru/cli/gradio_app.py
mineru/cli/gradio_app.py
+6
-2
mineru/resources/header.html
mineru/resources/header.html
+2
-3
pyproject.toml
pyproject.toml
+1
-0
No files found.
mineru/cli/gradio_app.py
View file @
e8535410
...
...
@@ -90,7 +90,7 @@ def replace_image_with_base64(markdown_text, image_dir_path):
return
re
.
sub
(
pattern
,
replace
,
markdown_text
)
def
to_markdown
(
file_path
,
end_pages
,
is_ocr
,
formula_enable
,
table_enable
,
language
,
backend
,
url
):
def
to_markdown
(
file_path
,
end_pages
=
10
,
is_ocr
=
False
,
formula_enable
=
True
,
table_enable
=
True
,
language
=
"ch"
,
backend
=
"pipeline"
,
url
=
None
):
file_path
=
to_pdf
(
file_path
)
# 获取识别的md文件以及压缩包文件路径
local_md_dir
,
file_name
=
parse_pdf
(
file_path
,
'./output'
,
end_pages
-
1
,
is_ocr
,
formula_enable
,
table_enable
,
language
,
backend
,
url
)
...
...
@@ -172,7 +172,7 @@ def to_pdf(file_path):
return
tmp_file_path
if
__name__
==
'__
main
__'
:
def
main
()
:
example_enable
=
False
with
gr
.
Blocks
()
as
demo
:
...
...
@@ -248,3 +248,7 @@ if __name__ == '__main__':
clear_bu
.
add
([
file
,
md
,
pdf_show
,
md_text
,
output_file
,
is_ocr
])
demo
.
launch
(
server_name
=
'localhost'
)
if
__name__
==
'__main__'
:
main
()
\ No newline at end of file
mineru/resources/header.html
View file @
e8535410
...
...
@@ -54,7 +54,7 @@
font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
"
>
MinerU: PDF Extraction Demo
MinerU
2
: PDF Extraction Demo
</h1>
</div>
</div>
...
...
@@ -66,8 +66,7 @@
color: #fafafa;
opacity: 0.8;
"
>
A one-stop, open-source, high-quality data extraction tool, supports
PDF/webpage/e-book extraction.
<br>
A one-stop, open-source, high-quality data extraction tool that supports converting PDF to Markdown and JSON.
<br>
</p>
<style>
.link-block
{
...
...
pyproject.toml
View file @
e8535410
...
...
@@ -109,6 +109,7 @@ mineru = "mineru.cli:client.main"
mineru-sglang-server
=
"mineru.cli.vlm_sglang_server:main"
mineru-models-download
=
"mineru.cli.models_download:download_models"
mineru-api
=
"mineru.cli.fast_api:main"
mineru-gradio
=
"mineru.cli.gradio_app:main"
[tool.setuptools.dynamic]
version
=
{
attr
=
"mineru.version.__version__"
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment