Commit e8535410 authored by myhloli
Browse files

feat: update Gradio app to improve Markdown conversion and enhance HTML header

parent 34e5d2ff
...@@ -90,7 +90,7 @@ def replace_image_with_base64(markdown_text, image_dir_path): ...@@ -90,7 +90,7 @@ def replace_image_with_base64(markdown_text, image_dir_path):
return re.sub(pattern, replace, markdown_text) return re.sub(pattern, replace, markdown_text)
def to_markdown(file_path, end_pages, is_ocr, formula_enable, table_enable, language, backend, url): def to_markdown(file_path, end_pages=10, is_ocr=False, formula_enable=True, table_enable=True, language="ch", backend="pipeline", url=None):
file_path = to_pdf(file_path) file_path = to_pdf(file_path)
# 获取识别的md文件以及压缩包文件路径 # 获取识别的md文件以及压缩包文件路径
local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr, formula_enable, table_enable, language, backend, url) local_md_dir, file_name = parse_pdf(file_path, './output', end_pages - 1, is_ocr, formula_enable, table_enable, language, backend, url)
...@@ -172,7 +172,7 @@ def to_pdf(file_path): ...@@ -172,7 +172,7 @@ def to_pdf(file_path):
return tmp_file_path return tmp_file_path
if __name__ == '__main__': def main():
example_enable = False example_enable = False
with gr.Blocks() as demo: with gr.Blocks() as demo:
...@@ -248,3 +248,7 @@ if __name__ == '__main__': ...@@ -248,3 +248,7 @@ if __name__ == '__main__':
clear_bu.add([file, md, pdf_show, md_text, output_file, is_ocr]) clear_bu.add([file, md, pdf_show, md_text, output_file, is_ocr])
demo.launch(server_name='localhost') demo.launch(server_name='localhost')
if __name__ == '__main__':
main()
\ No newline at end of file
...@@ -54,7 +54,7 @@ ...@@ -54,7 +54,7 @@
font-family: 'Trebuchet MS', 'Lucida Sans Unicode', font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
'Lucida Grande', 'Lucida Sans', Arial, sans-serif; 'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
"> ">
MinerU: PDF Extraction Demo MinerU 2: PDF Extraction Demo
</h1> </h1>
</div> </div>
</div> </div>
...@@ -66,8 +66,7 @@ ...@@ -66,8 +66,7 @@
color: #fafafa; color: #fafafa;
opacity: 0.8; opacity: 0.8;
"> ">
A one-stop, open-source, high-quality data extraction tool, supports A one-stop, open-source, high-quality data extraction tool that supports converting PDF to Markdown and JSON.<br>
PDF/webpage/e-book extraction.<br>
</p> </p>
<style> <style>
.link-block { .link-block {
......
...@@ -109,6 +109,7 @@ mineru = "mineru.cli:client.main" ...@@ -109,6 +109,7 @@ mineru = "mineru.cli:client.main"
mineru-sglang-server = "mineru.cli.vlm_sglang_server:main" mineru-sglang-server = "mineru.cli.vlm_sglang_server:main"
mineru-models-download = "mineru.cli.models_download:download_models" mineru-models-download = "mineru.cli.models_download:download_models"
mineru-api = "mineru.cli.fast_api:main" mineru-api = "mineru.cli.fast_api:main"
mineru-gradio = "mineru.cli.gradio_app:main"
[tool.setuptools.dynamic] [tool.setuptools.dynamic]
version = {attr = "mineru.version.__version__"} version = {attr = "mineru.version.__version__"}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment