Unverified commit d39aa87e authored by Xiaomeng Zhao, committed by GitHub

Merge pull request #2834 from myhloli/dev

Dev
parents 3043f55e 359110e3
# Use the official sglang image
-FROM lmsysorg/sglang:v0.4.7-cu124
+FROM lmsysorg/sglang:v0.4.8-cu124
# install mineru latest
RUN python3 -m pip install -U 'mineru[core]' -i https://mirrors.aliyun.com/pypi/simple --break-system-packages
......
# Use the official sglang image
-FROM lmsysorg/sglang:v0.4.7-cu124
+FROM lmsysorg/sglang:v0.4.8-cu124
# install mineru latest
RUN python3 -m pip install -U 'mineru[core]' --break-system-packages
......
@@ -75,9 +75,9 @@ def doc_analyze(
):
"""
    Increasing MIN_BATCH_INFERENCE_SIZE moderately can improve performance, though it may also increase GPU memory usage.
-    It can be set via the MINERU_MIN_BATCH_INFERENCE_SIZE environment variable; the default value is 100.
+    It can be set via the MINERU_MIN_BATCH_INFERENCE_SIZE environment variable; the default value is 128.
"""
-    min_batch_inference_size = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 100))
+    min_batch_inference_size = int(os.environ.get('MINERU_MIN_BATCH_INFERENCE_SIZE', 128))
    # Collect information for all pages
    all_pages_info = []  # stores (dataset_index, page_index, img, ocr, lang, width, height)
......
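Since the batch floor is read from the environment at call time, it can be tuned without code changes. A minimal sketch (the value 256 is illustrative, not a recommendation from this PR):

import os

# Must be set before doc_analyze reads it; larger batches may use more VRAM.
os.environ['MINERU_MIN_BATCH_INFERENCE_SIZE'] = '256'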
@@ -25,6 +25,7 @@ class ModelSingleton:
backend: str,
model_path: str | None,
server_url: str | None,
+        **kwargs,
) -> BasePredictor:
key = (backend, model_path, server_url)
if key not in self._models:
@@ -34,6 +35,7 @@ class ModelSingleton:
backend=backend,
model_path=model_path,
server_url=server_url,
+                **kwargs,
)
return self._models[key]
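The new **kwargs pass-through lets engine options reach the predictor factory. A hedged sketch, reusing the exact options that appear in the commented-out gradio bootstrap later in this PR:

from mineru.backend.vlm.vlm_analyze import ModelSingleton

# kwargs are forwarded to the predictor factory; note the cache key is still
# (backend, model_path, server_url), so extra options only take effect on the
# first construction for a given key.
predictor = ModelSingleton().get_model(
    backend="sglang-engine",
    model_path=None,
    server_url=None,
    mem_fraction_static=0.5,
    enable_torch_compile=True,
)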
@@ -75,15 +77,15 @@ async def aio_doc_analyze(
if predictor is None:
predictor = ModelSingleton().get_model(backend, model_path, server_url)
-    load_images_start = time.time()
+    # load_images_start = time.time()
images_list, pdf_doc = load_images_from_pdf(pdf_bytes)
images_base64_list = [image_dict["img_base64"] for image_dict in images_list]
-    load_images_time = round(time.time() - load_images_start, 2)
-    logger.info(f"load images cost: {load_images_time}, speed: {round(len(images_base64_list)/load_images_time, 3)} images/s")
+    # load_images_time = round(time.time() - load_images_start, 2)
+    # logger.info(f"load images cost: {load_images_time}, speed: {round(len(images_base64_list)/load_images_time, 3)} images/s")
-    infer_start = time.time()
+    # infer_start = time.time()
results = await predictor.aio_batch_predict(images=images_base64_list)
-    infer_time = round(time.time() - infer_start, 2)
-    logger.info(f"infer finished, cost: {infer_time}, speed: {round(len(results)/infer_time, 3)} page/s")
+    # infer_time = round(time.time() - infer_start, 2)
+    # logger.info(f"infer finished, cost: {infer_time}, speed: {round(len(results)/infer_time, 3)} page/s")
middle_json = result_to_middle_json(results, images_list, pdf_doc, image_writer)
-    return middle_json
+    return middle_json, results
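Callers of aio_doc_analyze must now unpack two values. A minimal consumer sketch, mirroring the call made in _async_process_vlm later in this PR (the server URL is illustrative):

middle_json, infer_result = await aio_doc_analyze(
    pdf_bytes,
    image_writer=image_writer,
    backend="sglang-client",
    server_url="http://localhost:30000",
)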
@@ -60,7 +60,8 @@ from .common import do_parse, read_fn, pdf_suffixes, image_suffixes
'-l',
'--lang',
'lang',
-    type=click.Choice(['ch', 'ch_server', 'ch_lite', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka']),
+    type=click.Choice(['ch', 'ch_server', 'ch_lite', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka',
+                       'latin', 'arabic', 'east_slavic', 'cyrillic', 'devanagari']),
help="""
Input the languages in the pdf (if known) to improve OCR accuracy. Optional.
Without languages specified, 'ch' will be used by default.
......
@@ -14,6 +14,7 @@ from mineru.utils.enum_class import MakeMode
from mineru.utils.pdf_image_tools import images_bytes_to_pdf_bytes
from mineru.backend.vlm.vlm_middle_json_mkcontent import union_make as vlm_union_make
from mineru.backend.vlm.vlm_analyze import doc_analyze as vlm_doc_analyze
+from mineru.backend.vlm.vlm_analyze import aio_doc_analyze as aio_vlm_doc_analyze
pdf_suffixes = [".pdf"]
image_suffixes = [".png", ".jpeg", ".jpg"]
@@ -73,155 +74,308 @@ def convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes, start_page_id=0, end_page
return output_bytes
def do_parse(
output_dir,
pdf_file_names: list[str],
pdf_bytes_list: list[bytes],
p_lang_list: list[str],
backend="pipeline",
parse_method="auto",
p_formula_enable=True,
p_table_enable=True,
server_url=None,
f_draw_layout_bbox=True,
f_draw_span_bbox=True,
f_dump_md=True,
f_dump_middle_json=True,
f_dump_model_output=True,
f_dump_orig_pdf=True,
f_dump_content_list=True,
f_make_md_mode=MakeMode.MM_MD,
start_page_id=0,
end_page_id=None,
def _prepare_pdf_bytes(pdf_bytes_list, start_page_id, end_page_id):
"""准备处理PDF字节数据"""
result = []
for pdf_bytes in pdf_bytes_list:
new_pdf_bytes = convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes, start_page_id, end_page_id)
result.append(new_pdf_bytes)
return result
def _process_output(
pdf_info,
pdf_bytes,
pdf_file_name,
local_md_dir,
local_image_dir,
md_writer,
f_draw_layout_bbox,
f_draw_span_bbox,
f_dump_orig_pdf,
f_dump_md,
f_dump_content_list,
f_dump_middle_json,
f_dump_model_output,
f_make_md_mode,
middle_json,
model_output=None,
is_pipeline=True
):
    """Write the output files for a parsed document."""
    from mineru.backend.pipeline.pipeline_middle_json_mkcontent import union_make as pipeline_union_make
if f_draw_layout_bbox:
draw_layout_bbox(pdf_info, pdf_bytes, local_md_dir, f"{pdf_file_name}_layout.pdf")
if f_draw_span_bbox:
draw_span_bbox(pdf_info, pdf_bytes, local_md_dir, f"{pdf_file_name}_span.pdf")
if f_dump_orig_pdf:
md_writer.write(
f"{pdf_file_name}_origin.pdf",
pdf_bytes,
)
image_dir = str(os.path.basename(local_image_dir))
if f_dump_md:
make_func = pipeline_union_make if is_pipeline else vlm_union_make
md_content_str = make_func(pdf_info, f_make_md_mode, image_dir)
md_writer.write_string(
f"{pdf_file_name}.md",
md_content_str,
)
if f_dump_content_list:
make_func = pipeline_union_make if is_pipeline else vlm_union_make
content_list = make_func(pdf_info, MakeMode.CONTENT_LIST, image_dir)
md_writer.write_string(
f"{pdf_file_name}_content_list.json",
json.dumps(content_list, ensure_ascii=False, indent=4),
)
if f_dump_middle_json:
md_writer.write_string(
f"{pdf_file_name}_middle.json",
json.dumps(middle_json, ensure_ascii=False, indent=4),
)
if f_dump_model_output:
if is_pipeline:
md_writer.write_string(
f"{pdf_file_name}_model.json",
json.dumps(model_output, ensure_ascii=False, indent=4),
)
else:
output_text = ("\n" + "-" * 50 + "\n").join(model_output)
md_writer.write_string(
f"{pdf_file_name}_model_output.txt",
output_text,
)
logger.info(f"local output dir is {local_md_dir}")
def _process_pipeline(
output_dir,
pdf_file_names,
pdf_bytes_list,
p_lang_list,
parse_method,
p_formula_enable,
p_table_enable,
f_draw_layout_bbox,
f_draw_span_bbox,
f_dump_md,
f_dump_middle_json,
f_dump_model_output,
f_dump_orig_pdf,
f_dump_content_list,
f_make_md_mode,
):
"""处理pipeline后端逻辑"""
from mineru.backend.pipeline.model_json_to_middle_json import result_to_middle_json as pipeline_result_to_middle_json
from mineru.backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc_analyze
infer_results, all_image_lists, all_pdf_docs, lang_list, ocr_enabled_list = (
pipeline_doc_analyze(
pdf_bytes_list, p_lang_list, parse_method=parse_method,
formula_enable=p_formula_enable, table_enable=p_table_enable
)
)
for idx, model_list in enumerate(infer_results):
model_json = copy.deepcopy(model_list)
pdf_file_name = pdf_file_names[idx]
local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(local_md_dir)
images_list = all_image_lists[idx]
pdf_doc = all_pdf_docs[idx]
_lang = lang_list[idx]
_ocr_enable = ocr_enabled_list[idx]
middle_json = pipeline_result_to_middle_json(
model_list, images_list, pdf_doc, image_writer,
_lang, _ocr_enable, p_formula_enable
)
pdf_info = middle_json["pdf_info"]
pdf_bytes = pdf_bytes_list[idx]
_process_output(
pdf_info, pdf_bytes, pdf_file_name, local_md_dir, local_image_dir,
md_writer, f_draw_layout_bbox, f_draw_span_bbox, f_dump_orig_pdf,
f_dump_md, f_dump_content_list, f_dump_middle_json, f_dump_model_output,
f_make_md_mode, middle_json, model_json, is_pipeline=True
)
async def _async_process_vlm(
output_dir,
pdf_file_names,
pdf_bytes_list,
backend,
f_draw_layout_bbox,
f_draw_span_bbox,
f_dump_md,
f_dump_middle_json,
f_dump_model_output,
f_dump_orig_pdf,
f_dump_content_list,
f_make_md_mode,
server_url=None,
):
"""异步处理VLM后端逻辑"""
parse_method = "vlm"
f_draw_span_bbox = False
if not backend.endswith("client"):
server_url = None
for idx, pdf_bytes in enumerate(pdf_bytes_list):
pdf_file_name = pdf_file_names[idx]
local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(local_md_dir)
middle_json, infer_result = await aio_vlm_doc_analyze(
pdf_bytes, image_writer=image_writer, backend=backend, server_url=server_url
)
pdf_info = middle_json["pdf_info"]
_process_output(
pdf_info, pdf_bytes, pdf_file_name, local_md_dir, local_image_dir,
md_writer, f_draw_layout_bbox, f_draw_span_bbox, f_dump_orig_pdf,
f_dump_md, f_dump_content_list, f_dump_middle_json, f_dump_model_output,
f_make_md_mode, middle_json, infer_result, is_pipeline=False
)
def _process_vlm(
output_dir,
pdf_file_names,
pdf_bytes_list,
backend,
f_draw_layout_bbox,
f_draw_span_bbox,
f_dump_md,
f_dump_middle_json,
f_dump_model_output,
f_dump_orig_pdf,
f_dump_content_list,
f_make_md_mode,
server_url=None,
):
"""同步处理VLM后端逻辑"""
parse_method = "vlm"
f_draw_span_bbox = False
if not backend.endswith("client"):
server_url = None
if backend == "pipeline":
from mineru.backend.pipeline.pipeline_middle_json_mkcontent import union_make as pipeline_union_make
from mineru.backend.pipeline.model_json_to_middle_json import result_to_middle_json as pipeline_result_to_middle_json
from mineru.backend.pipeline.pipeline_analyze import doc_analyze as pipeline_doc_analyze
for idx, pdf_bytes in enumerate(pdf_bytes_list):
new_pdf_bytes = convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes, start_page_id, end_page_id)
pdf_bytes_list[idx] = new_pdf_bytes
infer_results, all_image_lists, all_pdf_docs, lang_list, ocr_enabled_list = pipeline_doc_analyze(pdf_bytes_list, p_lang_list, parse_method=parse_method, formula_enable=p_formula_enable,table_enable=p_table_enable)
for idx, model_list in enumerate(infer_results):
model_json = copy.deepcopy(model_list)
pdf_file_name = pdf_file_names[idx]
local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(local_md_dir)
images_list = all_image_lists[idx]
pdf_doc = all_pdf_docs[idx]
_lang = lang_list[idx]
_ocr_enable = ocr_enabled_list[idx]
middle_json = pipeline_result_to_middle_json(model_list, images_list, pdf_doc, image_writer, _lang, _ocr_enable, p_formula_enable)
pdf_info = middle_json["pdf_info"]
pdf_bytes = pdf_bytes_list[idx]
if f_draw_layout_bbox:
draw_layout_bbox(pdf_info, pdf_bytes, local_md_dir, f"{pdf_file_name}_layout.pdf")
if f_draw_span_bbox:
draw_span_bbox(pdf_info, pdf_bytes, local_md_dir, f"{pdf_file_name}_span.pdf")
for idx, pdf_bytes in enumerate(pdf_bytes_list):
pdf_file_name = pdf_file_names[idx]
local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(local_md_dir)
if f_dump_orig_pdf:
md_writer.write(
f"{pdf_file_name}_origin.pdf",
pdf_bytes,
)
middle_json, infer_result = vlm_doc_analyze(
pdf_bytes, image_writer=image_writer, backend=backend, server_url=server_url
)
if f_dump_md:
image_dir = str(os.path.basename(local_image_dir))
md_content_str = pipeline_union_make(pdf_info, f_make_md_mode, image_dir)
md_writer.write_string(
f"{pdf_file_name}.md",
md_content_str,
)
pdf_info = middle_json["pdf_info"]
if f_dump_content_list:
image_dir = str(os.path.basename(local_image_dir))
content_list = pipeline_union_make(pdf_info, MakeMode.CONTENT_LIST, image_dir)
md_writer.write_string(
f"{pdf_file_name}_content_list.json",
json.dumps(content_list, ensure_ascii=False, indent=4),
)
_process_output(
pdf_info, pdf_bytes, pdf_file_name, local_md_dir, local_image_dir,
md_writer, f_draw_layout_bbox, f_draw_span_bbox, f_dump_orig_pdf,
f_dump_md, f_dump_content_list, f_dump_middle_json, f_dump_model_output,
f_make_md_mode, middle_json, infer_result, is_pipeline=False
)
if f_dump_middle_json:
md_writer.write_string(
f"{pdf_file_name}_middle.json",
json.dumps(middle_json, ensure_ascii=False, indent=4),
)
if f_dump_model_output:
md_writer.write_string(
f"{pdf_file_name}_model.json",
json.dumps(model_json, ensure_ascii=False, indent=4),
)
def do_parse(
output_dir,
pdf_file_names: list[str],
pdf_bytes_list: list[bytes],
p_lang_list: list[str],
backend="pipeline",
parse_method="auto",
p_formula_enable=True,
p_table_enable=True,
server_url=None,
f_draw_layout_bbox=True,
f_draw_span_bbox=True,
f_dump_md=True,
f_dump_middle_json=True,
f_dump_model_output=True,
f_dump_orig_pdf=True,
f_dump_content_list=True,
f_make_md_mode=MakeMode.MM_MD,
start_page_id=0,
end_page_id=None,
):
    # Preprocess the PDF byte data
pdf_bytes_list = _prepare_pdf_bytes(pdf_bytes_list, start_page_id, end_page_id)
logger.info(f"local output dir is {local_md_dir}")
if backend == "pipeline":
_process_pipeline(
output_dir, pdf_file_names, pdf_bytes_list, p_lang_list,
parse_method, p_formula_enable, p_table_enable,
f_draw_layout_bbox, f_draw_span_bbox, f_dump_md, f_dump_middle_json,
f_dump_model_output, f_dump_orig_pdf, f_dump_content_list, f_make_md_mode
)
else:
if backend.startswith("vlm-"):
backend = backend[4:]
f_draw_span_bbox = False
parse_method = "vlm"
for idx, pdf_bytes in enumerate(pdf_bytes_list):
pdf_file_name = pdf_file_names[idx]
pdf_bytes = convert_pdf_bytes_to_bytes_by_pypdfium2(pdf_bytes, start_page_id, end_page_id)
local_image_dir, local_md_dir = prepare_env(output_dir, pdf_file_name, parse_method)
image_writer, md_writer = FileBasedDataWriter(local_image_dir), FileBasedDataWriter(local_md_dir)
middle_json, infer_result = vlm_doc_analyze(pdf_bytes, image_writer=image_writer, backend=backend, server_url=server_url)
pdf_info = middle_json["pdf_info"]
if f_draw_layout_bbox:
draw_layout_bbox(pdf_info, pdf_bytes, local_md_dir, f"{pdf_file_name}_layout.pdf")
if f_draw_span_bbox:
draw_span_bbox(pdf_info, pdf_bytes, local_md_dir, f"{pdf_file_name}_span.pdf")
if f_dump_orig_pdf:
md_writer.write(
f"{pdf_file_name}_origin.pdf",
pdf_bytes,
)
if f_dump_md:
image_dir = str(os.path.basename(local_image_dir))
md_content_str = vlm_union_make(pdf_info, f_make_md_mode, image_dir)
md_writer.write_string(
f"{pdf_file_name}.md",
md_content_str,
)
if f_dump_content_list:
image_dir = str(os.path.basename(local_image_dir))
content_list = vlm_union_make(pdf_info, MakeMode.CONTENT_LIST, image_dir)
md_writer.write_string(
f"{pdf_file_name}_content_list.json",
json.dumps(content_list, ensure_ascii=False, indent=4),
)
if f_dump_middle_json:
md_writer.write_string(
f"{pdf_file_name}_middle.json",
json.dumps(middle_json, ensure_ascii=False, indent=4),
)
_process_vlm(
output_dir, pdf_file_names, pdf_bytes_list, backend,
f_draw_layout_bbox, f_draw_span_bbox, f_dump_md, f_dump_middle_json,
f_dump_model_output, f_dump_orig_pdf, f_dump_content_list, f_make_md_mode,
server_url
)
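For reference, a hedged sketch of driving the refactored entry point; the file name and the pdf_bytes value are placeholders:

do_parse(
    output_dir="./output",
    pdf_file_names=["demo"],
    pdf_bytes_list=[pdf_bytes],  # raw bytes of the source PDF
    p_lang_list=["ch"],
    backend="pipeline",
)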
async def aio_do_parse(
output_dir,
pdf_file_names: list[str],
pdf_bytes_list: list[bytes],
p_lang_list: list[str],
backend="pipeline",
parse_method="auto",
p_formula_enable=True,
p_table_enable=True,
server_url=None,
f_draw_layout_bbox=True,
f_draw_span_bbox=True,
f_dump_md=True,
f_dump_middle_json=True,
f_dump_model_output=True,
f_dump_orig_pdf=True,
f_dump_content_list=True,
f_make_md_mode=MakeMode.MM_MD,
start_page_id=0,
end_page_id=None,
):
    # Preprocess the PDF byte data
pdf_bytes_list = _prepare_pdf_bytes(pdf_bytes_list, start_page_id, end_page_id)
if f_dump_model_output:
model_output = ("\n" + "-" * 50 + "\n").join(infer_result)
md_writer.write_string(
f"{pdf_file_name}_model_output.txt",
model_output,
)
if backend == "pipeline":
        # The pipeline mode does not support async yet, so fall back to synchronous processing
_process_pipeline(
output_dir, pdf_file_names, pdf_bytes_list, p_lang_list,
parse_method, p_formula_enable, p_table_enable,
f_draw_layout_bbox, f_draw_span_bbox, f_dump_md, f_dump_middle_json,
f_dump_model_output, f_dump_orig_pdf, f_dump_content_list, f_make_md_mode
)
else:
if backend.startswith("vlm-"):
backend = backend[4:]
logger.info(f"local output dir is {local_md_dir}")
await _async_process_vlm(
output_dir, pdf_file_names, pdf_bytes_list, backend,
f_draw_layout_bbox, f_draw_span_bbox, f_dump_md, f_dump_middle_json,
f_dump_model_output, f_dump_orig_pdf, f_dump_content_list, f_make_md_mode,
server_url
)
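And the async variant, driven from synchronous code; a sketch that assumes a running sglang server at the illustrative URL:

import asyncio

asyncio.run(aio_do_parse(
    output_dir="./output",
    pdf_file_names=["demo"],
    pdf_bytes_list=[pdf_bytes],
    p_lang_list=["ch"],
    backend="vlm-sglang-client",
    server_url="http://localhost:30000",
))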
......
import uuid
import os
from base64 import b64encode
import uvicorn
import argparse
from pathlib import Path
from glob import glob
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import JSONResponse
from typing import List, Optional
from loguru import logger
from mineru.cli.common import aio_do_parse, read_fn
from mineru.version import __version__
app = FastAPI()
def encode_image(image_path: str) -> str:
"""Encode image using base64"""
with open(image_path, "rb") as f:
return b64encode(f.read()).decode()
@app.post(path="/file_parse",)
async def parse_pdf(
files: List[UploadFile] = File(...),
output_dir: str = Form("./output"),
lang_list: List[str] = Form(["ch"]),
backend: str = Form("pipeline"),
parse_method: str = Form("auto"),
formula_enable: bool = Form(True),
table_enable: bool = Form(True),
server_url: Optional[str] = Form(None),
    return_md: bool = Form(True),
    return_middle_json: bool = Form(False),
    return_model_output: bool = Form(False),
    return_content_list: bool = Form(False),
return_images: bool = Form(False),
start_page_id: int = Form(0),
end_page_id: int = Form(99999),
):
try:
        # Create a unique output directory
unique_dir = os.path.join(output_dir, str(uuid.uuid4()))
os.makedirs(unique_dir, exist_ok=True)
        # Process the uploaded PDF files
pdf_file_names = []
pdf_bytes_list = []
for file in files:
content = await file.read()
file_path = Path(file.filename)
            # For image or PDF files, process them with read_fn
if file_path.suffix.lower() in [".pdf", ".png", ".jpeg", ".jpg"]:
                # Create a temporary file so read_fn can be used
temp_path = Path(unique_dir) / file_path.name
with open(temp_path, "wb") as f:
f.write(content)
try:
pdf_bytes = read_fn(temp_path)
pdf_bytes_list.append(pdf_bytes)
pdf_file_names.append(file_path.stem)
                    os.remove(temp_path)  # delete the temporary file
except Exception as e:
return JSONResponse(
status_code=400,
content={"error": f"处理文件失败: {str(e)}"}
)
else:
return JSONResponse(
status_code=400,
content={"error": f"不支持的文件类型: {file_path.suffix}"}
)
        # Set the language list, making sure its length matches the number of files
actual_lang_list = lang_list
if len(actual_lang_list) != len(pdf_file_names):
            # If the language list length does not match, use the first language, or default to "ch"
actual_lang_list = [actual_lang_list[0] if actual_lang_list else "ch"] * len(pdf_file_names)
        # Invoke the async processing function
await aio_do_parse(
output_dir=unique_dir,
pdf_file_names=pdf_file_names,
pdf_bytes_list=pdf_bytes_list,
p_lang_list=actual_lang_list,
backend=backend,
parse_method=parse_method,
p_formula_enable=formula_enable,
p_table_enable=table_enable,
server_url=server_url,
f_draw_layout_bbox=False,
f_draw_span_bbox=False,
            f_dump_md=return_md,
            f_dump_middle_json=return_middle_json,
            f_dump_model_output=return_model_output,
            f_dump_orig_pdf=False,
            f_dump_content_list=return_content_list,
start_page_id=start_page_id,
end_page_id=end_page_id,
)
        # Build the result paths
result_dict = {}
for pdf_name in pdf_file_names:
result_dict[pdf_name] = {}
data = result_dict[pdf_name]
if backend.startswith("pipeline"):
parse_dir = os.path.join(unique_dir, pdf_name, parse_method)
else:
parse_dir = os.path.join(unique_dir, pdf_name, "vlm")
def get_infer_result(file_suffix_identifier: str):
"""从结果文件中读取推理结果"""
result_file_path = os.path.join(parse_dir, f"{pdf_name}{file_suffix_identifier}")
if os.path.exists(result_file_path):
with open(result_file_path, "r", encoding="utf-8") as fp:
return fp.read()
return None
if os.path.exists(parse_dir):
                if return_md:
data["md_content"] = get_infer_result(".md")
                if return_middle_json:
data["middle_json"] = get_infer_result("_middle.json")
if return_model_output:
if backend.startswith("pipeline"):
data["model_output"] = get_infer_result("_model.json")
else:
data["model_output"] = get_infer_result("_model_output.txt")
                if return_content_list:
data["content_list"] = get_infer_result("_content_list.json")
if return_images:
image_paths = glob(f"{parse_dir}/images/*.jpg")
data["images"] = {
os.path.basename(
image_path
): f"data:image/jpeg;base64,{encode_image(image_path)}"
for image_path in image_paths
}
return JSONResponse(
status_code=200,
content={
"backend": backend,
"version": __version__,
"results": result_dict
}
)
except Exception as e:
logger.exception(e)
return JSONResponse(
status_code=500,
content={"error": str(e)}
)
def main():
"""启动MinerU FastAPI服务器的命令行入口"""
parser = argparse.ArgumentParser(description='Start MinerU FastAPI Service')
parser.add_argument('--host', type=str, default='127.0.0.1', help='Server host (default: 127.0.0.1)')
parser.add_argument('--port', type=int, default=8000, help='Server port (default: 8000)')
parser.add_argument('--reload', action='store_true', help='Enable auto-reload (development mode)')
args = parser.parse_args()
print(f"Start MinerU FastAPI Service: http://{args.host}:{args.port}")
print("The API documentation can be accessed at the following address:")
print(f"- Swagger UI: http://{args.host}:{args.port}/docs")
print(f"- ReDoc: http://{args.host}:{args.port}/redoc")
uvicorn.run(
"mineru.cli.fast_api:app",
host=args.host,
port=args.port,
reload=args.reload
)
if __name__ == "__main__":
main()
\ No newline at end of file
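A hedged client-side sketch for the endpoint above, using the requests library; host and port match the main() defaults, and demo.pdf is a placeholder:

import requests

with open("demo.pdf", "rb") as f:
    resp = requests.post(
        "http://127.0.0.1:8000/file_parse",
        files={"files": ("demo.pdf", f, "application/pdf")},
        data={"backend": "pipeline", "return_md": "true"},
    )
# results are keyed by the uploaded file's stem
print(resp.json()["results"]["demo"]["md_content"])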
# Copyright (c) Opendatalab. All rights reserved.
import base64
import os
import re
import time
import zipfile
from pathlib import Path
import gradio as gr
from gradio_pdf import PDF
from loguru import logger
from mineru.cli.common import prepare_env, read_fn, aio_do_parse
from mineru.utils.hash_utils import str_sha256
async def parse_pdf(doc_path, output_dir, end_page_id, is_ocr, formula_enable, table_enable, language, backend, url):
os.makedirs(output_dir, exist_ok=True)
try:
file_name = f'{safe_stem(Path(doc_path).stem)}_{time.strftime("%y%m%d_%H%M%S")}'
pdf_data = read_fn(doc_path)
if is_ocr:
parse_method = 'ocr'
else:
parse_method = 'auto'
if backend.startswith("vlm"):
parse_method = "vlm"
if not backend.endswith("client"):
url = None
local_image_dir, local_md_dir = prepare_env(output_dir, file_name, parse_method)
await aio_do_parse(
output_dir=output_dir,
pdf_file_names=[file_name],
pdf_bytes_list=[pdf_data],
p_lang_list=[language],
parse_method=parse_method,
end_page_id=end_page_id,
p_formula_enable=formula_enable,
p_table_enable=table_enable,
backend=backend,
server_url=url,
)
return local_md_dir, file_name
except Exception as e:
logger.exception(e)
return None
def compress_directory_to_zip(directory_path, output_zip_path):
"""压缩指定目录到一个 ZIP 文件。
:param directory_path: 要压缩的目录路径
:param output_zip_path: 输出的 ZIP 文件路径
"""
try:
with zipfile.ZipFile(output_zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
            # Walk all files and subdirectories under the directory
for root, dirs, files in os.walk(directory_path):
for file in files:
                    # Build the full file path
file_path = os.path.join(root, file)
                    # Compute the path relative to the directory root
arcname = os.path.relpath(file_path, directory_path)
                    # Add the file to the ZIP archive
zipf.write(file_path, arcname)
return 0
except Exception as e:
logger.exception(e)
return -1
def image_to_base64(image_path):
with open(image_path, 'rb') as image_file:
return base64.b64encode(image_file.read()).decode('utf-8')
def replace_image_with_base64(markdown_text, image_dir_path):
    # Match image tags in the Markdown
    pattern = r'\!\[(?:[^\]]*)\]\(([^)]+)\)'
    # Replace each image link with inline base64 data
def replace(match):
relative_path = match.group(1)
full_path = os.path.join(image_dir_path, relative_path)
base64_image = image_to_base64(full_path)
return f'![{relative_path}](data:image/jpeg;base64,{base64_image})'
    # Apply the replacement
return re.sub(pattern, replace, markdown_text)
async def to_markdown(file_path, end_pages=10, is_ocr=False, formula_enable=True, table_enable=True, language="ch", backend="pipeline", url=None):
file_path = to_pdf(file_path)
    # Get the recognized md directory and build the archive path
local_md_dir, file_name = await parse_pdf(file_path, './output', end_pages - 1, is_ocr, formula_enable, table_enable, language, backend, url)
archive_zip_path = os.path.join('./output', str_sha256(local_md_dir) + '.zip')
zip_archive_success = compress_directory_to_zip(local_md_dir, archive_zip_path)
    if zip_archive_success == 0:
        logger.info('Compression succeeded')
    else:
        logger.error('Compression failed')
md_path = os.path.join(local_md_dir, file_name + '.md')
with open(md_path, 'r', encoding='utf-8') as f:
txt_content = f.read()
md_content = replace_image_with_base64(txt_content, local_md_dir)
    # Return the path of the converted (layout) PDF
new_pdf_path = os.path.join(local_md_dir, file_name + '_layout.pdf')
return md_content, txt_content, archive_zip_path, new_pdf_path
latex_delimiters = [
{'left': '$$', 'right': '$$', 'display': True},
{'left': '$', 'right': '$', 'display': False},
{'left': '\\(', 'right': '\\)', 'display': False},
{'left': '\\[', 'right': '\\]', 'display': True},
]
header_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'resources', 'header.html')
with open(header_path, 'r') as file:
header = file.read()
latin_lang = [
'af', 'az', 'bs', 'cs', 'cy', 'da', 'de', 'es', 'et', 'fr', 'ga', 'hr', # noqa: E126
'hu', 'id', 'is', 'it', 'ku', 'la', 'lt', 'lv', 'mi', 'ms', 'mt', 'nl',
'no', 'oc', 'pi', 'pl', 'pt', 'ro', 'rs_latin', 'sk', 'sl', 'sq', 'sv',
'sw', 'tl', 'tr', 'uz', 'vi', 'french', 'german'
]
arabic_lang = ['ar', 'fa', 'ug', 'ur']
cyrillic_lang = [
'rs_cyrillic', 'bg', 'mn', 'abq', 'ady', 'kbd', 'ava', # noqa: E126
'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
]
east_slavic_lang = ["ru", "be", "uk"]
devanagari_lang = [
'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom', # noqa: E126
'sa', 'bgc'
]
other_lang = ['ch', 'ch_lite', 'ch_server', 'en', 'korean', 'japan', 'chinese_cht', 'ta', 'te', 'ka']
add_lang = ['latin', 'arabic', 'east_slavic', 'cyrillic', 'devanagari']
# all_lang = ['', 'auto']
all_lang = []
# all_lang.extend([*other_lang, *latin_lang, *arabic_lang, *cyrillic_lang, *devanagari_lang])
all_lang.extend([*other_lang, *add_lang])
def safe_stem(file_path):
stem = Path(file_path).stem
    # Keep only letters, digits, underscores and dots; replace everything else with underscores
return re.sub(r'[^\w.]', '_', stem)
def to_pdf(file_path):
if file_path is None:
return None
pdf_bytes = read_fn(file_path)
# unique_filename = f'{uuid.uuid4()}.pdf'
unique_filename = f'{safe_stem(file_path)}.pdf'
    # Build the full file path
tmp_file_path = os.path.join(os.path.dirname(file_path), unique_filename)
    # Write the byte data to the file
with open(tmp_file_path, 'wb') as tmp_pdf_file:
tmp_pdf_file.write(pdf_bytes)
return tmp_file_path
def main():
example_enable = False
# try:
# print("Start init SgLang engine...")
# from mineru.backend.vlm.vlm_analyze import ModelSingleton
# modelsingleton = ModelSingleton()
# predictor = modelsingleton.get_model(
# "sglang-engine",
# None,
# None,
# mem_fraction_static=0.5,
# enable_torch_compile=True,
# )
# print("SgLang engine init successfully.")
# except Exception as e:
# logger.exception(e)
with gr.Blocks() as demo:
gr.HTML(header)
with gr.Row():
with gr.Column(variant='panel', scale=5):
with gr.Row():
file = gr.File(label='Please upload a PDF or image', file_types=['.pdf', '.png', '.jpeg', '.jpg'])
with gr.Row():
max_pages = gr.Slider(1, 20, 10, step=1, label='Max convert pages')
with gr.Row():
backend = gr.Dropdown(["pipeline", "vlm-transformers", "vlm-sglang-client"], label="Backend", value="pipeline")
with gr.Row(visible=True) as ocr_options:
language = gr.Dropdown(all_lang, label='Language', value='ch')
with gr.Row(visible=False) as client_options:
url = gr.Textbox(label='Server URL', value='http://localhost:30000', placeholder='http://localhost:30000')
with gr.Row(visible=True) as pipeline_options:
is_ocr = gr.Checkbox(label='Force enable OCR', value=False)
formula_enable = gr.Checkbox(label='Enable formula recognition', value=True)
table_enable = gr.Checkbox(label='Enable table recognition(test)', value=True)
with gr.Row():
change_bu = gr.Button('Convert')
clear_bu = gr.ClearButton(value='Clear')
pdf_show = PDF(label='PDF preview', interactive=False, visible=True, height=800)
if example_enable:
example_root = os.path.join(os.path.dirname(__file__), 'examples')
if os.path.exists(example_root):
with gr.Accordion('Examples:'):
gr.Examples(
examples=[os.path.join(example_root, _) for _ in os.listdir(example_root) if
_.endswith('pdf')],
inputs=file
)
with gr.Column(variant='panel', scale=5):
output_file = gr.File(label='convert result', interactive=False)
with gr.Tabs():
with gr.Tab('Markdown rendering'):
md = gr.Markdown(label='Markdown rendering', height=1100, show_copy_button=True,
latex_delimiters=latex_delimiters,
line_breaks=True)
with gr.Tab('Markdown text'):
md_text = gr.TextArea(lines=45, show_copy_button=True)
        # Update the interface when the backend selection changes
def update_interface(backend_choice):
if backend_choice in ["vlm-transformers", "vlm-sglang-engine"]:
return gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)
elif backend_choice in ["vlm-sglang-client"]: # pipeline
return gr.update(visible=True), gr.update(visible=False), gr.update(visible=False)
elif backend_choice in ["pipeline"]:
return gr.update(visible=False), gr.update(visible=True), gr.update(visible=True)
else:
pass
        # Register event handlers
backend.change(
fn=update_interface,
inputs=[backend],
outputs=[client_options, ocr_options, pipeline_options]
)
file.change(fn=to_pdf, inputs=file, outputs=pdf_show)
change_bu.click(fn=to_markdown, inputs=[file, max_pages, is_ocr, formula_enable, table_enable, language, backend, url],
outputs=[md, md_text, output_file, pdf_show])
clear_bu.add([file, md, pdf_show, md_text, output_file, is_ocr])
demo.launch(server_name='localhost')
if __name__ == '__main__':
main()
\ No newline at end of file
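With the mineru-gradio console script registered in pyproject.toml later in this PR, this demo can be started with a bare mineru-gradio command; demo.launch(server_name='localhost') serves it on Gradio's default port 7860.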
@@ -3,6 +3,7 @@ import os
import sys
import click
import requests
+from loguru import logger
from mineru.utils.enum_class import ModelPath
from mineru.utils.models_download_utils import auto_download_and_get_model_root_path
@@ -54,7 +55,32 @@ def configure_model(model_dir, model_type):
}
download_and_modify_json(json_url, config_file, json_mods)
-    print(f'The configuration file has been successfully configured, the path is: {config_file}')
+    logger.info(f'The configuration file has been successfully configured, the path is: {config_file}')
+def download_pipeline_models():
+    """Download the pipeline models."""
+    model_paths = [
+        ModelPath.doclayout_yolo,
+        ModelPath.yolo_v8_mfd,
+        ModelPath.unimernet_small,
+        ModelPath.pytorch_paddle,
+        ModelPath.layout_reader,
+        ModelPath.slanet_plus
+    ]
+    download_finish_path = ""
+    for model_path in model_paths:
+        logger.info(f"Downloading model: {model_path}")
+        download_finish_path = auto_download_and_get_model_root_path(model_path, repo_mode='pipeline')
+    logger.info(f"Pipeline models downloaded successfully to: {download_finish_path}")
+    configure_model(download_finish_path, "pipeline")
+
+
+def download_vlm_models():
+    """Download the VLM models."""
+    download_finish_path = auto_download_and_get_model_root_path("/", repo_mode='vlm')
+    logger.info(f"VLM models downloaded successfully to: {download_finish_path}")
+    configure_model(download_finish_path, "vlm")
@click.command()
@@ -102,30 +128,7 @@ def download_models(model_source, model_type):
default='all'
)
click.echo(f"Downloading {model_type} model from {os.getenv('MINERU_MODEL_SOURCE', None)}...")
def download_pipeline_models():
"""下载Pipeline模型"""
model_paths = [
ModelPath.doclayout_yolo,
ModelPath.yolo_v8_mfd,
ModelPath.unimernet_small,
ModelPath.pytorch_paddle,
ModelPath.layout_reader,
ModelPath.slanet_plus
]
download_finish_path = ""
for model_path in model_paths:
click.echo(f"Downloading model: {model_path}")
download_finish_path = auto_download_and_get_model_root_path(model_path, repo_mode='pipeline')
click.echo(f"Pipeline models downloaded successfully to: {download_finish_path}")
configure_model(download_finish_path, "pipeline")
def download_vlm_models():
"""下载VLM模型"""
download_finish_path = auto_download_and_get_model_root_path("/", repo_mode='vlm')
click.echo(f"VLM models downloaded successfully to: {download_finish_path}")
configure_model(download_finish_path, "vlm")
logger.info(f"Downloading {model_type} model from {os.getenv('MINERU_MODEL_SOURCE', None)}...")
try:
if model_type == 'pipeline':
@@ -140,7 +143,7 @@ def download_models(model_source, model_type):
sys.exit(1)
except Exception as e:
click.echo(f"Download failed: {str(e)}", err=True)
logger.exception(f"An error occurred while downloading models: {str(e)}")
sys.exit(1)
if __name__ == '__main__':
......
@@ -26,9 +26,10 @@ latin_lang = [
]
arabic_lang = ['ar', 'fa', 'ug', 'ur']
cyrillic_lang = [
-    'ru', 'rs_cyrillic', 'be', 'bg', 'uk', 'mn', 'abq', 'ady', 'kbd', 'ava',  # noqa: E126
+    'rs_cyrillic', 'bg', 'mn', 'abq', 'ady', 'kbd', 'ava',  # noqa: E126
'dar', 'inh', 'che', 'lbe', 'lez', 'tab'
]
east_slavic_lang = ["ru", "be", "uk"]
devanagari_lang = [
'hi', 'mr', 'ne', 'bh', 'mai', 'ang', 'bho', 'mah', 'sck', 'new', 'gom', # noqa: E126
'sa', 'bgc'
@@ -69,6 +70,8 @@ class PytorchPaddleOCR(TextSystem):
self.lang = 'cyrillic'
elif self.lang in devanagari_lang:
self.lang = 'devanagari'
+        elif self.lang in east_slavic_lang:
+            self.lang = 'east_slavic'
else:
pass
......
@@ -490,3 +490,82 @@ devanagari_PP-OCRv3_rec_infer:
# out_channels: 169
fc_decay: 0.00001
korean_PP-OCRv5_rec_infer:
model_type: rec
algorithm: SVTR_HGNet
Transform:
Backbone:
name: PPLCNetV3
scale: 0.95
Head:
name: MultiHead
out_channels_list:
CTCLabelDecode: 11947
head_list:
- CTCHead:
Neck:
name: svtr
dims: 120
depth: 2
hidden_dims: 120
kernel_size: [ 1, 3 ]
use_guide: True
Head:
fc_decay: 0.00001
- NRTRHead:
nrtr_dim: 384
max_text_length: 25
latin_PP-OCRv5_rec_infer:
model_type: rec
algorithm: SVTR_HGNet
Transform:
Backbone:
name: PPLCNetV3
scale: 0.95
Head:
name: MultiHead
out_channels_list:
CTCLabelDecode: 504
head_list:
- CTCHead:
Neck:
name: svtr
dims: 120
depth: 2
hidden_dims: 120
kernel_size: [ 1, 3 ]
use_guide: True
Head:
fc_decay: 0.00001
- NRTRHead:
nrtr_dim: 384
max_text_length: 25
eslav_PP-OCRv5_rec_infer:
model_type: rec
algorithm: SVTR_HGNet
Transform:
Backbone:
name: PPLCNetV3
scale: 0.95
Head:
name: MultiHead
out_channels_list:
CTCLabelDecode: 519
head_list:
- CTCHead:
Neck:
name: svtr
dims: 120
depth: 2
hidden_dims: 120
kernel_size: [ 1, 3 ]
use_guide: True
Head:
fc_decay: 0.00001
- NRTRHead:
nrtr_dim: 384
max_text_length: 25
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
;
<
=
>
?
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
[
]
_
`
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
©
{
}
\
|
@
^
~
÷
·
±
®
Ω
¢
£
¥
𝑢
𝜓
ƒ
À
Á
Â
Ã
Ä
Å
Æ
Ç
È
É
Ê
Ë
Ì
Í
Î
Ï
Ð
Ñ
Ò
Ó
Ô
Õ
Ö
Ø
Ù
Ú
Û
Ü
Ý
Þ
à
á
â
ã
ä
å
æ
ç
è
é
ê
ë
ì
í
î
ï
ð
ñ
ò
ó
ô
õ
ö
ø
ù
ú
û
ü
ý
þ
ÿ
¡
¤
¦
§
¨
ª
«
¬
¯
°
²
³
´
µ
¸
¹
º
»
¼
½
¾
¿
×
Α
α
Β
β
Γ
γ
Δ
δ
Ε
ε
Ζ
ζ
Η
η
Θ
θ
Ι
ι
Κ
κ
Λ
λ
Μ
μ
Ν
ν
Ξ
ξ
Ο
ο
Π
π
Ρ
ρ
Σ
σ
ς
Τ
τ
Υ
υ
Φ
φ
Χ
χ
Ψ
ψ
ω
А
Б
В
Г
Ґ
Д
Е
Ё
Є
Ж
З
И
І
Ї
Й
К
Л
М
Н
О
П
Р
С
Т
У
Ў
Ф
Х
Ц
Ч
Ш
Щ
Ъ
Ы
Ь
Э
Ю
Я
а
б
в
г
ґ
д
е
ё
є
ж
з
и
і
ї
й
к
л
м
н
о
п
р
с
т
у
ў
ф
х
ц
ч
ш
щ
ъ
ы
ь
э
ю
я
!
"
#
$
%
&
'
(
)
*
+
,
-
.
/
0
1
2
3
4
5
6
7
8
9
:
;
<
=
>
?
@
A
B
C
D
E
F
G
H
I
J
K
L
M
N
O
P
Q
R
S
T
U
V
W
X
Y
Z
[
\
]
^
_
`
a
b
c
d
e
f
g
h
i
j
k
l
m
n
o
p
q
r
s
t
u
v
w
x
y
z
{
|
}
~
¡
¢
£
¤
¥
¦
§
¨
©
ª
«
¬
­
®
¯
°
±
²
³
´
µ
·
¸
¹
º
»
¼
½
¾
¿
À
Á
Â
Ã
Ä
Å
Æ
Ç
È
É
Ê
Ë
Ì
Í
Î
Ï
Ð
Ñ
Ò
Ó
Ô
Õ
Ö
×
Ø
Ù
Ú
Û
Ü
Ý
Þ
ß
à
á
â
ã
ä
å
æ
ç
è
é
ê
ë
ì
í
î
ï
ð
ñ
ò
ó
ô
õ
ö
÷
ø
ù
ú
û
ü
ý
þ
ÿ
Ą
ą
Ć
ć
Č
č
Ď
ď
Đ
đ
Ė
ė
Ę
ę
Ě
ě
Ğ
ğ
Į
į
İ
ı
Ĺ
ĺ
Ľ
ľ
Ł
ł
Ń
ń
Ň
ň
ō
Ő
ő
Œ
œ
Ŕ
ŕ
Ř
ř
Ś
ś
Ş
ş
Š
š
Ť
ť
Ū
ū
Ů
ů
Ű
ű
Ų
ų
Ÿ
Ź
ź
Ż
ż
Ž
ž
ƒ
ʒ
Ω
α
β
γ
δ
ε
ζ
η
θ
ι
κ
λ
μ
ν
ξ
ο
π
ρ
ς
σ
τ
υ
φ
χ
ψ
ω
з
𝑢
𝜓
@@ -24,17 +24,17 @@ lang:
rec: en_PP-OCRv4_rec_infer.pth
dict: en_dict.txt
korean:
-    det: Multilingual_PP-OCRv3_det_infer.pth
-    rec: korean_PP-OCRv3_rec_infer.pth
-    dict: korean_dict.txt
+    det: ch_PP-OCRv5_det_infer.pth
+    rec: korean_PP-OCRv5_rec_infer.pth
+    dict: ppocrv5_korean_dict.txt
japan:
det: ch_PP-OCRv5_det_infer.pth
rec: ch_PP-OCRv5_rec_server_infer.pth
-    dict: japan_dict.txt
+    dict: ppocrv5_dict.txt
chinese_cht:
det: ch_PP-OCRv5_det_infer.pth
rec: ch_PP-OCRv5_rec_server_infer.pth
-    dict: chinese_cht_dict.txt
+    dict: ppocrv5_dict.txt
ta:
det: Multilingual_PP-OCRv3_det_infer.pth
rec: ta_PP-OCRv3_rec_infer.pth
@@ -48,9 +48,9 @@ lang:
rec: ka_PP-OCRv3_rec_infer.pth
dict: ka_dict.txt
latin:
-    det: en_PP-OCRv3_det_infer.pth
-    rec: latin_PP-OCRv3_rec_infer.pth
-    dict: latin_dict.txt
+    det: ch_PP-OCRv5_det_infer.pth
+    rec: latin_PP-OCRv5_rec_infer.pth
+    dict: ppocrv5_latin_dict.txt
arabic:
det: Multilingual_PP-OCRv3_det_infer.pth
rec: arabic_PP-OCRv3_rec_infer.pth
@@ -62,4 +62,8 @@ lang:
devanagari:
det: Multilingual_PP-OCRv3_det_infer.pth
rec: devanagari_PP-OCRv3_rec_infer.pth
-    dict: devanagari_dict.txt
\ No newline at end of file
+    dict: devanagari_dict.txt
+  east_slavic:
+    det: ch_PP-OCRv5_det_infer.pth
+    rec: eslav_PP-OCRv5_rec_infer.pth
+    dict: ppocrv5_eslav_dict.txt
\ No newline at end of file
<html><head>
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.15.4/css/all.css">
<style>
.link-block {
border: 1px solid transparent;
border-radius: 24px;
background-color: rgba(54, 54, 54, 1);
cursor: pointer !important;
}
.link-block:hover {
background-color: rgba(54, 54, 54, 0.75) !important;
cursor: pointer !important;
}
.external-link {
display: inline-flex;
align-items: center;
height: 36px;
line-height: 36px;
padding: 0 16px;
cursor: pointer !important;
}
.external-link,
.external-link:hover {
cursor: pointer !important;
}
a {
text-decoration: none;
}
</style></head>
<body>
<div style="
display: flex;
flex-direction: column;
justify-content: center;
align-items: center;
text-align: center;
background: linear-gradient(45deg, #007bff 0%, #0056b3 100%);
padding: 24px;
gap: 24px;
border-radius: 8px;
">
<div style="
display: flex;
flex-direction: column;
align-items: center;
gap: 16px;
">
<div style="display: flex; flex-direction: column; gap: 8px">
<h1 style="
font-size: 48px;
color: #fafafa;
margin: 0;
font-family: 'Trebuchet MS', 'Lucida Sans Unicode',
'Lucida Grande', 'Lucida Sans', Arial, sans-serif;
">
MinerU 2: PDF Extraction Demo
</h1>
</div>
</div>
<p style="
margin: 0;
line-height: 1.6rem;
font-size: 16px;
color: #fafafa;
opacity: 0.8;
">
A one-stop, open-source, high-quality data extraction tool that supports converting PDF to Markdown and JSON.<br>
</p>
<style>
.link-block {
display: inline-block;
}
.link-block + .link-block {
margin-left: 20px;
}
</style>
<div class="column has-text-centered">
<div class="publication-links">
<!-- Code Link. -->
<span class="link-block">
<a href="https://github.com/opendatalab/MinerU" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
<span class="icon" style="margin-right: 4px">
<i class="fab fa-github" style="color: white; margin-right: 4px"></i>
</span>
<span style="color: white">Code</span>
</a>
</span>
<!-- arXiv Link. -->
<span class="link-block">
<a href="https://arxiv.org/abs/2409.18839" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
<span class="icon" style="margin-right: 8px">
<i class="fas fa-file" style="color: white"></i>
</span>
<span style="color: white">Paper</span>
</a>
</span>
<!-- Homepage Link. -->
<span class="link-block">
<a href="https://mineru.net/home?source=online" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
<span class="icon" style="margin-right: 8px">
<i class="fas fa-home" style="color: white"></i>
</span>
<span style="color: white">Homepage</span>
</a>
</span>
<!-- Client Link. -->
<span class="link-block">
<a href="https://mineru.net/client?source=online" class="external-link button is-normal is-rounded is-dark" style="text-decoration: none; cursor: pointer">
<span class="icon" style="margin-right: 8px">
<i class="fas fa-download" style="color: white"></i>
</span>
<span style="color: white">Download</span>
</a>
</span>
</div>
</div>
<!-- New Demo Links -->
</div>
</body></html>
\ No newline at end of file
@@ -43,7 +43,7 @@ vlm = [
"pydantic",
]
sglang = [
"sglang[all]==0.4.7",
"sglang[all]>=0.4.7,<0.4.9",
]
pipeline = [
"matplotlib>=3.10,<4",
@@ -62,9 +62,20 @@ pipeline = [
"transformers>=4.49.0,!=4.51.0,<5.0.0",
"fast-langdetect>=0.2.3,<0.3.0",
]
+api = [
+    "fastapi",
+    "python-multipart",
+    "uvicorn",
+]
+gradio = [
+    "gradio>=5.34,<6",
+    "gradio-pdf>=0.0.22",
+]
core = [
"mineru[vlm]",
"mineru[pipeline]",
"mineru[api]",
"mineru[gradio]",
]
all = [
"mineru[core]",
......@@ -97,6 +108,8 @@ Repository = "https://github.com/opendatalab/MinerU"
mineru = "mineru.cli:client.main"
mineru-sglang-server = "mineru.cli.vlm_sglang_server:main"
mineru-models-download = "mineru.cli.models_download:download_models"
mineru-api = "mineru.cli.fast_api:main"
mineru-gradio = "mineru.cli.gradio_app:main"
[tool.setuptools.dynamic]
version = {attr = "mineru.version.__version__"}
......
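Both new console scripts ship with the core extra, so after pip install -U "mineru[core]" the services can be started with mineru-api --host 127.0.0.1 --port 8000 (the flags come from the argparse setup in fast_api.py above) or with mineru-gradio.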